Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()
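Every snippet below consumes a TokenStream obtained from Analyzer.tokenStream(fieldName, reader). For orientation, here is a minimal, self-contained sketch of the standard consumption loop in the 2.x/3.x attribute API these examples use; the field name "content" and the sample text are placeholders, not taken from any snippet.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class TokenStreamDemo {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer(); // no-arg ctor, as in the older snippets below
    TokenStream ts = analyzer.tokenStream("content", new StringReader("some sample text"));
    TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
    ts.reset();                   // position the stream before the first token
    while (ts.incrementToken()) { // attributes now describe the current token
      System.out.println(termAtt.term());
    }
    ts.end();   // record end-of-stream offset state
    ts.close(); // release resources held by the stream
  }
}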


  public void testTokenStream() throws Exception {
    // CJKAnalyzer emits overlapping bigrams for consecutive CJK characters.
    Analyzer analyzer = new CJKAnalyzer();
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("\u4e00\u4e01\u4e02"));
    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
    assertTrue(ts.incrementToken());
    assertEquals("\u4e00\u4e01", termAtt.term());
    assertTrue(ts.incrementToken());
    assertEquals("\u4e01\u4e02", termAtt.term());
    assertFalse(ts.incrementToken()); // three characters yield exactly two bigrams
  }


        String s = "football-soccer in the euro 2004 footie competition";
        QueryParser parser = new QueryParser(TEST_VERSION, "bookid", analyzer);
        Query query = parser.parse(srchkey);

        TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));

        Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);

        // Get 3 best fragments and seperate with a "..."
        tokenStream = analyzer.tokenStream(null, new StringReader(s));
View Full Code Here

        TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));

        Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);

        // Get 3 best fragments and seperate with a "..."
        tokenStream = analyzer.tokenStream(null, new StringReader(s));

        String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
        String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
        assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
            + " actual:" + result, expectedResult.equals(result));

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40)); // fragments of roughly 40 characters

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "..."); // "..." separator, as in the other highlighter tests
        }

    // Wrap WhitespaceAnalyzer so two-word shingles are emitted alongside unigrams.
    Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
    searcher = setUpSearcher(analyzer);

    PhraseQuery q = new PhraseQuery();

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("this sentence"));
    int j = -1;

    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

    while (ts.incrementToken()) {
      j += posIncrAtt.getPositionIncrement();
      // add each token (unigram or shingle) to the phrase at its analyzed position
      q.add(new Term("content", termAtt.term()), j);
    }

    // Same wrapper, but collect every emitted token into a BooleanQuery instead.
    Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
    searcher = setUpSearcher(analyzer);

    BooleanQuery q = new BooleanQuery();

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("test sentence"));

    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

    while (ts.incrementToken()) {
      // each token (unigram or shingle) becomes an optional clause
      q.add(new TermQuery(new Term("content", termAtt.term())), BooleanClause.Occur.SHOULD);
    }

  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd   "));
    stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
    stream = new CachingTokenFilter(stream);
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    // the same field is added twice; the cached stream replays its tokens for the second add
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);

  public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    MockDirectoryWrapper dir = newDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    // TeeSinkTokenFilter duplicates the token stream: "tee" feeds one field, "sink" the other.
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f1);
    doc.add(f2);
    w.addDocument(doc);

  // Older-API variant of the CachingTokenFilter test above.
  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

  // Older-API variant of the TeeSinkTokenFilter test above.
  public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f1);
    doc.add(f2);
    w.addDocument(doc);
