Package: org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()


        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
View Full Code Here


    Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
    searcher = setUpSearcher(analyzer);

    PhraseQuery q = new PhraseQuery();

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("this sentence"));
    int j = -1;
   
    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
View Full Code Here

    Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
    searcher = setUpSearcher(analyzer);

    BooleanQuery q = new BooleanQuery();

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("test sentence"));
   
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
   
    ts.reset();
View Full Code Here

     */
    protected Query getPrefixQuery(String field, String termStr)
            throws ParseException {
        // only create a prefix query when the term is a single word / token
        Analyzer a = getAnalyzer();
        TokenStream ts = a.tokenStream(field, new StringReader(termStr));
        int count = 0;
        boolean isCJ = false;
        try {
            Token t = new Token();
            while ((t = ts.next(t)) != null) {
View Full Code Here

        // retrieve analyzer
        Analyzer analyzer = retrieveAnalyzer(content);
               
        // @see org.apache.lucene.analysis.Analyzer - null for fieldname should be supported
        // most analyzers do not use the parameter fieldname
        TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(text));
        return tokenStream;
    }
   
    /**
     *
 
View Full Code Here

    while(true) {
      String s = TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      try (TokenStream ts = a.tokenStream("foo", s)) {
        final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
        final BytesRef termBytes = termAtt.getBytesRef();
        ts.reset();

        int count = 0;
View Full Code Here

  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    try (TokenStream stream = analyzer.tokenStream("field", "abcd   ")) {
      stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
      TokenStream cachedStream = new CachingTokenFilter(stream);
      FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setStoreTermVectors(true);
      customType.setStoreTermVectorPositions(true);
View Full Code Here

      }
    };
    for (int i = 0; i < 1000; i++) {
      String s = TestUtil.randomSimpleString(random);
      assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                   right.tokenStream("foo", newStringReader(s)));
    }
  }
 
  // not so useful since its all one token?!
  public void testLetterAsciiHuge() throws Exception {
View Full Code Here

    };
    int numIterations = atLeast(50);
    for (int i = 0; i < numIterations; i++) {
      String s = TestUtil.randomSimpleString(random, maxLength);
      assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                   right.tokenStream("foo", newStringReader(s)));
    }
  }
 
  public void testLetterHtmlish() throws Exception {
    Random random = random();
View Full Code Here

      }
    };
    for (int i = 0; i < 1000; i++) {
      String s = TestUtil.randomHtmlishString(random, 20);
      assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                   right.tokenStream("foo", newStringReader(s)));
    }
  }
 
  public void testLetterHtmlishHuge() throws Exception {
    Random random = random();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.