Examples of tokenStream()


Examples of org.apache.lucene.analysis.core.WhitespaceAnalyzer.tokenStream()

  /** normal case, unfiltered analyzer */
  @Test
  public void testAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    validateTokens(allTokens, ts);
    ts.end();
    ts.close();
  }
View Full Code Here

Examples of org.apache.lucene.analysis.snowball.SnowballAnalyzer.tokenStream()

                                        org.apache.lucene.util.Version.LUCENE_CURRENT,
                                        "Spanish",
                                        SPANISH_STOP_WORDS);

                       
                        TokenStream tokenStream = analyzer.tokenStream(
                                        "content",
                                        new StringReader(indexCleanedOfHTMLTags));
                       
                        Token token = new Token();
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

  }
 
  protected Set<String> getHighlightWords(String searchString) {
    try {
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
      TokenStream stream = analyzer.tokenStream("content", new StringReader(searchString));
      TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
      for (boolean next = stream.incrementToken(); next; next = stream.incrementToken()) {
        String term = termAtt.term();
        if(log.isDebug()) log.debug(term);
      }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

  public static void main(String[] args) throws IOException {
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);    

    StringReader in = new StringReader("text to magically vectorize");
    TokenStream ts = analyzer.tokenStream("body", in);
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

    Vector v1 = new RandomAccessSparseVector(100);                  
    while (ts.incrementToken()) {
      char[] termBuffer = termAtt.termBuffer();
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

              String superColumnName = merge(superColumnList, " ");
              superColumnList.clear();
              if(importer_.columnFamily.superColumn.tokenize)
              {
                  Analyzer analyzer = new StandardAnalyzer();
                  TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName));
                  Token token = null;
                  token = ts.next();
                  while(token != null)
                  {
                    superColumnList.add(token.termText());
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

      ClassifierContext ctx = new ClassifierContext(a,ds);
      ctx.initialize();
     
      //TODO: make the analyzer configurable
      StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
      TokenStream ts = analyzer.tokenStream(null, new InputStreamReader(new FileInputStream(inputFile), "UTF-8"));
    
      ArrayList<String> tokens = new ArrayList<String>(1000);
      while (ts.incrementToken()) {
        tokens.add(ts.getAttribute(CharTermAttribute.class).toString());
      }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.length(); i++) {
          String text = hits.doc(i).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
View Full Code Here

Examples of org.apache.lucene.document.IntField.tokenStream()

 
  public void testNumericReuse() throws IOException {
    IntField intField = new IntField("foo", 5, Field.Store.NO);
   
    // passing null
    TokenStream ts = intField.tokenStream(null, null);
    assertTrue(ts instanceof NumericTokenStream);
    assertEquals(NumericUtils.PRECISION_STEP_DEFAULT_32, ((NumericTokenStream)ts).getPrecisionStep());
    assertNumericContents(5, ts);

    // now reuse previous stream
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.