Examples of tokenStream()


Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

    while(true) {
      String s = _TestUtil.randomRealisticUnicodeString(random);
      if (other != null && s.equals(other)) {
        continue;
      }
      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
      final TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
      int count = 0;
      ts.reset();
      while(ts.incrementToken()) {
        if (count == 0 && !termAtt.term().equals(s)) {
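
The fragment above is cut off by the excerpt. Below is a minimal, self-contained sketch of the same consume loop, assuming a recent Lucene release (5.x or later) where StandardAnalyzer has a no-argument constructor and CharTermAttribute replaces the old TermAttribute; the field name "foo" and the sample text are arbitrary.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamBasics {
  public static void main(String[] args) throws IOException {
    // tokenStream() may hand back reused, thread-local components, so the
    // reset() -> incrementToken()* -> end() -> close() contract must be followed.
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream ts = analyzer.tokenStream("foo", "Lucene tokenizes text into terms")) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();
    }
  }
}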

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

            throw new IllegalArgumentException
              ("field must have either String or Reader value");

          int position = 0;
          // Tokenize field and add to postingTable
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
         
          try {
            while (stream.incrementToken()) {
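
A hedged sketch of the same position-tracking idea on the current attribute API; the analyzer, fieldName and text parameters stand in for whatever the surrounding indexing code supplies, and the class and method names are made up for illustration.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

final class PositionDump {
  // Prints each term together with its absolute position in the field.
  static void dumpPositions(Analyzer analyzer, String fieldName, String text) throws IOException {
    int position = -1; // the first token, with increment 1, lands on position 0
    try (TokenStream stream = analyzer.tokenStream(fieldName, text)) {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        position += posIncrAtt.getPositionIncrement();
        System.out.println(position + "\t" + termAtt);
      }
      stream.end();
    }
  }
}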

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

  }

  /** Test reuse of MorfologikFilter with leftover stems. */
  public final void testLeftoverStems() throws IOException {
    Analyzer a = getTestAnalyzer();
    TokenStream ts_1 = a.tokenStream("dummy", "liście");
    try {
      CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
      ts_1.reset();
      ts_1.incrementToken();
      assertEquals("first stream", "liście", termAtt_1.toString());

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

      ts_1.end();
    } finally {
      IOUtils.closeWhileHandlingException(ts_1);
    }

    TokenStream ts_2 = a.tokenStream("dummy", "danych");
    try {
      CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
      ts_2.reset();
      ts_2.incrementToken();
      assertEquals("second stream", "dany", termAtt_2.toString());
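
The two fragments above belong to one test that pulls two consecutive streams from the same reused analyzer. Below is a condensed sketch of that reuse pattern, using try-with-resources instead of IOUtils.closeWhileHandlingException and a plain StandardAnalyzer in place of the Morfologik test analyzer (getTestAnalyzer() is specific to that test suite, so the printed terms will differ from the asserted stems).

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class ReusedAnalyzer {
  // Returns the first term produced for the given text; the analyzer's
  // cached components are reused across calls to tokenStream().
  static String firstTerm(Analyzer a, String text) throws IOException {
    try (TokenStream ts = a.tokenStream("dummy", text)) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      String term = ts.incrementToken() ? termAtt.toString() : null;
      ts.end();
      return term;
    }
  }

  public static void main(String[] args) throws IOException {
    try (Analyzer a = new StandardAnalyzer()) {
      System.out.println(firstTerm(a, "liście"));
      System.out.println(firstTerm(a, "danych"));
    }
  }
}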

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

      }
    };
    int num = 1000 * RANDOM_MULTIPLIER;
    for (int i = 0; i < num; i++) {
      String s = _TestUtil.randomUnicodeString(random());
      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        ts.reset();
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        while (ts.incrementToken()) {
          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
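
A self-contained variant of the offset check above, assuming a recent Lucene and WhitespaceAnalyzer as a stand-in for the test's randomized analyzer; with no char filter rewriting the input, the OffsetAttribute offsets index directly into the original string, so highlight fragments can be cut straight out of it.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class OffsetCheck {
  public static void main(String[] args) throws IOException {
    String s = "highlight me, not the rest";
    try (Analyzer analyzer = new WhitespaceAnalyzer();
         TokenStream ts = analyzer.tokenStream("foo", s)) {
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // The offsets point back into the original text.
        String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
        System.out.println(highlightedText);
      }
      ts.end();
    }
  }
}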

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();
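
The test above is written against the old Lucene 2.x/3.x IndexWriter and Field constructors. Roughly the same thing on a current API, i.e. handing an already-built TokenStream to a field, might look like the sketch below; ByteBuffersDirectory assumes Lucene 8+, and the directory, analyzer and field name are placeholders.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

final class PreAnalyzedField {
  public static void main(String[] args) throws IOException {
    try (Directory dir = new ByteBuffersDirectory();
         Analyzer analyzer = new WhitespaceAnalyzer();
         IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
      Document doc = new Document();
      // The field value is a TokenStream built up front; CachingTokenFilter
      // records the tokens so the stream could be consumed more than once.
      TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", "abcd   "));
      doc.add(new TextField("field", stream));
      w.addDocument(doc);
    }
  }
}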

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

  public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f1);
    doc.add(f2);

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

  public static void main(String[] args) throws IOException {
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);    

    StringReader in = new StringReader("text to magically vectorize");
    TokenStream ts = analyzer.tokenStream("body", in);
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

    Vector v1 = new RandomAccessSparseVector(100);                  
    while (ts.incrementToken()) {
      char[] termBuffer = termAtt.termBuffer();
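
A sketch of the same Mahout vectorization flow using CharTermAttribute instead of the removed TermAttribute.termBuffer(); the Mahout package names (org.apache.mahout.vectorizer.encoders, org.apache.mahout.math) and the addToVector(String, Vector) overload are assumptions based on recent Mahout releases, as is the use of a no-argument StandardAnalyzer.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
import org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder;

final class Vectorize {
  public static void main(String[] args) throws IOException {
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    Vector v1 = new RandomAccessSparseVector(100);
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream ts = analyzer.tokenStream("body", "text to magically vectorize")) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // Hash each term into the 100-dimensional feature vector.
        encoder.addToVector(termAtt.toString(), v1);
      }
      ts.end();
    }
    System.out.println(v1);
  }
}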

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

              String superColumnName = merge(superColumnList, " ");
              superColumnList.clear();
              if(importer_.columnFamily.superColumn.tokenize)
              {
                  Analyzer analyzer = new StandardAnalyzer();
                  TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName));
                  Token token = null;
                  token = ts.next();
                  while(token != null)
                  {
                    superColumnList.add(token.termText());
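
The loop above uses the Token/next()/termText() API that was removed after Lucene 3.x. Below is a hedged rewrite of the same tokenization step on the attribute-based API; the SuperColumnTokenizer class is invented for illustration, and the importer's superColumnList handling is reduced to returning a plain list.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class SuperColumnTokenizer {
  // Splits a super-column name into terms, as the old next()/termText() loop did.
  static List<String> tokenize(String superColumnName) throws IOException {
    List<String> terms = new ArrayList<>();
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName))) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        terms.add(termAtt.toString());
      }
      ts.end();
    }
    return terms;
  }
}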