Examples of tokenStream()


Examples of org.apache.lucene.document.StringField.tokenStream()

 
  public void testStringField() throws IOException {
    StringField stringField = new StringField("foo", "bar", Field.Store.NO);
   
    // passing null
    TokenStream ts = stringField.tokenStream(null, null);
    assertTokenStreamContents(ts,
        new String[] { "bar" },
        new int[]    { 0 },
        new int[]    { 3 }
    );
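The excerpt above is cut off before the test method ends. As a rough, self-contained sketch (assuming the Lucene 4.7+ two-argument Field.tokenStream(Analyzer, TokenStream) signature implied by the call above), consuming the stream directly looks like this; StringField is indexed but not analyzed, so the stream yields the whole value as a single token with offsets 0..3:

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StringField;

  public class StringFieldTokenStreamSketch {
    public static void main(String[] args) throws IOException {
      StringField stringField = new StringField("foo", "bar", Field.Store.NO);

      // StringField is not analyzed, so the analyzer argument is ignored
      // and passing null (as in the test above) is safe.
      TokenStream ts = stringField.tokenStream(null, null);
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);

      ts.reset();
      while (ts.incrementToken()) {
        // expected output: bar 0-3
        System.out.println(term + " " + offset.startOffset() + "-" + offset.endOffset());
      }
      ts.end();
      ts.close();
    }
  }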

Examples of org.apache.lucene.morphology.analyzer.MorphologyAnalyzer.tokenStream()

    public void testPm() throws Exception {
        LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
        LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();

        MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
        TokenStream stream = russianAnalyzer.tokenStream("name", new FastStringReader("тест пм тест"));
        MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
        assertSimpleTSOutput(englishFilter, new String[] {"тест", "тесто", "", "тест", "тесто"});
    }

}
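assertSimpleTSOutput is a helper defined elsewhere in that test class and not shown in the excerpt. A plausible stand-in (an assumption about its behavior, not the project's actual code) walks the filtered stream and compares each CharTermAttribute value against the expected array:

  import static org.junit.Assert.assertEquals;
  import static org.junit.Assert.assertFalse;
  import static org.junit.Assert.assertTrue;

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

  public final class TokenStreamAssertions {
    // Hypothetical re-creation of the helper used above; the real one may differ.
    public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      for (String expectedTerm : expected) {
        assertTrue("token stream ended before all expected terms were seen", stream.incrementToken());
        assertEquals(expectedTerm, term.toString());
      }
      assertFalse("token stream produced more terms than expected", stream.incrementToken());
      stream.end();
      stream.close();
    }
  }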

Examples of org.elasticsearch.common.lucene.all.AllField.tokenStream()

        DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
        byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
        Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
        AllField field = (AllField) doc.getField("_all");
        // One field is boosted so we should see AllTokenStream used:
        assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer(), null), Matchers.instanceOf(AllTokenStream.class));
        AllEntries allEntries = field.getAllEntries();
        assertThat(allEntries.fields().size(), equalTo(3));
        assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
        assertThat(allEntries.fields().contains("name.last"), equalTo(true));
        assertThat(allEntries.fields().contains("simple1"), equalTo(true));

Examples of org.elasticsearch.index.analysis.NamedAnalyzer.tokenStream()

    @Test
    public void testCanUseFromNamedAnalyzer() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                    new String[]{"just", "a", "little", "test", Integer.toString(i)},
                    new int[]{ 0, 5, 7, 14, 19},
                    new int[]{ 4, 6, 13, 18, 20},
                    new int[]{ 1, 1, 1, 1, 1});
    }
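For reference, the start offsets, end offsets, and position increments asserted above follow directly from whitespace tokenization of "just a little test 0". A small sketch that prints them (the Version constant is an assumption for a 4.x-style constructor; newer Lucene releases take no argument):

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  import org.apache.lucene.util.Version;

  public class WhitespaceOffsetsSketch {
    public static void main(String[] args) throws IOException {
      // Version constant assumed; adjust to the Lucene release in use.
      WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_47);
      TokenStream ts = analyzer.tokenStream("field", new StringReader("just a little test 0"));
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
      PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);

      ts.reset();
      while (ts.incrementToken()) {
        // prints: just 0-4, a 5-6, little 7-13, test 14-18, 0 19-20 (each with posInc 1)
        System.out.println(term + " " + offset.startOffset() + "-" + offset.endOffset()
            + " posInc=" + posInc.getPositionIncrement());
      }
      ts.end();
      ts.close();
      analyzer.close();
    }
  }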

Examples of org.sf.mustru.utils.StandardBgramAnalyzer.tokenStream()

/* ... (tail of a block comment whose opening falls before this excerpt)
  synBuffer.append( (adjectives.size() > 0) ? wnetTools.getSynonyms(adjectives.get(0), "a"):"");
  String[] synonyms = synBuffer.toString().trim().split(" ");
*/
  //*-- tokenize the question
  StandardBgramAnalyzer analyzer = new StandardBgramAnalyzer(); analyzer.setExtractEntities(true);
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(question));
  ArrayList<Token> tokenList = new ArrayList<Token>(); Token token = null;
  entities = new ArrayList<String>();    //*-- list of entities in the question
  while ( (token = stream.next()) != null)
   { tokenList.add(token); if (token.type().equals("<ENTITY>")) entities.add(token.termText()); }
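The loop above uses the old Token-based API (stream.next() and token.termText()), which was deprecated in Lucene 2.9 and removed in later releases. Against a current TokenStream the same entity extraction would be written with attributes; the sketch below assumes the analyzer works on a modern stream and still reports entities through the token type "<ENTITY>":

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

  public class EntityExtractionSketch {
    // Attribute-based equivalent of the deprecated Token loop shown above.
    public static List<String> extractEntities(TokenStream stream) throws IOException {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      TypeAttribute type = stream.addAttribute(TypeAttribute.class);
      List<String> entities = new ArrayList<String>();
      stream.reset();
      while (stream.incrementToken()) {
        if ("<ENTITY>".equals(type.type())) {
          entities.add(term.toString());
        }
      }
      stream.end();
      stream.close();
      return entities;
    }
  }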

Examples of org.wltea.analyzer.lucene.IKAnalyzer.tokenStream()

    Analyzer analyzer = new IKAnalyzer(true);
   
    // obtain the Lucene TokenStream object
    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
//      ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too"));
      // get the token offset attribute
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
      // get the token term-text attribute
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
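The excerpt ends before the stream is consumed. The usual continuation (a sketch of the common pattern, not the original file's exact code) resets the stream, iterates the tokens, and releases it in a finally block:

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.wltea.analyzer.lucene.IKAnalyzer;

  public class IKAnalyzerSketch {
    public static void main(String[] args) {
      // true enables the "smart" (coarse-grained) segmentation mode, as in the excerpt above
      Analyzer analyzer = new IKAnalyzer(true);
      TokenStream ts = null;
      try {
        ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(offset.startOffset() + "-" + offset.endOffset() + " : " + term);
        }
        ts.end();
      } catch (IOException e) {
        e.printStackTrace();
      } finally {
        if (ts != null) {
          try { ts.close(); } catch (IOException e) { e.printStackTrace(); }
        }
      }
    }
  }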