Examples of tokenStream()


Examples of org.apache.lucene.document.StringField.tokenStream()

 
  public void testStringField() throws IOException {
    StringField stringField = new StringField("foo", "bar", Field.Store.NO);
   
    // passing null
    TokenStream ts = stringField.tokenStream(null, null);
    assertTokenStreamContents(ts,
        new String[] { "bar" },
        new int[]    { 0 },
        new int[]    { 3 }
    );
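The excerpt above is cut off before the test method ends. As a rough, self-contained sketch (assuming the Lucene 4.7+ two-argument Field.tokenStream(Analyzer, TokenStream) signature implied by the call above), consuming the stream directly looks like this; StringField is indexed but not analyzed, so the stream yields the whole value as a single token with offsets 0..3:

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StringField;

  public class StringFieldTokenStreamSketch {
    public static void main(String[] args) throws IOException {
      StringField stringField = new StringField("foo", "bar", Field.Store.NO);

      // StringField is not analyzed, so the analyzer argument is ignored
      // and passing null (as in the test above) is safe.
      TokenStream ts = stringField.tokenStream(null, null);
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);

      ts.reset();
      while (ts.incrementToken()) {
        // expected output: bar 0-3
        System.out.println(term + " " + offset.startOffset() + "-" + offset.endOffset());
      }
      ts.end();
      ts.close();
    }
  }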

Examples of org.apache.lucene.morphology.analyzer.MorphologyAnalyzer.tokenStream()

    public void testPm() throws Exception {
        LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
        LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();

        MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
        TokenStream stream = russianAnalyzer.tokenStream("name", new FastStringReader("тест пм тест"));
        MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
        assertSimpleTSOutput(englishFilter, new String[] {"тест", "тесто", "", "тест", "тесто"});
    }

}
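assertSimpleTSOutput is a helper defined elsewhere in that test class and not shown in the excerpt. A plausible stand-in (an assumption about its behavior, not the project's actual code) walks the filtered stream and compares each CharTermAttribute value against the expected array:

  import static org.junit.Assert.assertEquals;
  import static org.junit.Assert.assertFalse;
  import static org.junit.Assert.assertTrue;

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

  public final class TokenStreamAssertions {
    // Hypothetical re-creation of the helper used above; the real one may differ.
    public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      for (String expectedTerm : expected) {
        assertTrue("token stream ended before all expected terms were seen", stream.incrementToken());
        assertEquals(expectedTerm, term.toString());
      }
      assertFalse("token stream produced more terms than expected", stream.incrementToken());
      stream.end();
      stream.close();
    }
  }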

Examples of org.elasticsearch.common.lucene.all.AllField.tokenStream()

        DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
        byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
        Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
        AllField field = (AllField) doc.getField("_all");
        // One field is boosted so we should see AllTokenStream used:
        assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer(), null), Matchers.instanceOf(AllTokenStream.class));
        AllEntries allEntries = field.getAllEntries();
        assertThat(allEntries.fields().size(), equalTo(3));
        assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
        assertThat(allEntries.fields().contains("name.last"), equalTo(true));
        assertThat(allEntries.fields().contains("simple1"), equalTo(true));

Examples of org.elasticsearch.index.analysis.NamedAnalyzer.tokenStream()

    @Test
    public void testCanUseFromNamedAnalyzer() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                    new String[]{"just", "a", "little", "test", Integer.toString(i)},
                    new int[]{ 0, 5, 7, 14, 19},
                    new int[]{ 4, 6, 13, 18, 20},
                    new int[]{ 1, 1, 1, 1, 1});
    }
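For reference, the start offsets, end offsets, and position increments asserted above follow directly from whitespace tokenization of "just a little test 0". A small sketch that prints them (the Version constant is an assumption for a 4.x-style constructor; newer Lucene releases take no argument):

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  import org.apache.lucene.util.Version;

  public class WhitespaceOffsetsSketch {
    public static void main(String[] args) throws IOException {
      // Version constant assumed; adjust to the Lucene release in use.
      WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_47);
      TokenStream ts = analyzer.tokenStream("field", new StringReader("just a little test 0"));
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
      PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);

      ts.reset();
      while (ts.incrementToken()) {
        // prints: just 0-4, a 5-6, little 7-13, test 14-18, 0 19-20 (each with posInc 1)
        System.out.println(term + " " + offset.startOffset() + "-" + offset.endOffset()
            + " posInc=" + posInc.getPositionIncrement());
      }
      ts.end();
      ts.close();
      analyzer.close();
    }
  }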

Examples of org.sf.mustru.utils.StandardBgramAnalyzer.tokenStream()

/* ... (tail of a block comment whose opening falls before this excerpt)
  synBuffer.append( (adjectives.size() > 0) ? wnetTools.getSynonyms(adjectives.get(0), "a"):"");
  String[] synonyms = synBuffer.toString().trim().split(" ");
*/
  //*-- tokenize the question
  StandardBgramAnalyzer analyzer = new StandardBgramAnalyzer(); analyzer.setExtractEntities(true);
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(question));
  ArrayList<Token> tokenList = new ArrayList<Token>(); Token token = null;
  entities = new ArrayList<String>();    //*-- list of entities in the question
  while ( (token = stream.next()) != null)
   { tokenList.add(token); if (token.type().equals("<ENTITY>")) entities.add(token.termText()); }
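The loop above uses the old Token-based API (stream.next() and token.termText()), which was deprecated in Lucene 2.9 and removed in later releases. Against a current TokenStream the same entity extraction would be written with attributes; the sketch below assumes the analyzer works on a modern stream and still reports entities through the token type "<ENTITY>":

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

  public class EntityExtractionSketch {
    // Attribute-based equivalent of the deprecated Token loop shown above.
    public static List<String> extractEntities(TokenStream stream) throws IOException {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      TypeAttribute type = stream.addAttribute(TypeAttribute.class);
      List<String> entities = new ArrayList<String>();
      stream.reset();
      while (stream.incrementToken()) {
        if ("<ENTITY>".equals(type.type())) {
          entities.add(term.toString());
        }
      }
      stream.end();
      stream.close();
      return entities;
    }
  }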

Examples of org.wltea.analyzer.lucene.IKAnalyzer.tokenStream()

    Analyzer analyzer = new IKAnalyzer(true);
   
    // obtain the Lucene TokenStream object
    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
//      ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too"));
      // get the token offset attribute
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
      // get the token term-text attribute
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
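The excerpt ends before the stream is consumed. The usual continuation (a sketch of the common pattern, not the original file's exact code) resets the stream, iterates the tokens, and releases it in a finally block:

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  import org.wltea.analyzer.lucene.IKAnalyzer;

  public class IKAnalyzerSketch {
    public static void main(String[] args) {
      // true enables the "smart" (coarse-grained) segmentation mode, as in the excerpt above
      Analyzer analyzer = new IKAnalyzer(true);
      TokenStream ts = null;
      try {
        ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(offset.startOffset() + "-" + offset.endOffset() + " : " + term);
        }
        ts.end();
      } catch (IOException e) {
        e.printStackTrace();
      } finally {
        if (ts != null) {
          try { ts.close(); } catch (IOException e) { e.printStackTrace(); }
        }
      }
    }
  }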