Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()

The snippets below, collected from Lucene, Solr, and related projects, show common ways of obtaining a TokenStream from an Analyzer and consuming its tokens.


    // field one
    TokenStream input = a.tokenStream("one", new StringReader(text));
    List<Token> realTokens = getTokens(input);
    assertTokEqual(realTokens, expectedTokens1);
    // field two
    input = a.tokenStream("two", new StringReader(text));
    realTokens = getTokens(input);
    assertTokEqual(realTokens, expectedTokens2);
    // field three
    input = a.tokenStream("three", new StringReader(text));
    realTokens = getTokens(input);
    assertTokEqual(realTokens, expectedTokens3);
  }
 
  public void testQueryParsing() throws IOException, ParseException {
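
The getTokens() and assertTokEqual() helpers are not shown in this excerpt. A minimal sketch of getTokens(), assuming the reusable-Token API (TokenStream.next(Token)) that these snippets are written against:

    private static List<Token> getTokens(TokenStream ts) throws IOException {
      List<Token> tokens = new ArrayList<Token>();
      final Token reusable = new Token();
      for (Token t = ts.next(reusable); t != null; t = ts.next(reusable)) {
        // the stream recycles the Token instance, so clone before storing
        tokens.add((Token) t.clone());
      }
      return tokens;
    }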

 
  public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer();
    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
        a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
    for( String v : multivalued ){
      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
      Analyzer a2 = new WhitespaceAnalyzer();
      TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
      Token t1 = new Token();
      Token t2 = new Token();
      for( t1 = ts1.next( t1 ); t1 != null; t1 = ts1.next( t1 ) ){
        t2 = ts2.next( t2 );
        assertEquals( t2, t1 );
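
Later Lucene releases replace the Token-based next() loop with the attribute API. An equivalent consumption loop, sketched against the Lucene 3.x API (TermAttribute was later renamed CharTermAttribute):

    TokenStream ts = a2.tokenStream("", new StringReader(v));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term() + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
    }
    ts.end();
    ts.close();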

  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();
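
CachingTokenFilter records the tokens on its first pass, which is what lets the same Field instance be added to the document twice above. A minimal sketch of that caching behaviour, reusing the hypothetical getTokens() helper from earlier:

    TokenStream cached = new CachingTokenFilter(
        analyzer.tokenStream("field", new StringReader("abcd   ")));
    List<Token> first = getTokens(cached);   // first pass fills the cache
    cached.reset();                          // rewind to the start of the cache
    List<Token> second = getTokens(cached);  // replayed without re-analyzing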

  public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f1);
    doc.add(f2);
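
A TeeSinkTokenFilter pushes each token it produces into its sinks, so the tee must be fully consumed before a sink is read; adding f1 (the tee) to the document before f2 (the sink) preserves that ordering during indexing. Standalone, the same contract looks roughly like:

    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(
        analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    tee.consumeAllTokens();  // drain the tee, copying every token into the sink
    while (sink.incrementToken()) {
      // the sink now replays the captured tokens
    }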

        // most of the following code is taken from the Lucene QueryParser

        // Use the analyzer to get all the tokens, and then build a TermQuery,
        // PhraseQuery, or nothing based on the term count

        TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
        ArrayList<Token> lst = new ArrayList<Token>();
        Token t;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;
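
The token list collected here typically feeds a term-count decision; a condensed sketch of that logic (not the verbatim QueryParser source, which also handles tokens at the same position):

    if (lst.size() == 0) {
      return null;                                    // nothing to search for
    } else if (lst.size() == 1) {
      return new TermQuery(new Term(field, lst.get(0).term()));
    } else {
      PhraseQuery pq = new PhraseQuery();
      for (Token tok : lst) {
        pq.add(new Term(field, tok.term()));
      }
      return pq;
    }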

              FieldType ft = schema.getFieldType(name);
              Analyzer analyzer = ft.getAnalyzer();
              Collection<Object> vals = doc.getFieldValues(name);
              for (Object val : vals) {
                Reader reader = new StringReader(val.toString());
                TokenStream tstream = analyzer.tokenStream(name, reader);
                NamedList<NamedList<Object>> tokens = getTokens(tstream);
                theTokens.add(name, tokens);
              }
            }
          }
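
Because the loop obtains a fresh TokenStream per value, the reusable variant can cut per-value setup cost; a sketch assuming the 2.9-era Analyzer.reusableTokenStream(String, Reader):

    for (Object val : vals) {
      // reuses the analyzer's cached tokenizer chain instead of rebuilding it
      TokenStream tstream = analyzer.reusableTokenStream(name, new StringReader(val.toString()));
      NamedList<NamedList<Object>> tokens = getTokens(tstream);
      theTokens.add(name, tokens);
    }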

    if ("anchor".equals(fieldName))
      analyzer = ANCHOR_ANALYZER;
    else
      analyzer = CONTENT_ANALYZER;

    return analyzer.tokenStream(fieldName, reader);
  }
}
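
The branch above hand-routes fields to analyzers; Lucene's PerFieldAnalyzerWrapper expresses the same dispatch declaratively. A minimal sketch:

    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(CONTENT_ANALYZER);
    wrapper.addAnalyzer("anchor", ANCHOR_ANALYZER);
    // wrapper.tokenStream(fieldName, reader) now picks the right analyzer per field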

        // TODO: Decouple from Lucene, allow the analyzer to be configurable.
        // TODO: Verify that it is necessary to create a new analyzer instance each time.
        Analyzer analyzer = new StandardAnalyzer();
        Reader docReader = new StringReader(doc);
        TokenStream tokenStream = analyzer.tokenStream(null, docReader);
       
        try {
            if (1 < maxTuple ){
                tokenStream= new TupleTokenizer(tokenStream, maxTuple);
            }
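
On the second TODO above: Lucene analyzers are thread-safe and intended to be shared, so one StandardAnalyzer can be created once and reused; a sketch:

    // shared, thread-safe instance; no need to construct one per document
    private static final Analyzer ANALYZER = new StandardAnalyzer();

    TokenStream tokenStream = ANALYZER.tokenStream(null, new StringReader(doc));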
