Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer.tokenStream()


  private List<String> tokenize(String string)
  {
    List<String> stringList = new ArrayList<String>();
      Analyzer analyzer = new StandardAnalyzer();
      TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(string));
      Token token = null;
      try
      {
        token = ts.next();
        while(token != null)
View Full Code Here


                                        org.apache.lucene.util.Version.LUCENE_CURRENT,
                                        "Spanish",
                                        SPANISH_STOP_WORDS);

                       
                        TokenStream tokenStream = analyzer.tokenStream(
                                        "content",
                                        new StringReader(indexCleanedOfHTMLTags));
                       
                        Token token = new Token();
View Full Code Here

  @Test
  public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
        a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
    for( String v : multivalued ){
      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
      Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
      TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
      while (ts1.incrementToken()) {
View Full Code Here

    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
        a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
    for( String v : multivalued ){
      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
      Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
      TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
      while (ts1.incrementToken()) {
        assertTrue(ts2.incrementToken());
        assertEquals(ts1, ts2);
      }
      assertFalse(ts2.incrementToken());
View Full Code Here

  public void testIndexingAnalysis() throws Exception {
    Analyzer a = schema.getAnalyzer();
    String text = "one two three si\uD834\uDD1Ex";

    // field one
    TokenStream input = a.tokenStream("one", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "\u0001eno", "one", "\u0001owt", "two",
          "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" },
        new int[] { 0, 0, 4, 4, 8, 8, 14, 14 },
        new int[] { 3, 3, 7, 7, 13, 13, 19, 19 },
View Full Code Here

        new int[] { 0, 0, 4, 4, 8, 8, 14, 14 },
        new int[] { 3, 3, 7, 7, 13, 13, 19, 19 },
        new int[] { 1, 0, 1, 0, 1, 0, 1, 0 }
    );
    // field two
    input = a.tokenStream("two", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "\u0001eno", "\u0001owt",
          "\u0001eerht", "\u0001x\uD834\uDD1Eis" },
        new int[] { 0, 4, 8, 14 },
        new int[] { 3, 7, 13, 19 },
View Full Code Here

        new int[] { 0, 4, 8, 14 },
        new int[] { 3, 7, 13, 19 },
        new int[] { 1, 1, 1, 1 }
    );
    // field three
    input = a.tokenStream("three", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "one", "two", "three", "si\uD834\uDD1Ex" },
        new int[] { 0, 4, 8, 14 },
        new int[] { 3, 7, 13, 19 }
    );
View Full Code Here

  }

  static void test(Reader reader, boolean verbose, long bytes)
       throws Exception {
    Analyzer analyzer = new SimpleAnalyzer();
    TokenStream stream = analyzer.tokenStream(null, reader);

    Date start = new Date();

    int count = 0;
    for (Token t = stream.next(); t!=null; t = stream.next()) {
View Full Code Here

   * Basic analyzer behavior should be to keep sequential terms in one
   * increment from one another.
   */
  public void testIncrementingPositions() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream ts = analyzer.tokenStream("field",
                                new StringReader("one two three four five"));

    while (true) {
      Token token = ts.next();
      if (token == null) break;
View Full Code Here

            new WhitespaceTokenizer(new StringReader(expected2)));
    String expected3 = "one two three";
    List<Token> expectedTokens3 = getTokens(
            new WhitespaceTokenizer(new StringReader(expected3)));
    // field one
    TokenStream input = a.tokenStream("one", new StringReader(text));
    List<Token> realTokens = getTokens(input);
    assertTokEqual(realTokens, expectedTokens1);
    // field two
    input = a.tokenStream("two", new StringReader(text));
    realTokens = getTokens(input);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.