Examples of termText()


Examples of org.apache.lucene.analysis.Token.termText()

      if (increment > 0)
      { position = position + increment;
        System.out.println();
        System.out.print(position + ": ");
      }
      System.out.print("\t [" + token.termText() + ": " + token.type() + "] " + token.startOffset() + ":" + token.endOffset());
     
     } //*-- end of for
    System.out.println("");
  }
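The fragment above prints each token's position, text, type, and offsets, but its setup (the analyzer, the loop, and the position counter) is not shown. As a point of reference, a minimal self-contained sketch of the same pattern against the old Token-based API (assuming a Lucene 2.x-era StandardAnalyzer; the class name and sample text below are illustrative, not from the original source) could look like:

import java.io.StringReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

public class TermTextDemo {
    public static void main(String[] args) throws Exception {
        // Tokenize a sample string with the old API, where next() returns a Token
        TokenStream stream = new StandardAnalyzer()
                .tokenStream("contents", new StringReader("Examples of Token.termText() in Lucene"));
        int position = 0;
        Token token = null;
        while ((token = stream.next()) != null) {
            // Advance the position counter by the token's position increment
            int increment = token.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }
            // termText() returns the token's text; type() and the offsets describe it further
            System.out.print("\t [" + token.termText() + ": " + token.type() + "] "
                    + token.startOffset() + ":" + token.endOffset());
        }
        System.out.println();
    }
}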


Examples of org.apache.lucene.analysis.Token.termText()

  StandardBgramAnalyzer analyzer = new StandardBgramAnalyzer();
  analyzer.setExtractEntities(true);
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(question));
  ArrayList<Token> tokenList = new ArrayList<Token>();
  Token token = null;
  entities = new ArrayList<String>();    //*-- list of entities in the question
  while ( (token = stream.next()) != null)
   { tokenList.add(token);
     if (token.type().equals("<ENTITY>")) entities.add(token.termText());
   }
  //*-------------------------------------------------------------------
  //*-- build the query with the five components
  //*--
  //*-- 1. First identify the entity types for the query

Examples of org.apache.lucene.analysis.Token.termText()

   if (stoken != null)
    { Token tempToken = stoken; stoken = null;
      tempToken.setPositionIncrement(0); return(tempToken); }
  
   Token ctoken = input.next();
   if (ctoken == null) return (null);
   String ctext = ctoken.termText();
  
   //*-- check for an unigram entity
   String etype = ehash.get(ctext);
   if (etype != null) stoken = new Token(etype, ctoken.startOffset(), ctoken.endOffset(), "<ENTITY>");
  

Examples of org.apache.lucene.analysis.Token.termText()

    LowercaseWhitespaceAnalyzer analyzer = new LowercaseWhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
    Token token = null;
    while ((token = tokenStream.next()) != null) {
      String termText = token.termText();

      /* Contained in Title */
      WildcardQuery titleQuery = new WildcardQuery(new Term(String.valueOf(INews.TITLE), termText));
      allFieldsQuery.add(new BooleanClause(titleQuery, Occur.SHOULD));


Examples of org.apache.lucene.analysis.Token.termText()

        LowercaseWhitespaceAnalyzer analyzer = new LowercaseWhitespaceAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
        Token token = null;
        while ((token = tokenStream.next()) != null) {
          Term term = new Term(fieldname, token.termText());
          similarityQuery.add(new BooleanClause(new FuzzyQuery(term), Occur.MUST));
        }

        return similarityQuery;
      }

Examples of org.apache.lucene.analysis.Token.termText()

    int i=0;
    try {
      for (; i < size; i++) {
        Token t1 = (Token) tokens1.get(i);
        Token t2 = (Token) tokens2.get(i);
        if (!(t1.termText().equals(t2.termText()))) throw new IllegalStateException("termText");
        if (t1.startOffset() != t2.startOffset()) throw new IllegalStateException("startOffset");
        if (t1.endOffset() != t2.endOffset()) throw new IllegalStateException("endOffset");
        if (!(t1.type().equals(t2.type()))) throw new IllegalStateException("type");
      }
      if (tokens1.size() != tokens2.size())   throw new IllegalStateException("size1=" + tokens1.size() + ", size2=" + tokens2.size());

Examples of org.apache.lucene.analysis.Token.termText()

  private String toString(List tokens) {
    if (tokens == null) return "null";
    String str = "[";
    for (int i=0; i < tokens.size(); i++) {
      Token t1 = (Token) tokens.get(i);
      str = str + "'" + t1.termText() + "', ";
    }
    return str + "]";
  }
 
  // trick to detect default platform charset

Examples of org.apache.lucene.analysis.Token.termText()

            TokenStream includeStream =
                    new StandardAnalyzer().tokenStream(null, new StringReader(escape(terms).toLowerCase()));

            while (true) {
                Token t = includeStream.next();
                if (t == null) break;
                query.add( new Term(fieldName, t.termText()) );
            }

            return query.getTerms().length > 0 ? query : null;
        } catch (Exception ex) {
            throw new RuntimeException(ex);

Examples of org.apache.lucene.analysis.Token.termText()

                  TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName));
                  Token token = ts.next();
                  while (token != null)
                  {
                    superColumnList.add(token.termText());
                    token = ts.next();
                  }
              }
              else
              {

Examples of org.apache.lucene.analysis.Token.termText()

        TokenStream ts = new StandardAnalyzer().tokenStream(new HTMLParser(file).getReader());

        Token token = null;

        while ((token = ts.next()) != null) {
            System.out.println("ReTokenizeFile.reTokenize(File): " + token.termText() + " " +
                token.startOffset() + " " + token.endOffset() + " " + token.type());
        }

        return file.getAbsolutePath();
    }
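All of the examples on this page use the old Token-based TokenStream API, in which next() returns a Token and termText() returns its text; both were removed in Lucene 3.0. Purely for contrast, and not part of any of the snippets above, a rough equivalent on the attribute-based API (assuming Lucene 3.1 or later, where CharTermAttribute is available; the analyzer choice, version constant, and sample text are placeholders) would be:

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class AttributeApiDemo {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream ts = analyzer.tokenStream("contents", new StringReader("Examples of Token.termText() in Lucene"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // termAtt.toString() plays the role that token.termText() plays in the snippets above
            System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
    }
}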