Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.reset()


          System.out.println("Tokens for '" + asString + "':");
          while (stream.incrementToken()) {
            System.out.println(" '" + termAtt.term() + "'");
          }
          stream.reset();
          return stream;
          // Do the call a second time and return the result this time
          // Old behaviour
          // return nestedAnalyzer.tokenStream(fieldName, new StringReader(asString));
        } catch (IOException exc) {
View Full Code Here


      LinkedList<CToken> list = new LinkedList<CToken>();
      int wordsCount = 0;
     
      //collect token
      TokenStream ts = analyzer.tokenStream("", reader);
      ts.reset();
      TermAttribute termAtt = (TermAttribute) ts
          .addAttribute(TermAttribute.class);
      while (ts.incrementToken()) {
        if (printGate != null && printGate.filter(wordsCount)) {
          list.add(new CToken(termAtt.term(), wordsCount));
View Full Code Here

        continue;
      }
      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
      final TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
      int count = 0;
      ts.reset();
      while(ts.incrementToken()) {
        if (count == 0 && !termAtt.term().equals(s)) {
          break;
        }
        count++;
View Full Code Here

      {
        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
         
          stream.reset();
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);

          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.term());
View Full Code Here

    // PhraseQuery, or nothing based on the term count

    TokenStream source;
    try {
      source = analyzer.reusableTokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
View Full Code Here

          } else {
            tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          while (tokenStream.incrementToken()) {
            // TODO: this is a simple workaround to still work with tokens, not very effective, but as far as I know, this writer should get removed soon:
            final Token token = new Token();
            for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
View Full Code Here

    try {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

      int corpusNumDocs = reader.numDocs();
      HashSet<String> processedTerms = new HashSet<String>();
      ts.reset();
      while (ts.incrementToken()) {
        String term = termAtt.toString();
        if (!processedTerms.contains(term)) {
          processedTerms.add(term);
          ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
View Full Code Here

  private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
View Full Code Here

  @SuppressWarnings("unused")
  private void dumpTokens(String input) throws IOException {
    TokenStream ts = getTestAnalyzer().tokenStream("dummy", input);
    try {
      ts.reset();

      MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
      CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
      while (ts.incrementToken()) {
        System.out.println(charTerm.toString() + " => " + attribute.getTags());
View Full Code Here

  public final void testLeftoverStems() throws IOException {
    Analyzer a = getTestAnalyzer();
    TokenStream ts_1 = a.tokenStream("dummy", "liście");
    try {
      CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
      ts_1.reset();
      ts_1.incrementToken();
      assertEquals("first stream", "liście", termAtt_1.toString());
      ts_1.end();
    } finally {
      IOUtils.closeWhileHandlingException(ts_1);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.