Examples of org.apache.lucene.analysis.TokenStream.reset()

Class org.apache.lucene.analysis.TokenStream

Examples of org.apache.lucene.analysis.TokenStream.reset()

org.apache.lucene.analysis.TokenStream.reset()
Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. Note that if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the tokens when you store them away (on the first pass) as well as when you return them (on future passes after {@link #reset()}).


          System.out.println("Tokens for '" + asString + "':");
          while (stream.incrementToken()) {
            System.out.println(" '" + termAtt.term() + "'");
          }
          stream.reset();
          return stream;
          // Do the call a second time and return the result this time
          // Old behaviour
          // return nestedAnalyzer.tokenStream(fieldName, new StringReader(asString));
        } catch (IOException exc) {

View Full Code Here

      LinkedList<CToken> list = new LinkedList<CToken>();
      int wordsCount = 0;
      
      //collect token
      TokenStream ts = analyzer.tokenStream("", reader);
      ts.reset();
      TermAttribute termAtt = (TermAttribute) ts
          .addAttribute(TermAttribute.class);
      while (ts.incrementToken()) {
        if (printGate != null && printGate.filter(wordsCount)) {
          list.add(new CToken(termAtt.term(), wordsCount));

View Full Code Here

        continue;
      }
      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
      final TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
      int count = 0;
      ts.reset();
      while(ts.incrementToken()) {
        if (count == 0 && !termAtt.term().equals(s)) {
          break;
        }
        count++;

View Full Code Here

      {
        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
          
          stream.reset(); 
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);


          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.term());

View Full Code Here

    // PhraseQuery, or nothing based on the term count


    TokenStream source;
    try {
      source = analyzer.reusableTokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;

View Full Code Here

          } else {
            tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
          }


          // reset the TokenStream to the first token          
          tokenStream.reset();


          while (tokenStream.incrementToken()) {
            // TODO: this is a simple workaround to still work with tokens, not very effective, but as far as I know, this writer should get removed soon:
            final Token token = new Token();
            for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {

View Full Code Here

    try {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);


      int corpusNumDocs = reader.numDocs();
      HashSet<String> processedTerms = new HashSet<String>();
      ts.reset();
      while (ts.incrementToken()) {
        String term = termAtt.toString();
        if (!processedTerms.contains(term)) {
          processedTerms.add(term);
          ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term

View Full Code Here

  private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();

View Full Code Here


  @SuppressWarnings("unused")
  private void dumpTokens(String input) throws IOException {
    TokenStream ts = getTestAnalyzer().tokenStream("dummy", input);
    try {
      ts.reset();


      MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
      CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
      while (ts.incrementToken()) {
        System.out.println(charTerm.toString() + " => " + attribute.getTags());

View Full Code Here

  public final void testLeftoverStems() throws IOException {
    Analyzer a = getTestAnalyzer();
    TokenStream ts_1 = a.tokenStream("dummy", "liście");
    try {
      CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
      ts_1.reset();
      ts_1.incrementToken();
      assertEquals("first stream", "liście", termAtt_1.toString());
      ts_1.end();
    } finally {
      IOUtils.closeWhileHandlingException(ts_1);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.