Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum.totalTermFreq()


          DocsAndPositionsEnum docsAndPositions = null;
          List<BoboTerm> boboTermList = new ArrayList<BoboTerm>();
          while ((text = termsEnum.next()) != null) {
            BoboTerm boboTerm = new BoboTerm();
            boboTerm.term = text.utf8ToString();
            boboTerm.freq = (int) termsEnum.totalTermFreq();
            docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions);
            if (docsAndPositions != null) {
              docsAndPositions.nextDoc();
              boboTerm.positions = new ArrayList<Integer>();
              boboTerm.startOffsets = new ArrayList<Integer>();
View Full Code Here


      DocsAndPositionsEnum newDpe = te.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
      if (newDpe == null) { // no positions and no offsets - just add terms if allowed
        if (!acceptTermsOnly) {
          return null;
        }
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      dpe = newDpe;
View Full Code Here

      }
      dpe = newDpe;
      // term vectors have only one document, number 0
      if (dpe.nextDoc() == DocsEnum.NO_MORE_DOCS) { // oops
        // treat this as no positions nor offsets
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      IntPair ip = new IntPair(dpe.freq(), te.term().utf8ToString());
View Full Code Here

    // code to reconstruct the original sequence of Tokens
    TermsEnum termsEnum = tpv.iterator(null);
    int totalTokens = 0;
    while(termsEnum.next() != null) {
      totalTokens += (int) termsEnum.totalTermFreq();
    }
    Token tokensInOriginalOrder[] = new Token[totalTokens];
    ArrayList<Token> unsortedTokens = null;
    termsEnum = tpv.iterator(null);
    BytesRef text;
View Full Code Here

        int ngramCount = countGrams(term);
        if (ngramCount > grams) {
          throw new IllegalArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
        }
        if (ngramCount == 1) {
          totTokens += termsEnum.totalTermFreq();
        }

        builder.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
      }
View Full Code Here

        }
        if (ngramCount == 1) {
          totTokens += termsEnum.totalTermFreq();
        }

        builder.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
      }

      fst = builder.finish();
      if (fst == null) {
        throw new IllegalArgumentException("need at least one suggestion");
View Full Code Here

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
        assertEquals(term, term2);
        assertEquals(termsEnum.docFreq(), termsEnum2.docFreq());
        assertEquals(termsEnum.totalTermFreq(), termsEnum2.totalTermFreq());

        if (ord == 0) {
          try {
            termsEnum.ord();
          } catch (UnsupportedOperationException uoe) {
View Full Code Here

        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes())) {
          if (termContext == null) {
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
         
View Full Code Here

            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
         
        }
       
      }
View Full Code Here

      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (isNoiseWord(term)) {
        continue;
      }
      final int freq = (int) termsEnum.totalTermFreq();

      // increment frequency
      Int cnt = termFreqMap.get(term);
      if (cnt == null) {
        cnt = new Int();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.