Examples of org.apache.lucene.index.TermsEnum.totalTermFreq()

org.apache.lucene.index.TermsEnum.totalTermFreq()
Returns the total number of occurrences of this term across all documents (the sum of the freq() for each doc that has this term). This will be -1 if the codec doesn't support this measure. Note that, like other term measures, this measure does not take deleted documents into account.

          DocsAndPositionsEnum docsAndPositions = null;
          List<BoboTerm> boboTermList = new ArrayList<BoboTerm>();
          while ((text = termsEnum.next()) != null) {
            BoboTerm boboTerm = new BoboTerm();
            boboTerm.term = text.utf8ToString();
            boboTerm.freq = (int) termsEnum.totalTermFreq();
            docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions);
            if (docsAndPositions != null) {
              docsAndPositions.nextDoc();
              boboTerm.positions = new ArrayList<Integer>();
              boboTerm.startOffsets = new ArrayList<Integer>();

View Full Code Here

      DocsAndPositionsEnum newDpe = te.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
      if (newDpe == null) { // no positions and no offsets - just add terms if allowed
        if (!acceptTermsOnly) {
          return null;
        }
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      dpe = newDpe;

View Full Code Here

      }
      dpe = newDpe;
      // term vectors have only one document, number 0
      if (dpe.nextDoc() == DocsEnum.NO_MORE_DOCS) { // oops
        // treat this as no positions nor offsets
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      IntPair ip = new IntPair(dpe.freq(), te.term().utf8ToString());

View Full Code Here


    // code to reconstruct the original sequence of Tokens
    TermsEnum termsEnum = tpv.iterator(null);
    int totalTokens = 0;
    while(termsEnum.next() != null) {
      totalTokens += (int) termsEnum.totalTermFreq();
    }
    Token tokensInOriginalOrder[] = new Token[totalTokens];
    ArrayList<Token> unsortedTokens = null;
    termsEnum = tpv.iterator(null);
    BytesRef text;

View Full Code Here

        int ngramCount = countGrams(term);
        if (ngramCount > grams) {
          throw new IllegalArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
        }
        if (ngramCount == 1) {
          totTokens += termsEnum.totalTermFreq();
        }


        builder.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
      }

View Full Code Here

        }
        if (ngramCount == 1) {
          totTokens += termsEnum.totalTermFreq();
        }


        builder.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
      }


      fst = builder.finish();
      if (fst == null) {
        throw new IllegalArgumentException("need at least one suggestion");

View Full Code Here

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
        assertEquals(term, term2);
        assertEquals(termsEnum.docFreq(), termsEnum2.docFreq());
        assertEquals(termsEnum.totalTermFreq(), termsEnum2.totalTermFreq());


        if (ord == 0) {
          try {
            termsEnum.ord();
          } catch (UnsupportedOperationException uoe) {

View Full Code Here

        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes())) {
          if (termContext == null) {
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }

View Full Code Here

            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
          
        }
        
      }

View Full Code Here

      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (isNoiseWord(term)) {
        continue;
      }
      final int freq = (int) termsEnum.totalTermFreq();


      // increment frequency
      Int cnt = termFreqMap.get(term);
      if (cnt == null) {
        cnt = new Int();

View Full Code Here

0 1 2 3 4 5 6

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.