Examples of org.apache.lucene.index.TermsEnum.totalTermFreq()

org.apache.lucene.index.TermsEnum.totalTermFreq()
Returns the total number of occurrences of this term across all documents (the sum of the freq() for each doc that has this term). This will be -1 if the codec doesn't support this measure. Note that, like other term measures, this measure does not take deleted documents into account.

        }
        if (ngramCount == 1) {
          totTokens += termsEnum.totalTermFreq();
        }


        builder.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
      }


      fst = builder.finish();
      if (fst == null) {
        throw new IllegalArgumentException("need at least one suggestion");

View Full Code Here

      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (isNoiseWord(term)) {
        continue;
      }
      final int freq = (int) termsEnum.totalTermFreq();


      // increment frequency
      Int cnt = termFreqMap.get(term);
      if (cnt == null) {
        cnt = new Int();

View Full Code Here

    IndexReader r = DirectoryReader.open(dir);
    Terms vector = r.getTermVectors(0).terms("field");
    assertEquals(1, vector.size());
    TermsEnum termsEnum = vector.iterator(null);
    termsEnum.next();
    assertEquals(2, termsEnum.totalTermFreq());
    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
    assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, positions.freq());
    positions.nextPosition();
    assertEquals(0, positions.startOffset());

View Full Code Here

    while ( (bytes = termsEnum.next()) != null) {
      byte[] buf = new byte[bytes.length];
      System.arraycopy(bytes.bytes, 0, buf, 0, bytes.length);
      String term = new String(buf, "UTF-8");
      int df = termsEnum.docFreq();
      long cf = termsEnum.totalTermFreq();


      if ( df < min) {
        skippedTerms++;
        missingCnt += cf;
        continue;

View Full Code Here

                            while ((text = termsEnum.next()) != null) {
                                // skip invalid terms
                                if (termsEnum.docFreq() < 1) {
                                    continue;
                                }
                                if (termsEnum.totalTermFreq() < 1) {
                                    continue;
                                }
                                String term = text.utf8ToString();
                                TermInfo termInfo = new TermInfo();
                                if (request.getWithDocFreq()) {

View Full Code Here

                                TermInfo termInfo = new TermInfo();
                                if (request.getWithDocFreq()) {
                                    termInfo.docfreq(termsEnum.docFreq());
                                }
                                if (request.getWithTotalFreq()) {
                                    termInfo.totalFreq(termsEnum.totalTermFreq());
                                }
                                if (request.getTerm() == null || term.startsWith(request.getTerm())) {
                                    map.put(term, termInfo);
                                }
                            }

View Full Code Here

          List<String> texts = new ArrayList<String>();
          List<Integer> freqs = new ArrayList<Integer>();
          BytesRef text;
          while ((text = termsEnum.next()) != null) {
            texts.add(text.utf8ToString());
            int freq = (int) termsEnum.totalTermFreq();
            freqs.add(freq);
          }
          tvMap.put(field, new TermFrequencyVector(texts, freqs));
        }
      }

View Full Code Here

      UnicodeUtil.UTF8toUTF16( text, spare );
      final String term = spare.toString();
      if ( isNoiseWord( term ) ) {
        continue;
      }
      final int freq = (int) termsEnum.totalTermFreq();


      // increment frequency
      Int cnt = termFreqMap.get( term );
      if ( cnt == null ) {
        cnt = new Int();

View Full Code Here

            BytesRef next = iterator.next();
            assertThat(next, Matchers.notNullValue());
            assertThat("expected " + string, string, equalTo(next.utf8ToString()));
            assertThat(next, Matchers.notNullValue());
            if (string.equals("the")) {
                assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
            } else {
                assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
            }


            DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);

View Full Code Here

            assertThat("expected " + string, string, equalTo(next.utf8ToString()));
            assertThat(next, Matchers.notNullValue());
            if (string.equals("the")) {
                assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
            } else {
                assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
            }


            DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
            assertThat(docsAndPositions.nextDoc(), equalTo(0));
            assertThat(freq[j], equalTo(docsAndPositions.freq()));

View Full Code Here

0 1 2 3 4 5 6

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and owned by ORACLE Inc. Contact coftware#gmail.com.