package org.apache.lucene.index

Examples of org.apache.lucene.index.IndexReader.terms()


        LinkedList termsWithPrefix = new LinkedList();
        IndexReader ir = IndexReader.open(indexStore);

        // this TermEnum gives "piccadilly", "pie" and "pizza".
        String prefix = "pi";
        TermEnum te = ir.terms(new Term("body", prefix + "*"));
        do {
            if (te.term().text().startsWith(prefix))
            {
                termsWithPrefix.add(te.term());
            }
View Full Code Here


    // Separately count how many tokens are actually in the index:
    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals(NUM_DOCS, reader.numDocs());

    TermEnum terms = reader.terms();
    TermDocs termDocs = reader.termDocs();
    int totalTokenCount2 = 0;
    while(terms.next()) {
      termDocs.seek(terms.term());
      while(termDocs.next())
View Full Code Here

        reader = IndexReader.open(args[i]);
      }
    }

    TermFreqQueue tiq = new TermFreqQueue(count);
    TermEnum terms = reader.terms();
     
    int minFreq = 0;
    while (terms.next()) {
      if (terms.docFreq() > minFreq) {
        tiq.put(new TermFreq(terms.term(), terms.docFreq()));
View Full Code Here

        int min = mincount - 1; // the smallest value in the top 'N' values
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        String startTerm = prefix == null ? "" : prefix; // ft.toInternal(prefix);
        TermEnum te = r.terms(new Term(fieldName, startTerm));
        TermDocs td = r.termDocs();

        if (docs.size() >= mincount) {
            do {
                Term t = te.term();
View Full Code Here

            try {
                ir = IndexReader.open(root);
                logger.info("Unique document id  " + uniqueId);

                TermEnum uidterm = ir.terms(new Term(
                            DocumStruct.FIELD_DOCUMENTID, uniqueId));

                while ((uidterm.term() != null) &&
                        (uidterm.term().field() == DocumStruct.FIELD_DOCUMENTID)) {
                    System.out.println("delete ...");
View Full Code Here

      }

      private void preheatIndex(final Index index) throws SearcherException {
        IndexReader reader = index.getReader();
        try {
          reader.terms(); // for heating the index.
        } catch (IOException e) {
          throw new SearcherException(e);
        }
      }
View Full Code Here

        IndexReader reader = IndexReader.open(indexWriter, true);

        TermDocs termDocs = reader.termDocs();

        TermEnum termEnum = reader.terms(new Term("int1", ""));
        termDocs.seek(termEnum);
        assertThat(termDocs.next(), equalTo(true));
        assertThat(termDocs.doc(), equalTo(0));
        assertThat(termDocs.freq(), equalTo(1));
View Full Code Here

        termDocs.seek(termEnum);
        assertThat(termDocs.next(), equalTo(true));
        assertThat(termDocs.doc(), equalTo(0));
        assertThat(termDocs.freq(), equalTo(1));

        termEnum = reader.terms(new Term("int2", ""));
        termDocs.seek(termEnum);
        assertThat(termDocs.next(), equalTo(true));
        assertThat(termDocs.doc(), equalTo(0));
        assertThat(termDocs.freq(), equalTo(2));
View Full Code Here

     * in the entire index. To get the in-cluster frequency, we need to query the index to get the term
     * frequencies in each document. The number of results of this call will be the in-cluster document
     * frequency.
     */

    TermEnum te = reader.terms(new Term(contentField, ""));
    Map<String, TermEntry> termEntryMap = new LinkedHashMap<String, TermEntry>();

    try {
      int count = 0;

View Full Code Here

    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();

    int clusterSize = wvws.size();

    for (TermEntry termEntry : termEntryMap.values()) {
      int corpusDF = reader.terms(new Term(this.contentField, termEntry.getTerm())).docFreq();
      int outDF = corpusDF - termEntry.getDocFreq();
      int inDF = termEntry.getDocFreq();
      double logLikelihoodRatio = scoreDocumentFrequencies(inDF, outDF, clusterSize, numDocs);
      TermInfoClusterInOut termInfoCluster =
          new TermInfoClusterInOut(termEntry.getTerm(), inDF, outDF, logLikelihoodRatio);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.