Package org.apache.lucene.index

Examples of org.apache.lucene.index.DocsEnum


      };
    }

    @Override
    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
      final DocsEnum inReuse;
      final SortingDocsEnum wrapReuse;
      if (reuse != null && reuse instanceof SortingDocsEnum) {
        // if we're asked to reuse the given DocsEnum and it is Sorting, return
        // the wrapped one, since some Codecs expect it.
        wrapReuse = (SortingDocsEnum) reuse;
        inReuse = wrapReuse.getWrapped();
      } else {
        wrapReuse = null;
        inReuse = reuse;
      }

      final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
      final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
      return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
    }
View Full Code Here


    int doc = -1;
    DirectoryReader reader = readerManager.acquire();
    try {
      final BytesRef catTerm = new BytesRef(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
      TermsEnum termsEnum = null; // reuse
      DocsEnum docs = null; // reuse
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          if (termsEnum.seekExact(catTerm)) {
            // liveDocs=null because the taxonomy has no deletes
            docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
            // if the term was found, we know it has exactly one document.
            doc = docs.nextDoc() + ctx.docBase;
            break;
          }
        }
      }
    } finally {
View Full Code Here

    boolean aborted = false;
    DirectoryReader reader = readerManager.acquire();
    try {
      TermsEnum termsEnum = null;
      DocsEnum docsEnum = null;
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) { // cannot really happen, but be on the safe side
          termsEnum = terms.iterator(termsEnum);
          while (termsEnum.next() != null) {
            if (!cache.isFull()) {
              BytesRef t = termsEnum.term();
              // Since we guarantee uniqueness of categories, each term has exactly
              // one document. Also, since we do not allow removing categories (and
              // hence documents), there are no deletions in the index. Therefore, it
              // is sufficient to call next(), and then doc(), exactly once with no
              // 'validation' checks.
              FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(t.utf8ToString()));
              docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
              boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
              assert !res : "entries should not have been evicted from the cache";
            } else {
              // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
              aborted = true;
              break;
View Full Code Here

      final int size = r.numDocs();
      final OrdinalMap ordinalMap = map;
      ordinalMap.setSize(size);
      int base = 0;
      TermsEnum te = null;
      DocsEnum docs = null;
      for (final AtomicReaderContext ctx : r.leaves()) {
        final AtomicReader ar = ctx.reader();
        final Terms terms = ar.terms(Consts.FULL);
        te = terms.iterator(te);
        while (te.next() != null) {
          FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
          final int ordinal = addCategory(cp);
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
        }
        base += ar.maxDoc(); // no deletions, so we're ok
      }
      ordinalMap.addDone();
    } finally {
View Full Code Here

      hasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
      hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
      hasPayloads = fieldInfo.hasPayloads();

      BytesRef term;
      DocsEnum docsEnum = null;
      DocsAndPositionsEnum docsAndPositionsEnum = null;
      final TermsEnum termsEnum = termsIn.iterator(null);
      int termOffset = 0;

      final IntArrayWriter scratch = new IntArrayWriter();

      // Used for payloads, if any:
      final RAMOutputStream ros = new RAMOutputStream();

      // if (DEBUG) {
      //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
      // }

      while ((term = termsEnum.next()) != null) {
        final int docFreq = termsEnum.docFreq();
        final long totalTermFreq = termsEnum.totalTermFreq();

        // if (DEBUG) {
        //   System.out.println("  term=" + term.utf8ToString());
        // }

        termOffsets[count] = termOffset;

        if (termBytes.length < (termOffset + term.length)) {
          termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
        }
        System.arraycopy(term.bytes, term.offset, termBytes, termOffset, term.length);
        termOffset += term.length;
        termOffsets[count+1] = termOffset;

        if (hasPos) {
          docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
        } else {
          docsEnum = termsEnum.docs(null, docsEnum);
        }

        final TermAndSkip ent;

        final DocsEnum docsEnum2;
        if (hasPos) {
          docsEnum2 = docsAndPositionsEnum;
        } else {
          docsEnum2 = docsEnum;
        }

        int docID;

        if (docFreq <= lowFreqCutoff) {

          ros.reset();

          // Pack postings for low-freq terms into a single int[]:
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            scratch.add(docID);
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              scratch.add(freq);
              if (hasPos) {
                for(int pos=0;pos<freq;pos++) {
                  scratch.add(docsAndPositionsEnum.nextPosition());
                  if (hasOffsets) {
                    scratch.add(docsAndPositionsEnum.startOffset());
                    scratch.add(docsAndPositionsEnum.endOffset());
                  }
                  if (hasPayloads) {
                    final BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      scratch.add(payload.length);
                      ros.writeBytes(payload.bytes, payload.offset, payload.length);
                    } else {
                      scratch.add(0);
                    }
                  }
                }
              }
            }
          }

          final byte[] payloads;
          if (hasPayloads) {
            payloads = new byte[(int) ros.getFilePointer()];
            ros.writeTo(payloads, 0);
          } else {
            payloads = null;
          }

          final int[] postings = scratch.get();
       
          ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
        } else {
          final int[] docs = new int[docFreq];
          final int[] freqs;
          final int[][] positions;
          final byte[][][] payloads;
          if (hasFreq) {
            freqs = new int[docFreq];
            if (hasPos) {
              positions = new int[docFreq][];
              if (hasPayloads) {
                payloads = new byte[docFreq][][];
              } else {
                payloads = null;
              }
            } else {
              positions = null;
              payloads = null;
            }
          } else {
            freqs = null;
            positions = null;
            payloads = null;
          }

          // Use separate int[] for the postings for high-freq
          // terms:
          int upto = 0;
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            docs[upto] = docID;
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              freqs[upto] = freq;
              if (hasPos) {
                final int mult;
                if (hasOffsets) {
                  mult = 3;
View Full Code Here

  @Test
  public void testDocsEnum() throws Exception {
    Bits mappedLiveDocs = randomLiveDocs(reader.maxDoc());
    TermsEnum termsEnum = reader.terms(DOCS_ENUM_FIELD).iterator(null);
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM)));
    DocsEnum docs = termsEnum.docs(mappedLiveDocs, null);

    int doc;
    int prev = -1;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(doc));
      assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(reader.document(doc).get(ID_FIELD)));
      while (++prev < doc) {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
      }
    }
    while (++prev < reader.maxDoc()) {
      assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
    }

    DocsEnum reuse = docs;
    docs = termsEnum.docs(mappedLiveDocs, reuse);
    if (docs instanceof SortingDocsEnum) {
      assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
    }
    doc = -1;
View Full Code Here

    int count = 0;
    BytesRef term;
    while ((term = te.next()) != null) {
      OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
      DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, contentField, term);
      int docID;
      while ((docID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // check to see if we don't have any deletions (null) or if document is live
        if (liveDocs != null && !liveDocs.get(docID)) {
          // document is deleted...
          termBitset.set(docsEnum.docID());
        }
      }
      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
View Full Code Here

          }
        }

        final TermsEnum termsEnum = termsEnum(terms);

        DocsEnum docs = null;
        FixedBitSet docsWithField = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          visitTerm(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            visitDoc(docID);
            if (setDocsWithField) {
View Full Code Here

      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
      while ((iterator.next()) != null) {
        DocsEnum docs = iterator.docs(random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()), null, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
     
      assertEquals(terms.size(), enums.size());
    }
View Full Code Here

    for (AtomicReaderContext ctx : open.leaves()) {
      Terms terms = ctx.reader().terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(open.maxDoc());
      DocsEnum docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(bits, docs, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
     
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.DocsEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.