Package org.apache.lucene.index

Examples of org.apache.lucene.index.Terms


      assert !terms.isEmpty();
      final AtomicReader reader = context.reader();
      final Bits liveDocs = acceptDocs;
      PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];

      final Terms fieldTerms = reader.terms(field);
      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator(null);
     
      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        final TermState state = states[i].get(context.ord);
        if (state == null) { /* term doesnt exist in this segment */
 
View Full Code Here


          return null;
        } else {
          final Info info = getInfo(i);
          info.sortTerms();

          return new Terms() {
            @Override
            public TermsEnum iterator(TermsEnum reuse) {
              return new MemoryTermsEnum(info);
            }
View Full Code Here

    try {
      final BytesRef catTerm = new BytesRef(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
      TermsEnum termsEnum = null; // reuse
      DocsEnum docs = null; // reuse
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          if (termsEnum.seekExact(catTerm)) {
            // liveDocs=null because the taxonomy has no deletes
            docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
            // if the term was found, we know it has exactly one document.
            doc = docs.nextDoc() + ctx.docBase;
View Full Code Here

    DirectoryReader reader = readerManager.acquire();
    try {
      TermsEnum termsEnum = null;
      DocsEnum docsEnum = null;
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) { // cannot really happen, but be on the safe side
          termsEnum = terms.iterator(termsEnum);
          while (termsEnum.next() != null) {
            if (!cache.isFull()) {
              BytesRef t = termsEnum.term();
              // Since we guarantee uniqueness of categories, each term has exactly
              // one document. Also, since we do not allow removing categories (and
View Full Code Here

      int base = 0;
      TermsEnum te = null;
      DocsEnum docs = null;
      for (final AtomicReaderContext ctx : r.leaves()) {
        final AtomicReader ar = ctx.reader();
        final Terms terms = ar.terms(Consts.FULL);
        te = terms.iterator(te);
        while (te.next() != null) {
          FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
          final int ordinal = addCategory(cp);
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
View Full Code Here

  private final Map<String,SimpleTextTerms> termsCache = new HashMap<String,SimpleTextTerms>();

  @Override
  synchronized public Terms terms(String field) throws IOException {
    Terms terms = termsCache.get(field);
    if (terms == null) {
      Long fp = fields.get(field);
      if (fp == null) {
        return null;
      } else {
View Full Code Here

      final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(doc)) {
                    docToJoinScore.put(doc, joinScore);
                  }
                }
              }
            }
          }
        } else {
          toSearcher.search(new MatchAllDocsQuery(), new Collector() {

            private SortedSetDocValues docTermOrds;
            private final BytesRef scratch = new BytesRef();
            private int docBase;

            @Override
            public void collect(int doc) throws IOException {
              docTermOrds.setDocument(doc);
              long ord;
              while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                docTermOrds.lookupOrd(ord, scratch);
                JoinScore joinScore = joinValueToJoinScores.get(scratch);
                if (joinScore == null) {
                  continue;
                }
                Integer basedDoc = docBase + doc;
                // First encountered join value determines the score.
                // Something to keep in mind for many-to-many relations.
                if (!docToJoinScore.containsKey(basedDoc)) {
                  docToJoinScore.put(basedDoc, joinScore);
                }
              }
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              docBase = context.docBase;
              docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {return false;}
            @Override
            public void setScorer(Scorer scorer) {}
          });
        }
      } else {
        toSearcher.search(new MatchAllDocsQuery(), new Collector() {

          private BinaryDocValues terms;
          private int docBase;
          private final BytesRef spare = new BytesRef();

          @Override
          public void collect(int doc) {
            terms.get(doc, spare);
            JoinScore joinScore = joinValueToJoinScores.get(spare);
            if (joinScore == null) {
              return;
            }
            docToJoinScore.put(docBase + doc, joinScore);
View Full Code Here

  }

  private double getTextTermFreqForClass(BytesRef c) throws IOException {
    double avgNumberOfUniqueTerms = 0;
    for (String textFieldName : textFieldNames) {
      Terms terms = MultiFields.getTerms(atomicReader, textFieldName);
      long numPostings = terms.getSumDocFreq(); // number of term/doc pairs
      avgNumberOfUniqueTerms += numPostings / (double) terms.getDocCount(); // avg # of unique terms per doc
    }
    int docsWithC = atomicReader.docFreq(new Term(classFieldName, c));
    return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
  }
View Full Code Here

  private static List<Term> sample(Random random, IndexReader reader, int size) throws IOException {
    List<Term> sample = new ArrayList<Term>();
    Fields fields = MultiFields.getFields(reader);
    for (String field : fields) {
      Terms terms = fields.terms(field);
      assertNotNull(terms);
      TermsEnum termsEnum = terms.iterator(null);
      while (termsEnum.next() != null) {
        if (sample.size() >= size) {
          int pos = random.nextInt(size);
          sample.set(pos, new Term(field, termsEnum.term()));
        } else {
View Full Code Here

      bi.setText(content);
      int doc = docids[i];
      int leaf = ReaderUtil.subIndex(doc, leaves);
      AtomicReaderContext subContext = leaves.get(leaf);
      AtomicReader r = subContext.reader();
      Terms t = r.terms(field);
      if (t == null) {
        continue; // nothing to do
      }
      if (leaf != lastLeaf) {
        termsEnum = t.iterator(null);
        postings = new DocsAndPositionsEnum[allTerms.length];
      }
      if (automata.length > 0) {
        DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
        dp.advance(doc - subContext.docBase);
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.Terms

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.