Examples of org.apache.lucene.index.Terms

org.apache.lucene.index.Terms
Access to the terms in a specific field. See {@link Fields}. @lucene.experimental

      assert !terms.isEmpty();
      final AtomicReader reader = context.reader();
      final Bits liveDocs = acceptDocs;
      PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];


      final Terms fieldTerms = reader.terms(field);
      if (fieldTerms == null) {
        return null;
      }


      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator(null);
      
      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        final TermState state = states[i].get(context.ord);
        if (state == null) { /* term doesnt exist in this segment */

View Full Code Here

          return null;
        } else {
          final Info info = getInfo(i);
          info.sortTerms();


          return new Terms() {
            @Override 
            public TermsEnum iterator(TermsEnum reuse) {
              return new MemoryTermsEnum(info);
            }

View Full Code Here

    try {
      final BytesRef catTerm = new BytesRef(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
      TermsEnum termsEnum = null; // reuse
      DocsEnum docs = null; // reuse
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          if (termsEnum.seekExact(catTerm)) {
            // liveDocs=null because the taxonomy has no deletes
            docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
            // if the term was found, we know it has exactly one document.
            doc = docs.nextDoc() + ctx.docBase;

View Full Code Here

    DirectoryReader reader = readerManager.acquire();
    try {
      TermsEnum termsEnum = null;
      DocsEnum docsEnum = null;
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) { // cannot really happen, but be on the safe side
          termsEnum = terms.iterator(termsEnum);
          while (termsEnum.next() != null) {
            if (!cache.isFull()) {
              BytesRef t = termsEnum.term();
              // Since we guarantee uniqueness of categories, each term has exactly
              // one document. Also, since we do not allow removing categories (and

View Full Code Here

      int base = 0;
      TermsEnum te = null;
      DocsEnum docs = null;
      for (final AtomicReaderContext ctx : r.leaves()) {
        final AtomicReader ar = ctx.reader();
        final Terms terms = ar.terms(Consts.FULL);
        te = terms.iterator(te);
        while (te.next() != null) {
          FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
          final int ordinal = addCategory(cp);
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);

View Full Code Here


  private final Map<String,SimpleTextTerms> termsCache = new HashMap<String,SimpleTextTerms>();


  @Override
  synchronized public Terms terms(String field) throws IOException {
    Terms terms = termsCache.get(field);
    if (terms == null) {
      Long fp = fields.get(field);
      if (fp == null) {
        return null;
      } else {

View Full Code Here


      final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);


                for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(doc)) {
                    docToJoinScore.put(doc, joinScore);
                  }
                }
              }
            }
          }
        } else {
          toSearcher.search(new MatchAllDocsQuery(), new Collector() {


            private SortedSetDocValues docTermOrds;
            private final BytesRef scratch = new BytesRef();
            private int docBase;


            @Override
            public void collect(int doc) throws IOException {
              docTermOrds.setDocument(doc);
              long ord;
              while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                docTermOrds.lookupOrd(ord, scratch);
                JoinScore joinScore = joinValueToJoinScores.get(scratch);
                if (joinScore == null) {
                  continue;
                }
                Integer basedDoc = docBase + doc;
                // First encountered join value determines the score.
                // Something to keep in mind for many-to-many relations.
                if (!docToJoinScore.containsKey(basedDoc)) {
                  docToJoinScore.put(basedDoc, joinScore);
                }
              }
            }


            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              docBase = context.docBase;
              docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
            }


            @Override
            public boolean acceptsDocsOutOfOrder() {return false;}
            @Override
            public void setScorer(Scorer scorer) {}
          });
        }
      } else {
        toSearcher.search(new MatchAllDocsQuery(), new Collector() {


          private BinaryDocValues terms;
          private int docBase;
          private final BytesRef spare = new BytesRef();


          @Override
          public void collect(int doc) {
            terms.get(doc, spare);
            JoinScore joinScore = joinValueToJoinScores.get(spare);
            if (joinScore == null) {
              return;
            }
            docToJoinScore.put(docBase + doc, joinScore);

View Full Code Here

  }


  private double getTextTermFreqForClass(BytesRef c) throws IOException {
    double avgNumberOfUniqueTerms = 0;
    for (String textFieldName : textFieldNames) {
      Terms terms = MultiFields.getTerms(atomicReader, textFieldName);
      long numPostings = terms.getSumDocFreq(); // number of term/doc pairs
      avgNumberOfUniqueTerms += numPostings / (double) terms.getDocCount(); // avg # of unique terms per doc
    }
    int docsWithC = atomicReader.docFreq(new Term(classFieldName, c));
    return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
  }

View Full Code Here


  private static List<Term> sample(Random random, IndexReader reader, int size) throws IOException {
    List<Term> sample = new ArrayList<Term>();
    Fields fields = MultiFields.getFields(reader);
    for (String field : fields) {
      Terms terms = fields.terms(field);
      assertNotNull(terms);
      TermsEnum termsEnum = terms.iterator(null);
      while (termsEnum.next() != null) {
        if (sample.size() >= size) {
          int pos = random.nextInt(size);
          sample.set(pos, new Term(field, termsEnum.term()));
        } else {

View Full Code Here

      bi.setText(content);
      int doc = docids[i];
      int leaf = ReaderUtil.subIndex(doc, leaves);
      AtomicReaderContext subContext = leaves.get(leaf);
      AtomicReader r = subContext.reader();
      Terms t = r.terms(field);
      if (t == null) {
        continue; // nothing to do
      }
      if (leaf != lastLeaf) {
        termsEnum = t.iterator(null);
        postings = new DocsAndPositionsEnum[allTerms.length];
      }
      if (automata.length > 0) {
        DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
        dp.advance(doc - subContext.docBase);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.Terms

cc.twittertools.index.ExtractTermStatisticsFromIndex

com.basho.yokozuna.handler.EntropyData

com.browseengine.bobo.facets.data.FacetDataCache

com.browseengine.bobo.facets.data.MultiValueFacetDataCache

com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache

com.browseengine.bobo.facets.impl.CompactMultiValueFacetHandler

com.browseengine.bobo.sort.SortCollectorImpl

org.apache.blur.index.ExitableReader$ExitableFields

org.apache.blur.lucene.warmup.IndexWarmup

org.apache.blur.manager.IndexManager

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.