Package com.browseengine.bobo.util

Examples of com.browseengine.bobo.util.BigSegmentedArray
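The excerpts below come from the Bobo Browse faceted-search engine. BigSegmentedArray is Bobo's segmented int array: it behaves like a large int[] indexed by doc id, but is allocated in segments so it can grow cheaply; LazyBigIntArray is the implementation the excerpts construct directly. A minimal sketch of the API surface these excerpts rely on (only calls that actually appear below; treat the exact signatures as indicative):

  import com.browseengine.bobo.util.BigSegmentedArray;
  import com.browseengine.bobo.util.LazyBigIntArray;

  public class BigSegmentedArrayDemo {
    public static void main(String[] args) {
      BigSegmentedArray order = new LazyBigIntArray(16); // room for 16 entries
      order.add(3, 7);          // store value 7 at index 3
      int v = order.get(3);     // read it back -> 7
      order.ensureCapacity(32); // grow while keeping existing data
      System.out.println(v + ", largest storable value: " + order.maxValue());
    }
  }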


      bits |= 0x00000001 << (i - 1);
    }
   
    final int finalBits = bits;
   
    final BigSegmentedArray orderArray = dataCache.orderArray;
   
    if (indexes.length == 0)
    {
      return EmptyDocIdSet.getInstance();
    }
    else
    {
      return new RandomAccessDocIdSet()
      {
        @Override
        public DocIdSetIterator iterator()
        {
          return new CompactMultiValueFacetDocIdSetIterator(dataCache, indexes, finalBits);
        }

        @Override
        final public boolean get(int docId)
        {
          return (orderArray.get(docId) & finalBits) != 0x0;
        }
      };
    }
  }
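In the excerpt above, a compact multi-value facet packs each selected value index i into a single int as bit (i - 1); a document matches when the packed word stored for it in orderArray shares at least one bit with finalBits. A small illustration of the encoding and the membership test (the selection and the stored word are hypothetical):

    int bits = 0;
    for (int i : new int[] { 1, 3 }) {        // hypothetical selection: value ids 1 and 3
      bits |= 0x00000001 << (i - 1);          // bits == 0b101
    }
    int stored = 0b100;                       // pretend orderArray.get(docId) returned this
    boolean matches = (stored & bits) != 0x0; // true: the doc carries value id 3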


  /* (non-Javadoc)
   * @see com.browseengine.bobo.facets.FacetCountCollector#getCountDistribution()
   */
  public BigSegmentedArray getCountDistribution() {
    BigSegmentedArray dist = null;
    if(_latPredefinedRangeIndexes != null) {
      dist = new LazyBigIntArray(_latPredefinedRangeIndexes.length);
      int n = 0;
      int start;
      int end;
      for(int[] range: _latPredefinedRangeIndexes) {
        start = range[0];
        end = range[1];
        int sum = 0;
        for(int i = start; i < end; i++) {
          sum += _latCount[i];
        }
        dist.add(n++, sum);
      }
    }
    return dist;
  }

    public DocComparator getComparator(IndexReader reader, int docbase)
        throws IOException {
      if (!(reader instanceof BoboIndexReader)) throw new IllegalStateException("reader not instance of "+BoboIndexReader.class);
      BoboIndexReader boboReader = (BoboIndexReader)reader;
      final FacetDataCache dataCache = _facetHandler.getFacetData(boboReader);
      final BigSegmentedArray orderArray = dataCache.orderArray;
      return new DocComparator() {
       
        @Override
        public Comparable value(ScoreDoc doc) {
          int index = orderArray.get(doc.doc);
          return dataCache.valArray.get(index);
        }
       
        @Override
        public int compare(ScoreDoc doc1, ScoreDoc doc2) {
          return orderArray.get(doc1.doc) - orderArray.get(doc2.doc);
        }
      };
    }
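Note that compare() subtracts raw ordinals rather than comparing the values. This works because load() (shown further down) assigns term ids in ascending term order, so a smaller id always denotes a smaller value, and the ordinals are small non-negative ints, so the subtraction cannot overflow. A sketch of the equivalence, with hypothetical data:

    // Suppose valArray is [null, "apple", "cherry"] and orderArray maps
    // doc 7 -> id 1 ("apple"), doc 9 -> id 2 ("cherry").
    int cmp = 1 - 2; // negative, matching "apple".compareTo("cherry") < 0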

  public void load(String fieldName, IndexReader reader, TermListFactory<T> listFactory) throws IOException
  {
    String field = fieldName.intern();
    int maxDoc = reader.maxDoc();

    BigSegmentedArray order = this.orderArray;
    if (order == null) // we want to reuse the memory
    {
      order = newInstance(_termCountSize, maxDoc);
    } else
    {
      order.ensureCapacity(maxDoc); // no need to fill to 0, we are resetting the
                                    // data anyway
    }
    this.orderArray = order;

    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int length = maxDoc + 1;
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    int t = 0; // current term number

    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    int totalFreq = 0;
    // int df = 0;
    t++;
    try
    {
      do
      {
        Term term = termEnum.term();
        // reference comparison is intentional: both strings are interned
        if (term == null || term.field() != field)
          break;

        if (t > order.maxValue())
        {
          throw new IOException("maximum number of value cannot exceed: "
              + order.maxValue());
        }
        // store term text
        // we expect that there is at most one term per document
        if (t >= length)
          throw new RuntimeException("there are more terms than "
              + "documents in field \"" + field
              + "\", but it's impossible to sort on " + "tokenized fields");
        list.add(term.text());
        termDocs.seek(termEnum);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int df = 0;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, t);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, t);
          }
          maxID = docid;
        }
        freqList.add(df);
        totalFreq += df;
        minIDList.add(minID);
        maxIDList.add(maxID);

        t++;
      } while (termEnum.next());
    } finally
    {
      termDocs.close();
      termEnum.close();
    }
    list.seal();

    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    int doc = 0;
    while (doc <= maxDoc && order.get(doc) != 0)
    {
      ++doc;
    }
    if (doc <= maxDoc)
    {
      this.minIDs[0] = doc;
      // Try to get the max
      doc = maxDoc;
      while (doc > 0 && order.get(doc) != 0)
      {
        --doc;
      }
      if (doc > 0)
      {
        this.maxIDs[0] = doc; // assumed completion; mirrors the Lucene 4.x variant below
      }
    }
    // ...
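What load() builds is a forward index: terms are enumerated in ascending order and given ids t = 1, 2, ..., every document containing term t gets order[docid] = t, and the parallel lists capture each term's document frequency and lowest/highest doc id. Id 0 is reserved for "no value in this field", which is what the trailing scan for order.get(doc) != 0 fills in. Once loaded, resolving a document's facet value is two array reads; a sketch, assuming a populated cache named dataCache and a doc id docid:

    int termId = dataCache.orderArray.get(docid);   // 0 means the doc has no value here
    Object value = dataCache.valArray.getRawValue(termId);
    int docFreq = dataCache.freqs[termId];          // how many docs carry this value
    int firstDoc = dataCache.minIDs[termId];        // lowest doc id carrying it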

  // Signature inferred from context; the excerpt begins mid-declaration.
  public void load(String fieldName, AtomicReader reader, TermListFactory<T> listFactory)
      throws IOException {
    String field = fieldName.intern();
    int maxDoc = reader.maxDoc();

    int dictValueCount = getDictValueCount(reader, fieldName);
    BigSegmentedArray order = newInstance(dictValueCount, maxDoc);
    this.orderArray = order;

    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int length = maxDoc + 1;
    @SuppressWarnings("unchecked")
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    int negativeValueCount = getNegativeValueCount(reader, field);

    int t = 1; // valid term id starts from 1
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    int totalFreq = 0;
    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        // store term text
        // we expect that there is at most one term per document
        if (t >= length) throw new RuntimeException("there are more terms than "
            + "documents in field \"" + field + "\", but it's impossible to sort on "
            + "tokenized fields");
        String strText = text.utf8ToString();
        list.add(strText);
        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int docID = -1;
        int df = 0;
        int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
        while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          df++;
          order.add(docID, valId);
          minID = docID;
          while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            docID = docsEnum.docID();
            df++;
            order.add(docID, valId);
          }
          maxID = docID;
        }
        freqList.add(df);
        totalFreq += df;
        minIDList.add(minID);
        maxIDList.add(maxID);
        t++;
      }
    }

    list.seal();
    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    int doc = 0;
    while (doc < maxDoc && order.get(doc) != 0) {
      ++doc;
    }
    if (doc < maxDoc) {
      this.minIDs[0] = doc;
      // Try to get the max
      doc = maxDoc - 1;
      while (doc >= 0 && order.get(doc) != 0) {
        --doc;
      }
      this.maxIDs[0] = doc;
    }
    this.freqs[0] = maxDoc - totalFreq;
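This Lucene 4.x variant of load() differs from the 3.x one mainly in the valId remapping: presumably because negative numeric terms enumerate in reverse numeric order, the first negativeValueCount ids are mirrored so that ascending id still means ascending value. A worked example of the remap, assuming negativeValueCount = 3:

    int negativeValueCount = 3;
    for (int t = 1; t <= 5; t++) {
      int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
      System.out.println(t + " -> " + valId); // 1->3, 2->2, 3->1, 4->4, 5->5
    }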

    public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
      if (!(reader instanceof BoboSegmentReader)) throw new IllegalStateException(
          "reader not instance of " + BoboSegmentReader.class);
      BoboSegmentReader boboReader = (BoboSegmentReader) reader;
      final FacetDataCache<?> dataCache = _facetHandler.getFacetData(boboReader);
      final BigSegmentedArray orderArray = dataCache.orderArray;
      return new DocComparator() {
        @Override
        public Comparable<?> value(ScoreDoc doc) {
          int index = orderArray.get(doc.doc);
          return dataCache.valArray.getComparableValue(index);
        }

        @Override
        public int compare(ScoreDoc doc1, ScoreDoc doc2) {
          return orderArray.get(doc1.doc) - orderArray.get(doc2.doc);
        }
      };
    }

  /* (non-Javadoc)
   * @see com.browseengine.bobo.facets.FacetCountCollector#getCountDistribution()
   */
  @Override
  public BigSegmentedArray getCountDistribution() {
    BigSegmentedArray dist = null;
    if (_latPredefinedRangeIndexes != null) {
      dist = new LazyBigIntArray(_latPredefinedRangeIndexes.length);
      int n = 0;
      int start;
      int end;
      for (int[] range : _latPredefinedRangeIndexes) {
        start = range[0];
        end = range[1];
        int sum = 0;
        for (int i = start; i < end; i++) {
          sum += _latCount[i];
        }
        dist.add(n++, sum);
      }
    }
    return dist;
  }

  /**
   * @return Count distribution for all the user specified range values
   */
  @Override
  public BigSegmentedArray getCountDistribution() {
    BigSegmentedArray dist = null;
    if (_predefinedRanges != null) {
      dist = new LazyBigIntArray(_predefinedRanges.size());
      int distIdx = 0;
      for (int i = 0; i < _count.size(); i++) {
        int count = _count.get(i);
        dist.add(distIdx++, count);
      }
    }
    return dist;
  }

      int startIdx = _valArray.indexOf(_start); // inferred: the excerpt begins mid-method
      if (startIdx < 0) startIdx = -(startIdx + 1);

      int endIdx = _valArray.indexOf(_end);
      if (endIdx < 0) endIdx = -(endIdx + 1);

      BigSegmentedArray baseCounts = _baseCollector.getCountDistribution();
      if (_start instanceof Long) {
        long start = _start.longValue();
        long unit = _unit.longValue();
        TermLongList valArray = (TermLongList) _valArray;
        for (int i = startIdx; i < endIdx; i++) {
          long val = valArray.getPrimitiveValue(i);
          int idx = (int) ((val - start) / unit);
          if (idx >= 0 && idx < _count.size()) {
            _count.add(idx, _count.get(idx) + baseCounts.get(i));
          }
        }
      } else if (_start instanceof Integer) {
        int start = _start.intValue();
        int unit = _unit.intValue();
        TermIntList valArray = (TermIntList) _valArray;
        for (int i = startIdx; i < endIdx; i++) {
          int val = valArray.getPrimitiveValue(i);
          int idx = ((val - start) / unit);
          if (idx >= 0 && idx < _count.size()) {
            _count.add(idx, _count.get(idx) + baseCounts.get(i));
          }
        }
      } else {
        double start = _start.doubleValue();
        double unit = _unit.doubleValue();
        for (int i = startIdx; i < endIdx; i++) {
          Number val = (Number) _valArray.getRawValue(i);
          int idx = (int) ((val.doubleValue() - start) / unit);
          if (idx >= 0 && idx < _count.size()) {
            _count.add(idx, _count.get(idx) + baseCounts.get(i));
          }
        }
      }
    }
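The fragment above folds a base collector's per-term counts into fixed-width histogram buckets: each term value maps to bucket (val - start) / unit, and the bounds check silently drops values outside the histogram. The three branches differ only in the primitive type used for the arithmetic. For instance, with start = 0 and unit = 10:

    int start = 0, unit = 10, val = 37;
    int idx = (val - start) / unit; // 3, i.e. the bucket covering [30, 40)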

  // Signature inferred from context; the excerpt begins mid-declaration.
  public void load(String fieldName, AtomicReader reader, TermListFactory<T> listFactory)
      throws IOException {
    String field = fieldName.intern();
    int maxDoc = reader.maxDoc();

    int dictValueCount = getDictValueCount(reader, fieldName);
    BigSegmentedArray order = newInstance(dictValueCount, maxDoc);
    this.orderArray = order;

    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int length = maxDoc + 1;
    @SuppressWarnings("unchecked")
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    int negativeValueCount = getNegativeValueCount(reader, field);

    int t = 1; // valid term id starts from 1
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    int totalFreq = 0;
    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        // store term text
        // we expect that there is at most one term per document
        if (t >= length) throw new RuntimeException("there are more terms than "
            + "documents in field \"" + field + "\", but it's impossible to sort on "
            + "tokenized fields");
        String strText = text.utf8ToString();
        list.add(strText);
        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int docID = -1;
        int df = 0;
        int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
        while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          df++;
          order.add(docID, valId);
          minID = docID;
          while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            docID = docsEnum.docID();
            df++;
            order.add(docID, valId);
          }
          maxID = docID;
        }
        freqList.add(df);
        totalFreq += df;
        minIDList.add(minID);
        maxIDList.add(maxID);
        t++;
      }
    }

    list.seal();
    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    int doc = 0;
    while (doc < maxDoc && order.get(doc) != 0) {
      ++doc;
    }
    if (doc < maxDoc) {
      this.minIDs[0] = doc;
      // Try to get the max
      doc = maxDoc - 1;
      while (doc >= 0 && order.get(doc) != 0) {
        --doc;
      }
      this.maxIDs[0] = doc;
    }
    this.freqs[0] = reader.numDocs() - totalFreq;
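This last load() is nearly identical to the earlier Lucene 4.x excerpt; the visible difference is the final line, which computes the "no value" count in freqs[0] from reader.numDocs() (live documents only) rather than maxDoc, so deleted documents no longer inflate the count.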
