Examples of it.unimi.dsi.fastutil.ints.IntArrayList

it.unimi.dsi.fastutil.ints.IntArrayList

    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);


    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);


    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
    
    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;


          String val = term.text();


          if (val != null)
          {
            list.add(val);


            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if(!loader.add(docid, t)) logOverflow(fieldName);
              minID = docid;
              bitset.fastSet(docid);
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!loader.add(docid, t)) logOverflow(fieldName);
                bitset.fastSet(docid);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }


          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }


    list.seal();


    try
    {
      _nestedArray.load(maxdoc + 1, loader);
    }
    catch (IOException e)
    {
      throw e;
    }
    catch (Exception e)
    {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }
    
    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();


    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true))

View Full Code Here

    }
    
    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);


    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;


    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if(term == null || !fieldName.equals(term.field()))
            break;
          
          String val = term.text();
          
          if (val != null)
          {
            list.add(val);
            
            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if (!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              minID = docid;
              bitset.fastSet(docid);
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!_nestedArray.addData(docid, t)) logOverflow(fieldName);
                bitset.fastSet(docid);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
          
          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }
    
    list.seal();
    
    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();


    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true))

View Full Code Here


    BigIntArray order = new BigIntArray(maxDoc);


    TermValueList mterms = _termListFactory == null ? new TermStringList() : _termListFactory.createTermList();
    
    IntArrayList minIDList=new IntArrayList();
      IntArrayList maxIDList=new IntArrayList();
      IntArrayList freqList = new IntArrayList();
      
    TermDocs termDocs = null;
    TermEnum termEnum = null;
    int t = 0; // current term number
    mterms.add(null);
    minIDList.add(-1);
      maxIDList.add(-1);
      freqList.add(0);
    t++;
    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(new Term(_indexFieldName, ""));
      do {
        if (termEnum == null)
          break;
        Term term = termEnum.term();
        if (term == null || !_indexFieldName.equals(term.field()))
          break;


        // store term text
        // we expect that there is at most one term per document
        if (t > MAX_VAL_COUNT) {
          throw new IOException("maximum number of value cannot exceed: "
              + MAX_VAL_COUNT);
        }
        String val = term.text();
        mterms.add(val);
        int bit = (0x00000001 << (t-1));
        termDocs.seek(termEnum);
        //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the num of deletedDocs
        int df = 0;
        int minID=-1;
            int maxID=-1;
            if(termDocs.next())
            {
              df++;
                  int docid = termDocs.doc();
                  order.add(docid, order.get(docid) | bit);
                  minID = docid;
                  while (termDocs.next())
              {
                    df++;
                    docid = termDocs.doc();
                    order.add(docid, order.get(docid) | bit);
              }
          maxID = docid;
            }
              freqList.add(df);
        minIDList.add(minID);
            maxIDList.add(maxID);
        t++;
      } while (termEnum.next());
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
      } finally {
        if (termEnum != null) {
          termEnum.close();
        }
      }
    }
    
    mterms.seal();


    return new FacetDataCache(order,mterms,freqList.toIntArray(),minIDList.toIntArray(),maxIDList.toIntArray(),TermCountSize.large);
  }

View Full Code Here


  @Override
  protected List<?> buildPrimitiveList(int capacity)
  {
    _type = Integer.class;
    return capacity > 0 ? new IntArrayList(capacity) : new IntArrayList();
  }

View Full Code Here

      order.ensureCapacity(maxDoc); // no need to fill to 0, we are reseting the
                                    // data anyway
    }
    this.orderArray = order;


    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();


    int length = maxDoc + 1;
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    int t = 0; // current term number


    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    int totalFreq = 0;
    // int df = 0;
    t++;
    try
    {
      do
      {
        Term term = termEnum.term();
        if (term == null || term.field() != field)
          break;


        if (t > order.maxValue())
        {
          throw new IOException("maximum number of value cannot exceed: "
              + order.maxValue());
        }
        // store term text
        // we expect that there is at most one term per document
        if (t >= length)
          throw new RuntimeException("there are more terms than "
              + "documents in field \"" + field
              + "\", but it's impossible to sort on " + "tokenized fields");
        list.add(term.text());
        termDocs.seek(termEnum);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int df = 0;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, t);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, t);
          }
          maxID = docid;
        }
        freqList.add(df);
        totalFreq += df;
        minIDList.add(minID);
        maxIDList.add(maxID);


        t++;
      } while (termEnum.next());
    } finally
    {
      termDocs.close();
      termEnum.close();
    }
    list.seal();


    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();


    int doc = 0;
    while (doc <= maxDoc && order.get(doc) != 0)

View Full Code Here

    this.freqs[0] = maxDoc + 1 - totalFreq;
  }
  
  private static int[] convertString(FacetDataCache dataCache,String[] vals)
  {
      IntList list = new IntArrayList(vals.length);
      for (int i=0;i<vals.length;++i)
      {
        int index = dataCache.valArray.indexOf(vals[i]);
        if (index>=0)
        {
          list.add(index);
        }
      }
      return list.toIntArray();
  }

View Full Code Here

   * @return the array of order indices of the values.
   */
  public static <T> int[] convert(FacetDataCache<T> dataCache,T[] vals)
  {
    if (vals!=null && (vals instanceof String[])) return convertString(dataCache, (String[])vals);
    IntList list = new IntArrayList(vals.length);
    for (int i=0;i<vals.length;++i)
    {
      int index = dataCache.valArray.indexOfWithType(vals[i]);
      if (index>=0)
      {
        list.add(index);
      }
    }
    return list.toIntArray();
  }

View Full Code Here


    String input = cmdline.getOptionValue(INPUT);
    String output = cmdline.getOptionValue(OUTPUT);


    List<String> stringList = Lists.newArrayList();
    IntArrayList intList = new IntArrayList();


    // First read lines into sorted map to sort input.
    Object2IntAVLTreeMap<String> tree = new Object2IntAVLTreeMap<String>();
    BufferedReader br = new BufferedReader(new FileReader(input));
    String line;
    while ((line = br.readLine()) != null) {
      String[] arr = line.split("\\t");
      if ( arr[0] == null || arr[0].length() == 0) {
        LOG.info("Skipping invalid line: " + line);
      }
      tree.put(arr[0], Integer.parseInt(arr[1]));
    }
    br.close();


    // Extract sorted strings and ints.
    for (Object2IntMap.Entry<String> map : tree.object2IntEntrySet()) {
      stringList.add(map.getKey());
      intList.add(map.getIntValue());
    }
    
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);


    FSDataOutputStream os = fs.create(new Path(output), true);


    ByteArrayOutputStream bytesOut;
    ObjectOutputStream objOut;
    byte[] bytes;


    // Serialize the front-coded dictionary
    FrontCodedStringList frontcodedList = new FrontCodedStringList(stringList, 8, true);


    bytesOut = new ByteArrayOutputStream();
    objOut = new ObjectOutputStream(bytesOut);
    objOut.writeObject(frontcodedList);
    objOut.close();


    bytes = bytesOut.toByteArray();
    os.writeInt(bytes.length);
    os.write(bytes);


    // Serialize the hash function
    ShiftAddXorSignedStringMap dict = new ShiftAddXorSignedStringMap(stringList.iterator(),
        new TwoStepsLcpMonotoneMinimalPerfectHashFunction<CharSequence>(stringList,
            TransformationStrategies.prefixFreeUtf16()));


    bytesOut = new ByteArrayOutputStream();
    objOut = new ObjectOutputStream(bytesOut);
    objOut.writeObject(dict);
    objOut.close();


    bytes = bytesOut.toByteArray();
    os.writeInt(bytes.length);
    os.write(bytes);


    // Serialize the ints.
    os.writeInt(intList.size());
    for (int i = 0; i < intList.size(); i++) {
      os.writeInt(intList.getInt(i));
    }
    
    os.close();
  }

View Full Code Here


        ImmutableList.Builder<PageAndPositions> builder = ImmutableList.builder();
        long nextDistinctId = 0;
        GroupByHash groupByHash = new GroupByHash(types, allChannels, 10_000);
        for (UpdateRequest request : requests) {
            IntList positions = new IntArrayList();


            int startPosition = request.getStartPosition();
            Block[] blocks = request.getBlocks();


            // Move through the positions while advancing the cursors in lockstep
            int positionCount = blocks[0].getPositionCount();
            for (int position = startPosition; position < positionCount; position++) {
                // We are reading ahead in the cursors, so we need to filter any nulls since they can not join
                if (!containsNullValue(position, blocks) && groupByHash.putIfAbsent(position, blocks) == nextDistinctId) {
                    nextDistinctId++;


                    // Only include the key if it is not already in the index
                    if (existingSnapshot.getJoinPosition(position, blocks) == UNLOADED_INDEX_KEY) {
                        positions.add(position);
                    }
                }
            }


            if (!positions.isEmpty()) {
                builder.add(new PageAndPositions(request, positions));
            }
        }


        pageAndPositions = builder.build();

View Full Code Here

    BufferedLoader weightLoader = getBufferedLoader(maxdoc, null);


    @SuppressWarnings("unchecked")
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = getNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);


    _overflow = false;


    String pre = null;


    int df = 0;
    int minID = -1;
    int maxID = -1;
    int docID = -1;
    int valId = 0;


    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        String val = null;
        int weight = 0;
        String[] split = strText.split("\u0000");
        if (split.length > 1) {
          val = split[0];
          weight = Integer.parseInt(split[split.length - 1]);
        } else {
          continue;
        }


        if (pre == null || !val.equals(pre)) {
          if (pre != null) {
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
          list.add(val);
          df = 0;
          minID = -1;
          maxID = -1;
          valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
          t++;
        }


        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
        if (docsEnum != null) {
          while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            df++;


            if (!loader.add(docID, valId)) {
              logOverflow(fieldName);
            } else {
              weightLoader.add(docID, weight);
            }


            if (docID < minID) minID = docID;
            bitset.fastSet(docID);
            while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
              docID = docsEnum.docID();
              df++;
              if (!loader.add(docID, valId)) {
                logOverflow(fieldName);
              } else {
                weightLoader.add(docID, weight);
              }
              bitset.fastSet(docID);
            }
            if (docID > maxID) maxID = docID;
          }
        }
        pre = val;
      }
      if (pre != null) {
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);
      }
    }


    list.seal();


    try {
      _nestedArray.load(maxdoc + 1, loader);
      _weightArray.load(maxdoc + 1, weightLoader);
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }


    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();


    int doc = 0;
    while (doc < maxdoc && !_nestedArray.contains(doc, 0, true)) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of it.unimi.dsi.fastutil.ints.IntArrayList

com.browseengine.bobo.facets.data.FacetDataCache

com.browseengine.bobo.facets.data.MultiValueFacetDataCache

com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache

com.browseengine.bobo.facets.data.TermIntList

com.browseengine.bobo.facets.filter.FacetRangeFilter

com.browseengine.bobo.facets.impl.CompactMultiValueFacetHandler

com.facebook.presto.operator.ChannelHash

com.facebook.presto.operator.GroupByHash$ChannelBuilder

com.facebook.presto.operator.index.UnloadedIndexKeyRecordSet

com.facebook.presto.operator.JoinHash

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.