Examples of Posting


Examples of ivory.core.data.index.Posting

  private static void testTerm(RetrievalEnvironment env, String term) {
    long startTime = System.currentTimeMillis();

    PostingsReader reader = null;
    Posting p = new Posting();
    int df = 0;
    String termOrig = term;
    String termTokenized = env.tokenize(termOrig)[0];

    LOG.info("term=" + termOrig + ", tokenized=" + termTokenized);
View Full Code Here

Examples of ivory.core.data.index.Posting

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;

    BloomConfig bloomConfig =  new BloomConfig((int) env.getDocumentCount(),
                                               collectionSize, nbHash, bitsPerElement);
    //Deletes the output path if it already exists.
    fs.delete(new Path(outputPath), true);

    //Serialize and write the configuration parameters.
    out = fs.create(new Path(outputPath + "/" + BloomConfig.CONFIG_FILE));
    bloomConfig.write(out);
    out.close();

    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;

        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
        }

        while (reader.nextPosting(posting)) {
          filter.add(newDocids[posting.getDocno()]);
        }

        out.writeInt(i);
        out.writeInt(df);
        filter.write(out);
View Full Code Here

Examples of ivory.core.data.index.Posting

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;

    out = fs.create(new Path(outputPath + "/" + CompressedPostingsIO.LENGTH_FILE));
    out.writeInt(collectionSize);
    out.close();

    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      if(i % 1000 == 0) {
        LOGGER.info(i + " posting lists prepared...");
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();

        int[] data = new int[pl.getDf()];
        int index = 0;
        while (reader.nextPosting(posting)) {
          data[index++] = newDocids[posting.getDocno()];
        }
        Arrays.sort(data);
        CompressedPostings compPostings = CompressedPostings.newInstance(data);

        out.writeInt(i);
View Full Code Here

Examples of org.terrier.structures.postings.Posting

    List<Posting> postingList = new ArrayList<Posting>();
    int doclen = 0;
    TIntHashSet foundIds = new TIntHashSet();
    while(documentPostings.hasNext())
    {
      final Posting p = documentPostings.next().asWritablePosting();
      //check for duplicate pointers
      if (! foundIds.contains(p.getId()) )
      {
        postingList.add(p);
        doclen += p.getFrequency();
        reporter.progress();
        foundIds.add(p.getId());
      }
      else
      {
        dupPointers++;
      }
View Full Code Here

Examples of org.terrier.structures.postings.Posting

  {
    BitIndexPointer pointer = new SimpleBitIndexPointer();
    pointer.setOffset(output.getByteOffset(), output.getBitOffset());
    int numberOfEntries = 0;
   
    Posting posting = null;
    while(iterator.hasNext())
    {
      posting = iterator.next();
      output.writeGamma(posting.getId() - previousId);
      previousId = posting.getId();
      writePostingNotDocid(posting);
      numberOfEntries++;
    }
    pointer.setNumberOfEntries(numberOfEntries);
    return pointer;
View Full Code Here

Examples of org.terrier.structures.postings.Posting

          final IterablePosting postings = dfInput2.next();
         
          List<Posting> postingList = new ArrayList<Posting>();
          while(postings.next() != IterablePosting.EOL)
          {
            final Posting p = postings.asWritablePosting();
            p.setId(termcodeHashmap.get(postings.getId()));
            postingList.add(p);
          }
          Collections.sort(postingList, new PostingIdComparator());
          pointerDF = dfOutput.writePostings(postingList.iterator());
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.