Examples of Posting

ivory.core.data.index.Posting
Object representing a posting. A posting contains two elements: a docno and a score. In most cases, the score is the term frequency of the term within the document; for impact-based indexes, the score is the impact score instead. Author: Jimmy Lin.
org.terrier.structures.postings.Posting
This interface represents one posting in a posting list. A Posting object gives access to the id of the posting list entry (e.g., the document id for an inverted index, or the term id for a direct index), the frequency, and the length of the document. Since: 3.0. Author: Craig Macdonald.

Examples of ivory.core.data.index.Posting


  private static void testTerm(RetrievalEnvironment env, String term) {
    long startTime = System.currentTimeMillis();


    PostingsReader reader = null;
    Posting p = new Posting();
    int df = 0;
    String termOrig = term;
    String termTokenized = env.tokenize(termOrig)[0];


    LOG.info("term=" + termOrig + ", tokenized=" + termTokenized);

View Full Code Here

Examples of ivory.core.data.index.Posting

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);


    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;


    BloomConfig bloomConfig =  new BloomConfig((int) env.getDocumentCount(),
                                               collectionSize, nbHash, bitsPerElement);
    //Deletes the output path if it already exists.
    fs.delete(new Path(outputPath), true);


    //Serialize and write the configuration parameters.
    out = fs.create(new Path(outputPath + "/" + BloomConfig.CONFIG_FILE));
    bloomConfig.write(out);
    out.close();


    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }


      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;


        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
        }


        while (reader.nextPosting(posting)) {
          filter.add(newDocids[posting.getDocno()]);
        }


        out.writeInt(i);
        out.writeInt(df);
        filter.write(out);

View Full Code Here

Examples of ivory.core.data.index.Posting

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);


    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;


    out = fs.create(new Path(outputPath + "/" + CompressedPostingsIO.LENGTH_FILE));
    out.writeInt(collectionSize);
    out.close();


    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }


      if(i % 1000 == 0) {
        LOGGER.info(i + " posting lists prepared...");
      }


      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();


        int[] data = new int[pl.getDf()];
        int index = 0;
        while (reader.nextPosting(posting)) {
          data[index++] = newDocids[posting.getDocno()];
        }
        Arrays.sort(data);
        CompressedPostings compPostings = CompressedPostings.newInstance(data);


        out.writeInt(i);

View Full Code Here

Examples of org.terrier.structures.postings.Posting

    List<Posting> postingList = new ArrayList<Posting>();
    int doclen = 0;
    TIntHashSet foundIds = new TIntHashSet();
    while(documentPostings.hasNext())
    {
      final Posting p = documentPostings.next().asWritablePosting();
      //check for duplicate pointers
      if (! foundIds.contains(p.getId()) )
      {
        postingList.add(p);
        doclen += p.getFrequency();
        reporter.progress();
        foundIds.add(p.getId());
      }
      else
      {
        dupPointers++;
      }

View Full Code Here

Examples of org.terrier.structures.postings.Posting

  {
    BitIndexPointer pointer = new SimpleBitIndexPointer();
    pointer.setOffset(output.getByteOffset(), output.getBitOffset());
    int numberOfEntries = 0;
    
    Posting posting = null;
    while(iterator.hasNext())
    {
      posting = iterator.next();
      output.writeGamma(posting.getId() - previousId);
      previousId = posting.getId();
      writePostingNotDocid(posting);
      numberOfEntries++;
    }
    pointer.setNumberOfEntries(numberOfEntries);
    return pointer;

View Full Code Here

Examples of org.terrier.structures.postings.Posting

          final IterablePosting postings = dfInput2.next();
          
          List<Posting> postingList = new ArrayList<Posting>();
          while(postings.next() != IterablePosting.EOL)
          {
            final Posting p = postings.asWritablePosting();
            p.setId(termcodeHashmap.get(postings.getId()));
            postingList.add(p);
          }
          Collections.sort(postingList, new PostingIdComparator());
          pointerDF = dfOutput.writePostings(postingList.iterator());
        }

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.