/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.cascade.retrieval;
import ivory.cascade.model.CascadeClique;
import ivory.core.ConfigurationException;
import ivory.core.RetrievalEnvironment;
import ivory.core.RetrievalException;
import ivory.smrf.model.Clique;
import ivory.smrf.model.DocumentNode;
import ivory.smrf.model.GlobalTermEvidence;
import ivory.smrf.model.GraphNode;
import ivory.smrf.model.MarkovRandomField;
import ivory.smrf.model.score.ScoringFunction;
import ivory.smrf.retrieval.Accumulator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.log4j.Logger;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
 * Ranks documents with the cascade model: an initial stage scores candidates
 * directly from the postings lists, and each subsequent stage prunes the
 * candidate set and rescores the survivors with progressively more expensive
 * features.
 *
 * @author Lidan Wang
 */
public class CascadeEval {
private static final Logger LOG = Logger.getLogger(CascadeEval.class);
static int INITIAL_STAGE_NUM_RESULTS = 20000;
/**
* Pool of accumulators.
*/
private CascadeAccumulator[] mAccumulators = null;
/**
 * Priority queue of accumulators (a min-heap on score: the head is the
 * lowest-scoring retained document).
 */
private final PriorityQueue<CascadeAccumulator> mSortedAccumulators = new PriorityQueue<CascadeAccumulator>();
/**
* Comparator used to sort cliques by their max score.
*/
private final Comparator<Clique> maxScoreComparator = new Clique.MaxScoreComparator();
/**
* Markov Random Field that we are using to generate the ranking.
*/
private MarkovRandomField mMRF = null;
/**
* If defined, only documents within this set will be scored.
*/
private int[] mDocSet = null;
float[] accumulated_scores = null;
// Declared once here so they don't have to be re-declared in each method
double[] mDocSet_tmp;
float[] accumulated_scores_tmp;
int[] order;
/**
* MRF document nodes.
*/
private List<DocumentNode> mDocNodes = null;
/**
* Maximum number of results to return.
*/
private int mNumResults;
// saved results from internalInputFile
private float[][] mSavedResults;
// K value used in cascade model
private int mK;
// Cost of this cascade model = # documents * sum of per-document unit costs over the cliques
float cascadeCost = 0;
// Positional info for the documents passed between stages:
// keptDocs[docIndex][queryTermIndex] holds that term's positions in the document,
// and keptDocLengths[docIndex] holds the document's length
int[][][] keptDocs;
int[] keptDocLengths;
// Single terms from the cliques used in the first stage, keyed by the concept (term);
// the values are the clique number, collection frequency, and document frequency, respectively
Map<String, Integer> termToCliqueNumber = Maps.newHashMap();
Map<String, Long> cf = Maps.newHashMap();
Map<String, Integer> df = Maps.newHashMap();
// for pruning use
float meanScore = 0;
float stddev = 0;
int numQueryTerms;
public static int defaultNumDocs = 9999999;
public CascadeEval(MarkovRandomField mrf, int numResults, String qid, float[][] savedResults,
int K) {
this(mrf, null, numResults, qid, savedResults, K);
}
public CascadeEval(MarkovRandomField mrf, int[] docSet, int numResults, String qid,
float[][] savedResults, int K) {
mMRF = mrf;
mDocSet = docSet;
mNumResults = numResults;
mDocNodes = getDocNodes();
mSavedResults = savedResults;
mK = K;
// Lidan: get # query terms
numQueryTerms = mMRF.getQueryTerms().length;
keptDocs = new int[INITIAL_STAGE_NUM_RESULTS + 1][numQueryTerms][];
keptDocLengths = new int[INITIAL_STAGE_NUM_RESULTS + 1];
}
// Lidan: assumes mDocSet[] & accumulated_scores[] are sorted in descending order of score!
// Lidan: this method modifies mDocSet[] & accumulated_scores[] (class variables)
public void pruneDocuments(String pruner, float pruner_param) {
// After pruning, make sure we retain at least min(mK, |candidate docs|)
// documents!
int[] mDocSet_tmp = new int[mDocSet.length];
float[] accumulated_scores_tmp = new float[accumulated_scores.length];
int retainSize = 0;
if (pruner.equals("score")) {
float max_score = accumulated_scores[0];
float min_score = accumulated_scores[accumulated_scores.length - 1];
float score_threshold = (max_score - min_score) * pruner_param + min_score;
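// e.g., pruner_param = 0.5 retains documents scoring above the midpoint between min_score and max_score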
for (int i = 0; i < accumulated_scores.length; i++) {
if (score_threshold <= accumulated_scores[i]) {
retainSize++;
} else {
break;
}
}
} else if (pruner.equals("mean-max")) {
float max_score = accumulated_scores[0];
float mean_score = 0;
for (int j = 0; j < accumulated_scores.length; j++) {
mean_score += accumulated_scores[j];
}
mean_score = mean_score / (float) accumulated_scores.length;
float score_threshold = pruner_param * max_score + (1.0f - pruner_param) * mean_score;
for (int i = 0; i < accumulated_scores.length; i++) {
if (score_threshold <= accumulated_scores[i]) {
retainSize++;
} else {
break;
}
}
} else if (pruner.equals("rank")) {
// if pruner_param = 0.3 --> remove bottom 30% of the docs!
retainSize = (int) ((1.0 - pruner_param) * ((double) (mDocSet.length)));
} else if (pruner.equals("z-score")) {
// compute mean
float avgScores = 0.0f;
for (int i = 0; i < accumulated_scores.length; i++) {
avgScores += accumulated_scores[i];
}
avgScores = avgScores / (float) accumulated_scores.length;
// compute variance
float variance = 0.0f;
for (int i = 0; i < accumulated_scores.length; i++) {
variance += (accumulated_scores[i] - avgScores) * (accumulated_scores[i] - avgScores);
}
float stddev = (float) Math.sqrt(variance);
float[] z_scores = new float[accumulated_scores.length];
for (int i = 0; i < z_scores.length; i++) {
z_scores[i] = (accumulated_scores[i] - avgScores) / stddev;
}
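// NOTE: this pruner computes z-scores (using the root of the summed squared
// deviations) but never sets retainSize, so how many documents are kept is
// determined entirely by the mK clamp below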
} else {
throw new RetrievalException("PruningFunction " + pruner + " is not supported!");
}
if (retainSize < mK) {
if (mDocSet.length >= mK) {
retainSize = mK;
} else if (mK != defaultNumDocs) {
// When training the model, the # of output docs (mK) is deliberately set very
// large so that the output size equals the retained-set size
retainSize = mDocSet.length;
}
}
if (retainSize > mDocSet.length) {
retainSize = mDocSet.length;
}
for (int i = 0; i < retainSize; i++) {
mDocSet_tmp[i] = mDocSet[i];
accumulated_scores_tmp[i] = accumulated_scores[i];
}
mDocSet = new int[retainSize];
accumulated_scores = new float[retainSize];
for (int i = 0; i < retainSize; i++) {
mDocSet[i] = mDocSet_tmp[i];
accumulated_scores[i] = accumulated_scores_tmp[i];
}
}
// Lidan: operates on class vars mDocSet[] & accumulated_scores[]
public void sortDocumentsByDocnos() {
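// The initial-stage scorer walks mDocSet alongside the postings lists in
// ascending docno order, so re-sort by docno here; order[] carries the
// permutation used to keep accumulated_scores aligned with mDocSet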
order = new int[mDocSet.length];
mDocSet_tmp = new double[mDocSet.length];
accumulated_scores_tmp = new float[mDocSet.length];
for (int i = 0; i < order.length; i++) {
order[i] = i;
mDocSet_tmp[i] = mDocSet[i];
accumulated_scores_tmp[i] = accumulated_scores[i];
}
ivory.smrf.model.constrained.ConstraintModel.Quicksort(mDocSet_tmp, order, 0, order.length - 1);
for (int i = 0; i < order.length; i++) {
mDocSet[i] = (int) mDocSet_tmp[i];
accumulated_scores[i] = accumulated_scores_tmp[order[i]];
}
}
// Total cost of the cascade model: # documents * sum of unit per document cost over each clique
public float getCascadeCost() {
// Lidan: scale the raw cost to [0, 1]
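// e.g., cascadeCost = 100000 gives 1 - exp(-0.02), roughly 0.02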
float normalizedCost = 1.0f - (float) (Math.exp(-0.01 * cascadeCost / 50000));
return normalizedCost;
}
public Accumulator[] rank() {
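// Stage 0 scores candidates directly from the postings lists
// (executeInitialStage) and caches each kept document's term positions in
// keptDocs; each later stage prunes the current results and rescores the
// survivors from those cached positions, accumulating scores across stages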
if (mSavedResults != null) {
mDocSet = new int[mSavedResults.length];
accumulated_scores = new float[mSavedResults.length];
for (int i = 0; i < mSavedResults.length; i++) {
mDocSet[i] = (int) mSavedResults[i][0];
accumulated_scores[i] = mSavedResults[i][1];
}
keptDocs = new int[mDocSet.length + 1][numQueryTerms][];
keptDocLengths = new int[mDocSet.length + 1];
}
// Initialize the MRF ==> this will clear out postings readers cache!
try {
mMRF.initialize();
} catch (ConfigurationException e) {
LOG.error("Error initializing MRF. Aborting ranking!");
return null;
}
int totalCnt = mMRF.getCliques().size();
Map<Integer, Set<CascadeClique>> cascadeStages = Maps.newHashMap();
for (Clique c : mMRF.getCliques()) {
CascadeClique cc = (CascadeClique) c;
int stage = cc.getCascadeStage();
if (cascadeStages.containsKey(stage)) {
cascadeStages.get(stage).add(cc);
} else {
cascadeStages.put(stage, Sets.newHashSet(cc));
}
}
CascadeAccumulator[] results = null;
// Cascade stage starts at 0
int cascadeStage = 0;
int cnt = 0;
String pruningFunction = null;
float pruningParameter = -1;
int termMatches = 0;
while (cnt != totalCnt) { // loop until we've gone through all cascade stages
float subTotal_cascadeCost = 0;
if (cascadeStage < 1) { // only call once, then use keptDocs[][][]
mMRF.removeAllCliques();
for (CascadeClique c : cascadeStages.get(cascadeStage)) {
mMRF.addClique(c);
cnt++;
pruningFunction = c.getPruningFunction();
pruningParameter = c.getPruningParameter();
int numDocs = Integer.MAX_VALUE;
if (mDocSet == null) {
numDocs = c.getNumberOfPostings();
// do not ignore the cost of the first stage in the cost model
subTotal_cascadeCost += c.cost * numDocs;
} else {
subTotal_cascadeCost += c.cost;
}
}
if (mDocSet != null) {
// Lidan: mDocSet[] & accumulated_scores[] should be sorted by doc scores!
// Lidan: this method operates on mDocSet[] & accumulated_scores[]!
pruneDocuments(pruningFunction, pruningParameter);
// Lidan: will score all documents in the retained document set
mNumResults = mDocSet.length;
sortDocumentsByDocnos();
// Cost = cost of applying the feature on the retained documents after pruning
subTotal_cascadeCost = subTotal_cascadeCost * mNumResults;
} else {
// Lidan: first cascade stage, just output INITIAL_STAGE_NUM_RESULTS (20000) documents
mNumResults = INITIAL_STAGE_NUM_RESULTS;
if (cascadeStage != 0) {
System.out.println("Should be the first stage here!");
System.exit(-1);
}
}
// Create single pool of reusable accumulators.
mAccumulators = new CascadeAccumulator[mNumResults + 1];
for (int i = 0; i < mNumResults + 1; i++) {
mAccumulators[i] = new CascadeAccumulator(0, 0.0f);
}
results = executeInitialStage();
cascadeStage++;
} else {
String featureID = null;
ScoringFunction scoringFunction = null;
int mSize = -1;
String[][] concepts_this_stage = new String[totalCnt][];
float[] clique_wgts = new float[concepts_this_stage.length];
int cntConcepts = 0;
for (CascadeClique c : cascadeStages.get(cascadeStage)) {
cnt++;
pruningFunction = c.getPruningFunction();
pruningParameter = c.getPruningParameter();
featureID = c.getParamID().trim(); // termWt, orderedWt, unorderedWt
scoringFunction = c.getScoringFunction();
mSize = c.getWindowSize(); // window width
if (mSize == -1 && !(featureID.equals("termWt"))) {
throw new RetrievalException("Only term features don't support getWindowSize()! " + featureID);
}
concepts_this_stage[cntConcepts] = c.getSingleTerms();
clique_wgts[cntConcepts] = c.getWeight();
cntConcepts++;
subTotal_cascadeCost += c.cost;
}
// for use in pruning
// score-based
float max_score = results[0].score;
float min_score = results[results.length - 1].score;
float score_threshold = (max_score - min_score) * pruningParameter + min_score;
float mean_max_score_threshold = pruningParameter * max_score + (1.0f - pruningParameter) * meanScore;
// rank-based
int retainSize = (int) ((1.0 - pruningParameter) * ((double) (results.length)));
int size = 0;
// Clear priority queue.
mSortedAccumulators.clear();
float[] termCollectionFreqs = new float[cntConcepts];
float[] termDFs = new float[cntConcepts];
int[][] termIndexes = new int[cntConcepts][];
float sumScore = 0;
for (int j = 0; j < cntConcepts; j++) {
String[] singleTerms = concepts_this_stage[j];
int termIndex1 = termToCliqueNumber.get(singleTerms[0]);
if (featureID.indexOf("termWt") != -1) {
float termCollectionFreq = cf.get(singleTerms[0]);
termCollectionFreqs[j] = termCollectionFreq;
float termDF = df.get(singleTerms[0]);
termDFs[j] = termDF;
termIndexes[j] = new int[1];
termIndexes[j][0] = termIndex1;
if (singleTerms.length != 1) {
System.out.println("Should have length 1 " + singleTerms.length);
System.exit(-1);
}
} else {
int termIndex2 = termToCliqueNumber.get(singleTerms[1]);
termIndexes[j] = new int[2];
termIndexes[j][0] = termIndex1;
termIndexes[j][1] = termIndex2;
if (singleTerms.length != 2) {
System.out.println("Should have length 2 " + singleTerms.length);
System.exit(-1);
}
}
}
// iterate over the result documents, which are sorted by score
for (int i = 0; i < results.length; i++) {
// prune; if the document survives, score it and update the pruning stats for the next cascade stage
boolean passedPruning = false;
if (pruningFunction.equals("rank")) {
if (i < retainSize) {
passedPruning = true;
} else {
if (size < mK && mK != defaultNumDocs) {
passedPruning = true;
} else {
break;
}
}
} else if (pruningFunction.equals("score")) {
if (results[i].score > score_threshold) {
passedPruning = true;
} else {
if (size < mK && mK != defaultNumDocs) {
passedPruning = true;
} else {
break;
}
}
} else if (pruningFunction.equals("mean-max")) {
if (results[i].score > mean_max_score_threshold) {
passedPruning = true;
} else {
if (size < mK && mK != defaultNumDocs) {
passedPruning = true;
} else {
break;
}
}
} else {
throw new RetrievalException("Not supported pruner! "+pruningFunction);
}
if (passedPruning) {
size++;
int docIndex = results[i].index_into_keptDocs;
int docLen = keptDocLengths[docIndex];
float docScore_cascade = 0;
for (int j = 0; j < cntConcepts; j++) {
if (featureID.equals("termWt")) {
int termIndex1 = termIndexes[j][0];
int[] positions1 = keptDocs[docIndex][termIndex1];
int tf = 0;
if (positions1 != null) {
tf = positions1.length;
}
docScore_cascade += clique_wgts[j] * scoringFunction.getScore(tf, docLen);
} else { // term proximity
// merge into a single stream and compute matches. Assume there are only two
// terms!!!
int termIndex1 = termIndexes[j][0];
int termIndex2 = termIndexes[j][1];
int[] positions1 = keptDocs[docIndex][termIndex1];
int[] positions2 = keptDocs[docIndex][termIndex2];
int matches = 0;
if (positions1 != null && positions2 != null) { // both query terms are in the doc
termMatches++;
int[] ids = new int[positions1.length]; // zero-initialized: id 0 marks the first term's positions
int length = positions1.length;
int length2 = positions2.length;
int[] newPositions = new int[length + length2];
int[] newIds = new int[length + length2];
int posA = 0;
int posB = 0;
int ii = 0;
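// Two-pointer merge of the two sorted position lists into one stream; newIds
// marks the source term of each merged position (0 = first, 1 = second term).
// e.g., positions1 = [3, 9], positions2 = [5] -> newPositions = [3, 5, 9], newIds = [0, 1, 0]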
while (ii < length + length2) {
if (posB == length2 || posA < length && positions1[posA] <= positions2[posB]) {
newPositions[ii] = positions1[posA];
newIds[ii] = ids[posA];
posA++;
} else {
newPositions[ii] = positions2[posB];
newIds[ii] = 1;
posB++;
}
ii++;
}
int[] positions = newPositions;
ids = newIds;
BitSet mMatchedIds = new BitSet(2); // Assume there are only two terms!!!
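// Ordered windows: count occurrences where both terms appear in query order
// with a gap of at most mSize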
if (featureID.equals("orderedWt")) {
for (ii = 0; ii < positions.length; ii++) {
mMatchedIds.clear();
int maxGap = 0;
boolean ordered = true;
mMatchedIds.set(ids[ii]);
int matchedIDCounts = 1;
int lastMatchedID = ids[ii];
int lastMatchedPos = positions[ii];
for (int jj = ii + 1; jj < positions.length; jj++) {
int curID = ids[jj];
int curPos = positions[jj];
if (!mMatchedIds.get(curID)) {
mMatchedIds.set(curID);
matchedIDCounts++;
if (curID < lastMatchedID) {
ordered = false;
}
if (curPos - lastMatchedPos > maxGap) {
maxGap = curPos - lastMatchedPos;
}
}
// stop looking if the maximum gap is too large
// or the terms appear out of order
if (maxGap > mSize || !ordered) {
break;
}
// did we match all the terms, and in order?
if (matchedIDCounts == 2 && ordered) {
matches++;
break;
}
}
}
} else if (featureID.equals("unorderedWt")) {
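// Unordered windows: count occurrences where both terms co-occur within a
// span of at most mSize positions, in either order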
for (ii = 0; ii < positions.length; ii++) {
mMatchedIds.clear();
mMatchedIds.set(ids[ii]);
int matchedIDCounts = 1;
int startPos = positions[ii];
for (int jj = ii + 1; jj < positions.length; jj++) {
int curID = ids[jj];
int curPos = positions[jj];
int windowSize = curPos - startPos + 1;
if (!mMatchedIds.get(curID)) {
mMatchedIds.set(curID);
matchedIDCounts++;
}
// stop looking if we've exceeded the maximum window size
if (windowSize > mSize) {
break;
}
// did we match all the terms?
if (matchedIDCounts == 2) {
matches++;
break;
}
}
}
} else {
System.out.println("Invalid featureID " + featureID);
System.exit(-1);
}
} // end if this is a match, i.e., both query terms are in the doc
// float s = getScore(matches, docLen, RetrievalEnvironment.defaultCf,
// (float) RetrievalEnvironment.defaultDf, scoringFunctionName);
// docScore_cascade += clique_wgts[j] * s;
GlobalTermEvidence termEvidence = scoringFunction.getGlobalTermEvidence();
termEvidence.cf = RetrievalEnvironment.defaultCf;
termEvidence.df = RetrievalEnvironment.defaultDf;
scoringFunction.initialize(termEvidence, scoringFunction.getGlobalEvidence());
docScore_cascade += clique_wgts[j] * scoringFunction.getScore(matches, docLen);
} // end else it's proximity feature
} // end for (each concept)
// accumulate doc score in results[i] across cascade stages
results[i].score += docScore_cascade;
mSortedAccumulators.add(results[i]);
sumScore += results[i].score;
} // end if passed pruning
} // end iterating over docs
// results were added to the priority queue with their new scores; drain it to re-order them
if (size != mSortedAccumulators.size()) {
throw new RetrievalException("They should be equal right here " + size + " "
+ mSortedAccumulators.size());
}
CascadeAccumulator[] results_tmp = new CascadeAccumulator[size];
meanScore = sumScore / (float) size; // update stats for use in pruning in next cascade stage
stddev = 0;
for (int i = 0; i < results_tmp.length; i++) {
results_tmp[results_tmp.length - 1 - i] = mSortedAccumulators.poll();
stddev += (results_tmp[results_tmp.length - 1 - i].score - meanScore)
* (results_tmp[results_tmp.length - 1 - i].score - meanScore);
}
results = results_tmp;
stddev = (float) Math.sqrt(stddev);
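// note: this is the root of the summed squared deviations, not divided by size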
// Note: no new accumulator pool is created here; mNumResults from the previous
// iteration is reused, since we don't know how many docs are kept until we're
// done iterating through the documents
cascadeStage++;
subTotal_cascadeCost = subTotal_cascadeCost * size;
} // end if not first stage
cascadeCost += subTotal_cascadeCost;
} // end while
CascadeAccumulator[] results_return = results;
if (results.length > mK) {
results_return = new CascadeAccumulator[mK];
for (int i = 0; i < mK; i++) {
results_return[i] = new CascadeAccumulator(results[i].docno, results[i].score);
}
}
return results_return;
}
public CascadeAccumulator[] executeInitialStage() {
// point to next position in keptDocs array that hasn't been filled
int indexCntKeptDocs = 0;
// Clear priority queue.
mSortedAccumulators.clear();
// Cliques associated with the MRF.
List<Clique> cliques = mMRF.getCliques();
if (cliques.isEmpty()) {
throw new RetrievalException("The MRF shouldn't have zero cliques here!");
}
// Current accumulator.
CascadeAccumulator a = mAccumulators[0];
// Maximum possible score that this MRF can achieve.
float mrfMaxScore = 0.0f;
for (Clique c : cliques) {
if (!((((CascadeClique) c).getParamID()).equals("termWt"))) {
throw new RetrievalException("In this faster cascade implementation, the first stage must "
+ "consist of term features in order to get positions[] values! "
+ ((CascadeClique) c).getParamID());
}
mrfMaxScore += c.getMaxScore();
}
// Sort cliques according to their max scores.
Collections.sort(cliques, maxScoreComparator);
// Score that must be achieved to enter result set.
double scoreThreshold = Double.NEGATIVE_INFINITY;
// Offset into document set we're currently at (if applicable).
int docsetOffset = 0;
int docno = 0;
if (mDocSet != null) {
docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE;
} else {
docno = mMRF.getNextCandidate();
}
boolean firstTime = true;
while (docno < Integer.MAX_VALUE) {
for (DocumentNode documentNode : mDocNodes) {
documentNode.setDocno(docno);
}
// Document-at-a-time scoring.
float docMaxScore = mrfMaxScore;
boolean skipped = false;
float score = 0.0f;
// Lidan: accumulate document scores across the cascade stages
// if (mDocSet != null && cascadeStage != 0) {
// score = accumulated_scores[docsetOffset - 1];
// }
// for each query term, its positions in the document
int[][] termPositions = new int[cliques.size()][];
int doclen = -1;
for (int i = 0; i < cliques.size(); i++) {
// Current clique that we're scoring.
CascadeClique c = (CascadeClique) cliques.get(i);
if (firstTime) {
termToCliqueNumber.put(c.getConcept().trim().toLowerCase(), i);
cf.put(c.getConcept().trim().toLowerCase(), c.termCollectionCF());
df.put(c.getConcept().trim().toLowerCase(), c.termCollectionDF());
}
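// Max-score optimization: if even a perfect score from the remaining cliques
// can't lift this document above the entry threshold, skip scoring it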
if (score + docMaxScore <= scoreThreshold) {
// Advance postings readers (but don't score).
for (int j = i; j < cliques.size(); j++) {
cliques.get(j).setNextCandidate(docno + 1);
}
skipped = true;
break;
}
// Document independent cliques do not affect the ranking.
if (!c.isDocDependent()) {
continue;
}
// Update document score.
float cliqueScore = c.getPotential();
score += c.getWeight() * cliqueScore;
// Update the max score for the rest of the cliques.
docMaxScore -= c.getMaxScore();
// stuff needed for document evaluation in the next stage
int[] p = c.getPositions();
if (p != null) {
termPositions[i] = Arrays.copyOf(p, p.length);
doclen = c.getDocLen();
}
}
firstTime = false;
// Keep track of mNumResults best accumulators.
if (!skipped && score > scoreThreshold) {
a.docno = docno;
a.score = score;
a.index_into_keptDocs = indexCntKeptDocs;
keptDocLengths[indexCntKeptDocs] = doclen;
mSortedAccumulators.add(a);
// save positional information for each query term in the document
for (int j = 0; j < termPositions.length; j++) {
if (termPositions[j] != null) {
keptDocs[indexCntKeptDocs][j] = Arrays.copyOf(termPositions[j], termPositions[j].length);
}
}
if (mSortedAccumulators.size() == mNumResults + 1) {
a = mSortedAccumulators.poll(); // Re-use the accumulator of the removed document
// Once the maximum # of docs is in the queue, each newly added document ejects
// an old one; reuse the slot freed by the ejected document to store the new
// document's positional info in keptDocs
indexCntKeptDocs = a.index_into_keptDocs;
keptDocs[indexCntKeptDocs] = new int[numQueryTerms][];
scoreThreshold = mSortedAccumulators.peek().score;
} else {
a = mAccumulators[mSortedAccumulators.size()]; // Next non-used accumulator in the
// accumulator pool
indexCntKeptDocs++;
}
}
if (mDocSet != null) {
docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE;
} else {
docno = mMRF.getNextCandidate();
}
}
// Drain the accumulators from the priority queue, lowest score first, filling results in (reverse) descending order.
CascadeAccumulator[] results_tmp = new CascadeAccumulator[Math.min(mNumResults,
mSortedAccumulators.size())];
for (int i = 0; i < results_tmp.length; i++) {
results_tmp[results_tmp.length - 1 - i] = mSortedAccumulators.poll();
meanScore += results_tmp[results_tmp.length - 1 - i].score;
}
meanScore /= results_tmp.length;
CascadeAccumulator[] results = results_tmp;
return results;
}
/**
* Returns the Markov Random Field associated with this ranker.
*/
public MarkovRandomField getMRF() {
return mMRF;
}
/**
* Sets the number of results to return.
*/
public void setNumResults(int numResults) {
mNumResults = numResults;
}
private List<DocumentNode> getDocNodes() {
ArrayList<DocumentNode> docNodes = new ArrayList<DocumentNode>();
// Check which of the nodes are DocumentNodes.
List<GraphNode> nodes = mMRF.getNodes();
for (GraphNode node : nodes) {
if (node.getType() == GraphNode.Type.DOCUMENT) {
docNodes.add((DocumentNode) node);
}
}
return docNodes;
}
}