package edu.cmu.graphchi.util;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.vertexdata.ForeachCallback;
import edu.cmu.graphchi.vertexdata.VertexAggregator;
import edu.cmu.graphchi.datablocks.FloatConverter;
import edu.cmu.graphchi.datablocks.IntConverter;
import edu.cmu.graphchi.engine.VertexInterval;
import edu.cmu.graphchi.preprocessing.VertexIdTranslate;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.TreeSet;
/**
* From float vertex data, prints a top K listing. For example, used
* to get top K highest ranked vertices in PageRank.
*/
public class Toplist {
/**
* Returns a sorted list of top topN vertices having float values.
* @param baseFilename input-graph
* @param numVertices number of vertices in the graph (use engine.numVertices())
* @param topN how many top results to include
* @return
* @throws IOException
*/
public static TreeSet<IdFloat> topListFloat(String baseFilename, int numVertices, final int topN) throws IOException{
final TreeSet<IdFloat> topList = new TreeSet<IdFloat>(new IdFloat.Comparator());
VertexAggregator.foreach(numVertices, baseFilename, new FloatConverter(), new ForeachCallback<Float>() {
IdFloat least;
public void callback(int vertexId, Float vertexValue) {
if (topList.size() < topN) {
topList.add(new IdFloat(vertexId, vertexValue));
least = topList.last();
} else {
if (vertexValue > least.value) {
topList.remove(least);
topList.add(new IdFloat(vertexId, vertexValue));
least = topList.last();
}
}
}
}
);
return topList;
}
/**
* Returns a sorted list of top topN vertices having integer values.
* @param baseFilename input-graph
* @param numVertices number of vertices in the graph (use engine.numVertices())
* @param topN how many top results to include
* @return
* @throws IOException
*/
public static TreeSet<IdInt> topListInt(String baseFilename, int numVertices, final int topN) throws IOException{
final TreeSet<IdInt> topList = new TreeSet<IdInt>(new Comparator<IdInt>() {
public int compare(IdInt a, IdInt b) {
if (a.vertexId == b.vertexId) return 0;
return (a.value > b.value ? -1 : (a.value == b.value ? (a.vertexId < b.vertexId ? -1 : 1) : 1)); // Descending order
}
});
VertexAggregator.foreach(numVertices, baseFilename, new IntConverter(), new ForeachCallback<Integer>() {
IdInt least;
public void callback(int vertexId, Integer vertexValue) {
if (topList.size() < topN) {
topList.add(new IdInt(vertexId, vertexValue));
least = topList.last();
} else {
if (vertexValue > least.value) {
topList.remove(least);
topList.add(new IdInt(vertexId, vertexValue));
least = topList.last();
}
}
}
}
);
return topList;
}
/**
* Returns a sorted list of top topN vertices having float values stored
* in-memory in the HugeFloatMatrix.
* @param floatMatrix vertex-values
* @param column column to use
* @param topN how many top results to include
* @return
* @throws IOException
*/
public static TreeSet<IdFloat> topList(final HugeFloatMatrix floatMatrix, final int column, final int topN) {
final TreeSet<IdFloat> topList = new TreeSet<IdFloat>(new Comparator<IdFloat>() {
public int compare(IdFloat idFloat, IdFloat idFloat1) {
if (idFloat.vertexId == idFloat1.vertexId) return 0;
int comp = -Float.compare(idFloat.value, idFloat1.value); // Descending order
return (comp != 0 ? comp : (idFloat.vertexId < idFloat1.vertexId ? 1 : -1));
}
});
IdFloat least = null;
int n = (int) floatMatrix.getNumRows();
for(int vertexId=0; vertexId < n; vertexId++) {
float vertexValue = floatMatrix.getValue(vertexId, column);
if (topList.size() < topN) {
topList.add(new IdFloat(vertexId, vertexValue));
least = topList.last();
} else {
if (vertexValue > least.value) {
topList.remove(least);
topList.add(new IdFloat(vertexId, vertexValue));
least = topList.last();
}
}
}
return topList;
}
public static void main(String[] args) throws Exception {
String baseFilename = args[0];
int nShards = Integer.parseInt(args[1]);
ArrayList<VertexInterval> intervals = ChiFilenames.loadIntervals(baseFilename, nShards);
TreeSet<IdFloat> top20 = Toplist.topListFloat(baseFilename, intervals.get(intervals.size() - 1).getLastVertex(), 20);
VertexIdTranslate trans = VertexIdTranslate.fromFile(new File(ChiFilenames.getVertexTranslateDefFile(baseFilename, nShards)));
System.out.println("Result: " + top20);
int i = 0;
for(IdFloat vertexRank : top20) {
System.out.println(++i + ": " + trans.backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue());
}
}
}