Package edu.cmu.graphchi.util

Source Code of edu.cmu.graphchi.util.Toplist

package edu.cmu.graphchi.util;

import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.vertexdata.ForeachCallback;
import edu.cmu.graphchi.vertexdata.VertexAggregator;
import edu.cmu.graphchi.datablocks.FloatConverter;
import edu.cmu.graphchi.datablocks.IntConverter;
import edu.cmu.graphchi.engine.VertexInterval;
import edu.cmu.graphchi.preprocessing.VertexIdTranslate;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.TreeSet;

/**
* From float vertex data, prints a top K listing. For example, used
* to get top K highest ranked vertices in PageRank.
*/
public class Toplist {

    /**
     * Returns a sorted list of top topN vertices having float values.
     * @param baseFilename input-graph
     * @param numVertices number of vertices in the graph (use engine.numVertices())
     * @param topN how many top results to include
     * @return
     * @throws IOException
     */
    public static TreeSet<IdFloat> topListFloat(String baseFilename, int numVertices, final int topN) throws IOException{
        final TreeSet<IdFloat> topList = new TreeSet<IdFloat>(new IdFloat.Comparator());
        VertexAggregator.foreach(numVertices, baseFilename, new FloatConverter(), new ForeachCallback<Float>()  {
            IdFloat least;
            public void callback(int vertexId, Float vertexValue) {
                if (topList.size() < topN) {
                    topList.add(new IdFloat(vertexId, vertexValue));
                    least = topList.last();
                } else {
                    if (vertexValue > least.value) {
                        topList.remove(least);
                        topList.add(new IdFloat(vertexId, vertexValue));
                        least = topList.last();
                    }
                }
            }
        }

        );
        return topList;
    }

    /**
     * Returns a sorted list of top topN vertices having integer values.
     * @param baseFilename input-graph
     * @param numVertices number of vertices in the graph (use engine.numVertices())
     * @param topN how many top results to include
     * @return
     * @throws IOException
     */
    public static TreeSet<IdInt> topListInt(String baseFilename, int numVertices, final int topN) throws IOException{
        final TreeSet<IdInt> topList = new TreeSet<IdInt>(new Comparator<IdInt>() {
            public int compare(IdInt a, IdInt b) {
                if (a.vertexId == b.vertexId) return 0;
                return  (a.value > b.value ? -1 : (a.value == b.value ? (a.vertexId < b.vertexId ? -1 : 1) : 1)); // Descending order
            }
        });
        VertexAggregator.foreach(numVertices, baseFilename, new IntConverter(), new ForeachCallback<Integer>()  {
            IdInt least;
            public void callback(int vertexId, Integer vertexValue) {
                if (topList.size() < topN) {
                    topList.add(new IdInt(vertexId, vertexValue));
                    least = topList.last();
                } else {
                    if (vertexValue > least.value) {
                        topList.remove(least);
                        topList.add(new IdInt(vertexId, vertexValue));
                        least = topList.last();
                    }
                }
            }
        }

        );
        return topList;
    }
    /**
     * Returns a sorted list of top topN vertices having float values stored
     * in-memory in the HugeFloatMatrix.
     * @param floatMatrix vertex-values
     * @param column column to use
     * @param topN how many top results to include
     * @return
     * @throws IOException
     */
    public static TreeSet<IdFloat> topList(final HugeFloatMatrix floatMatrix, final int column, final int topN) {
        final TreeSet<IdFloat> topList = new TreeSet<IdFloat>(new Comparator<IdFloat>() {
            public int compare(IdFloat idFloat, IdFloat idFloat1) {
                if (idFloat.vertexId == idFloat1.vertexId) return 0;
                int comp =  -Float.compare(idFloat.value, idFloat1.value); // Descending order
                return (comp != 0 ? comp : (idFloat.vertexId < idFloat1.vertexId ? 1 : -1));
            }
        });
        IdFloat least = null;
        int n = (int) floatMatrix.getNumRows();
        for(int vertexId=0; vertexId < n; vertexId++) {
            float vertexValue = floatMatrix.getValue(vertexId, column);
            if (topList.size() < topN) {
                topList.add(new IdFloat(vertexId, vertexValue));
                least = topList.last();
            } else {
                if (vertexValue > least.value) {
                    topList.remove(least);
                    topList.add(new IdFloat(vertexId, vertexValue));
                    least = topList.last();
                }
            }
        }

        return topList;
    }


    public static void main(String[] args) throws Exception {
        String baseFilename = args[0];
        int nShards = Integer.parseInt(args[1]);

        ArrayList<VertexInterval> intervals = ChiFilenames.loadIntervals(baseFilename, nShards);
        TreeSet<IdFloat> top20 = Toplist.topListFloat(baseFilename, intervals.get(intervals.size() - 1).getLastVertex(), 20);
        VertexIdTranslate trans = VertexIdTranslate.fromFile(new File(ChiFilenames.getVertexTranslateDefFile(baseFilename, nShards)));
        System.out.println("Result: " + top20);
        int i = 0;
        for(IdFloat vertexRank : top20) {
            System.out.println(++i + ": " + trans.backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue());
        }
    }
}
TOP

Related Classes of edu.cmu.graphchi.util.Toplist

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.