Package edu.cmu.graphchi.apps.recommendations

Source Code of edu.cmu.graphchi.apps.recommendations.CircleOfTrustSalsa$SalsaVertex

package edu.cmu.graphchi.apps.recommendations;

import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.preprocessing.VertexIdTranslate;
import edu.cmu.graphchi.queries.VertexQuery;

import java.io.*;
import java.util.*;
import java.util.logging.Logger;

/**
* Emulates Twitter's Who-To-Follow (WTF) algorithm's SALSA part as described in WWW'13 paper
* WTF: The Who to Follow Service at Twitter: http://www.stanford.edu/~rezab/papers/wtf_overview.pdf
*
* This demonstration loads the followers
* of the "circle of trust" (top visited vertices in egocentric random walk) directly
* from the shards using the edu.cmu.graphchi.queries.VertexQuery class.  Then SALSA
* algorithm is run so that the "circle of trust" is on the left as "hubs" and their followers
* on the right as "authorities".
*
* Circle of trust must be given externally to this class. <b>Note:</b> work in progress.
* @author Aapo Kyrola
*/

// TODO: Make multithreaded / thread-safe.
public class CircleOfTrustSalsa {

    private static final Logger logger = ChiLogger.getLogger("circle-of-trust");


    static class SalsaVertex {
        int id;
        int degree = 0;
        SalsaVertex(int id) {
            this.id = id;
        }
        double value = 1.0;
        ArrayList<Integer> neighbors;
    }

    private VertexQuery queryService;

    // Neighbor list
    private HashMap<Integer, SalsaVertex> hubs;
    private HashMap<Integer, SalsaVertex> authorities;

    // Static cache
    static private Map<Integer, ArrayList<Integer>> cache;
    private String graphName;

    private static final int FILTER_LIMIT = 4;


    // NOTE: non-thread safe
    public CircleOfTrustSalsa(VertexQuery queryService, final int cacheSize) throws Exception {
        this.queryService = queryService;

        synchronized (CircleOfTrustSalsa.class) {
            if (cache == null) {
                cache = Collections.synchronizedMap(new LinkedHashMap<Integer, ArrayList<Integer>>(cacheSize, 1.0f, true) // LRU
                {
                    @Override
                    protected boolean removeEldestEntry(Map.Entry<Integer, ArrayList<Integer>> integerArrayListEntry) {
                        return this.size() > cacheSize;
                    }
                });
            }
        }
    }

    public void initializeGraph(Collection<Integer> circleOfTrust) {
        hubs = new HashMap<Integer, SalsaVertex>(circleOfTrust.size(), 1.0f);
        long t = System.currentTimeMillis();

        int totalNeighbors = 0;

        int cacheHits = 0;
        HashSet<Integer> querySet = new HashSet<Integer>(circleOfTrust.size());
        for(int v : circleOfTrust) {
            hubs.put(v, new SalsaVertex(v));
            if (cache.containsKey(v)) {
                SalsaVertex hub = hubs.get(v);
                hub.neighbors = cache.get(v);
                hub.degree = hub.neighbors.size();
                cacheHits++;
            } else {
                querySet.add(v);
            }
        }


        /* Load neighbors of the circle of trust -- we probably would need some limitation here?? */
        HashMap<Integer, ArrayList<Integer>> hubNeighbors = queryService.queryOutNeighbors(querySet);
        long queryTime = System.currentTimeMillis() - t;

        /* Initialize salsa */
        t = System.currentTimeMillis();

        for(Map.Entry<Integer, ArrayList<Integer>> entry: hubNeighbors.entrySet()) {
            int hubId = entry.getKey();
            SalsaVertex hub = hubs.get(hubId);
            hub.neighbors = entry.getValue();
            hub.degree = entry.getValue().size();
            cache.put(hubId, entry.getValue());
        }

        // Count total neighbors
        for(SalsaVertex hub : hubs.values()) {
            totalNeighbors += hub.neighbors.size();
        }

        long salsaInitTime0 = System.currentTimeMillis() - t;

        // We do not add neighbors to authorities -- we can push values to authorities
        // and pull to hubs.

        int[] authEntries = new int[totalNeighbors];

        int j = 0;
        for(SalsaVertex hub : hubs.values()) {
            for(int authId : hub.neighbors) {
                authEntries[j++] = authId;
            }
        }
        assert(j == authEntries.length);

        // Create map efficiently
        Arrays.sort(authEntries);

        int lastId = -1;
        int count = 0;
    //    int filtered = 0;

        ArrayList<SalsaVertex> tmpAuth = new ArrayList<SalsaVertex>(1 + authEntries.length / 100);
        for(int i=0; i < authEntries.length; i++) {
            int authId = authEntries[i];
            if (lastId != authId) {
                if (lastId >= 0) {
                    if (count > FILTER_LIMIT) {
                        SalsaVertex auth = new SalsaVertex(lastId);
                        auth.degree = count;
                        tmpAuth.add(auth);
                    } else {
                      //  filtered++;
                    }
                    count = 0;

                }
                lastId  = authId;
            }
            count++;

        }

        authorities = new HashMap<Integer, SalsaVertex>(tmpAuth.size());
        for(SalsaVertex auth : tmpAuth) {
            authorities.put(auth.id, auth);
        }

        // NOTE: remove neighbors!
            /*
        long salsaInitTime = System.currentTimeMillis() - t;
        logger.info("Query took: " + queryTime + " ms, circle of trust size=" + circleOfTrust.size() + ", cache size=" +
                cache.size() + ", hits=" + cacheHits);
        logger.info("Salsa init: " + salsaInitTime + " ms, first phase=" + salsaInitTime0 + " ms, hubs="
                + hubs.size() + ", auths=" + authorities.size());
        logger.info("Filtered: " + filtered);            */
    }

    /**
     * Compute SALSA on graph initialized in initializeGraph() method.
     * @param nIterations
     */
    public void computeSALSA(int nIterations) {
        for(int iter=0; iter < nIterations; iter++) {
            // Hubs: sum of authority-neighbors values divided by their degree
            for(SalsaVertex hub: hubs.values()) {
                double nbSum = 0.0;
                // Update the degree because not all authorities were selected
                int degree = 0;
                for(int authId : hub.neighbors) {
                    SalsaVertex auth = authorities.get(authId);
                    if (auth != null) {
                        nbSum += auth.value / auth.degree;
                        degree++;
                    }
                }
                hub.value = nbSum;
                hub.degree = degree;
            }

            // Authorities: push from authority side.
            // First: set values to zero
            for(SalsaVertex auth: authorities.values()) {
                auth.value = 0;
            }

            // Then, push hubs values to their auths
            for(SalsaVertex hub: hubs.values()) {
                double myContribution = hub.value / hub.degree;
                for(int authId : hub.neighbors) {
                    SalsaVertex auth = authorities.get(authId);
                    if (auth != null) {
                        auth.value += myContribution;
                    }
                }
            }
        }
    }

    public VertexQuery getQueryService() {
        return queryService;
    }

    /**
     * Return top K authorities (result from SALSA), but do not include users in the removeList
     * @param K
     * @param removeList
     * @return
     */
    public ArrayList<SalsaVertex> topAuthorities(int K, HashSet<Integer> removeList) {
        // TODO: faster top-K implementation
        ArrayList<SalsaVertex> all = new ArrayList<SalsaVertex>(authorities.size());
        all.addAll(authorities.values());
        Collections.sort(all, new Comparator<SalsaVertex>() {
            @Override
            public int compare(SalsaVertex salsaVertex, SalsaVertex salsaVertex1) {
                if (salsaVertex.value < salsaVertex1.value) return 1;
                else return (salsaVertex.value > salsaVertex1.value ? -1 : 0);
            }
        });

        ArrayList<SalsaVertex> result = new ArrayList<SalsaVertex>(K);
        int i = 0;
        while(result.size() < K) {
            if (i < all.size()) {
                SalsaVertex x = all.get(i);
                if (!removeList.contains(x.id))
                    result.add(x);
            } else {
                break;
            }
            i++;
        }
        return result;
    }

    public String namify(Integer value) throws IOException {
        File f = new File(graphName + "_names.dat");
        if (!f.exists()) {
            System.out.println("didn't find name file: " + f.getPath());
            return value+"";
        }
        int i = value * 16;
        RandomAccessFile raf = new RandomAccessFile(f.getAbsolutePath(), "r");
        raf.seek(i);
        byte[] tmp = new byte[16];
        raf.read(tmp);
        raf.close();
        return new String(tmp) + "(" + value + ")";
    }

    public static void main(String[] args) throws  Exception {
        String graphName = args[0];
        int nShards = Integer.parseInt(args[1]);

        CircleOfTrustSalsa csalsa = new CircleOfTrustSalsa(new VertexQuery(graphName, nShards), 10000);

        VertexIdTranslate vertexTrans = VertexIdTranslate.fromFile(new File(ChiFilenames.getVertexTranslateDefFile(graphName, nShards)));

        BufferedReader cmdIn = new BufferedReader(new InputStreamReader(System.in));
        while(true) {
            System.out.print("Enter vertex id to query >> :: ");
            String ln = cmdIn.readLine();
            int vertex = Integer.parseInt(ln);

            // Circle of trust is just the vertex's followers for now
            HashSet<Integer> circle = csalsa.queryService.queryOutNeighbors(vertexTrans.forward(vertex));

            int maxCircleSize = 300;
            // max 500
            if (circle.size() > maxCircleSize) {
                int[] all = new int[circle.size()];
                int i = 0;
                for(Integer v : circle) all[i++] = v;
                HashSet<Integer> filteredCircle = new HashSet<Integer>();
                Random r  = new Random(260379);
                for(i=0; i < maxCircleSize; i++) filteredCircle.add(all[Math.abs(r.nextInt()) % all.length]);
                circle = filteredCircle;
            }

            csalsa.initializeGraph(circle);

            long t = System.currentTimeMillis();
            csalsa.computeSALSA(3);
            logger.info("SALSA computation took " + (System.currentTimeMillis() - t) + "ms");

            circle.add(vertexTrans.forward(vertex));
            ArrayList<SalsaVertex> top = csalsa.topAuthorities(20, circle);
            int j = 1;
            for(SalsaVertex sv : top) {
                int originalId = vertexTrans.backward(sv.id);
                logger.info("Top " + (j++) + " = " + originalId + " " + csalsa.namify(originalId) + " (" + sv.value + ")");
            }

        }
    }

}
TOP

Related Classes of edu.cmu.graphchi.apps.recommendations.CircleOfTrustSalsa$SalsaVertex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.