Package edu.gslis.ttg.clusters.clusterers

Source Code of edu.gslis.ttg.clusters.clusterers.SimpleJaccardClusterer

package edu.gslis.ttg.clusters.clusterers;

import java.util.Iterator;
import java.util.List;
import java.util.NavigableMap;

import cc.twittertools.thrift.gen.TResult;
import edu.gslis.ttg.clusters.Clusters;
import edu.gslis.ttg.jaccard.JaccardStore;

public class SimpleJaccardClusterer {
 
  private List<TResult> results;
  private JaccardStore jaccardScores;
 
  public SimpleJaccardClusterer(List<TResult> results) {
    this.results = results;
    this.jaccardScores = computeJaccardSimilarity();
  }
 
  public Clusters cluster(double threshold) {
    Clusters clusters = new Clusters();
   
    NavigableMap<Double, List<long[]>> thresholdPairs = jaccardScores.getDocsGreaterThanScore(threshold);
    Iterator<Double> pairsIt = thresholdPairs.keySet().iterator();
    while (pairsIt.hasNext()) { // for each pair of documents matching this jaccard score
      List<long[]> docPairs = thresholdPairs.get(pairsIt.next());
      Iterator<long[]> docPairIt = docPairs.iterator();
      while (docPairIt.hasNext()) { //
        long[] docs = docPairIt.next();
        clusters.mergeMembers(docs[0], docs[1]);
      }
    }
   
    return clusters;
  }
 
  public List<TResult> getResults() {
    return results;
  }

  public void setResults(List<TResult> results) {
    this.results = results;
  }
 
  private JaccardStore computeJaccardSimilarity() { 
    // compute jaccard similarity for each pair of results
    JaccardStore scores = new JaccardStore();
    for (int j = 0; j < results.size(); j++) {
      TResult doc1 = results.get(j);
      for (int k = j + 1; k < results.size(); k++) {
        TResult doc2 = results.get(k);
       
        double jaccardSim = JaccardStore.computeJaccardSimilarity(doc1.getText(), doc2.getText());
        scores.setScore(doc1.getId(), doc2.getId(), jaccardSim);
      }
    }
   
    return scores;
  }

}
TOP

Related Classes of edu.gslis.ttg.clusters.clusterers.SimpleJaccardClusterer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.