Examples of CosineDistanceMeasure


Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

      clusters.add(cluster);
      value = (Writable) reader.getValueClass()
          .newInstance();
    }
   
    DistanceMeasure measure = new CosineDistanceMeasure();
    double max = 0;
    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      for (int j = i + 1; j < clusters.size(); j++) {
        double d = measure.distance(clusters.get(i)
            .getCenter(), clusters.get(j).getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

    Path vectorsFolder = new Path(outputDir, "tfidf-vectors");
    Path centroids = new Path(outputDir, "centroids");
    Path clusterOutput = new Path(outputDir, "clusters");
   
    RandomSeedGenerator.buildRandom(conf, vectorsFolder, centroids, 20,
      new CosineDistanceMeasure());
    KMeansDriver.run(conf, vectorsFolder, centroids, clusterOutput,
      new CosineDistanceMeasure(), 0.01, 20, true, false);
   
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
        new Path(clusterOutput, Cluster.CLUSTERED_POINTS_DIR
                                + "/part-m-00000"), conf);
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

    // Locations under outputDir used by this excerpt: the vectors passed as
    // input to both calls below, the seed centroids, and the clustering output.
    Path vectorsFolder = new Path(outputDir, "tfidf-vectors");
    Path centroids = new Path(outputDir, "centroids");
    Path clusterOutput = new Path(outputDir, "clusters");
   
    // Build the initial centroids from the vectors. The literal 20 is
    // presumably the number of seeds (k) - confirm against
    // RandomSeedGenerator.buildRandom.
    RandomSeedGenerator.buildRandom(conf, vectorsFolder, centroids, 20,
      new CosineDistanceMeasure());
    // NOTE(review): the seeds above are generated with CosineDistanceMeasure,
    // but the k-means run below is given MyDistanceMeasure - confirm that
    // mixing the two measures is intentional.
    KMeansDriver.run(conf, vectorsFolder, centroids, clusterOutput,
      new MyDistanceMeasure(), 0.01, 20, true, false);

    // Open the part-m-00000 file under the clustered-points directory of the
    // clustering output.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
        new Path(clusterOutput, Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

    while (reader.next(key, value)) {
      points.add(value.get());
    }
    System.out.println(points.size());
    reader.close();
    List<Canopy> canopies = CanopyClusterer.createCanopies(points, new CosineDistanceMeasure(), 0.8, 0.7);
    List<Cluster> clusters = new ArrayList<Cluster>();
    System.out.println(canopies.size());
    for (Canopy canopy : canopies) {
      clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), new CosineDistanceMeasure()));
    }
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

    // Pick k points from the input (points and k are declared outside this
    // excerpt) and print how many were chosen.
    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(points, k);
    List<SoftCluster> clusters = new ArrayList<SoftCluster>();
    System.out.println(randomPoints.size());
    // Seed one SoftCluster per chosen point, with sequential ids starting at 0.
    int clusterId = 0;
    for (Vector v : randomPoints) {
      clusters.add(new SoftCluster(v, clusterId++, new CosineDistanceMeasure()));
    }
   
    // Run fuzzy k-means in memory from the seeded clusters.
    // NOTE(review): the meaning of the trailing arguments (0.01, 3, 10) is not
    // visible here - confirm against FuzzyKMeansClusterer.clusterPoints.
    List<List<SoftCluster>> finalClusters = FuzzyKMeansClusterer.clusterPoints(points, clusters,
      new CosineDistanceMeasure(), 0.01, 3, 10);
    // Print the id and center of every cluster in the last list returned.
    for (SoftCluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
                         + cluster.getCenter().asFormatString());
    }
   
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

      points.add(value.get());
    }
    System.out.println(points.size());
    reader.close();
  
    List<Canopy> canopies = CanopyClusterer.createCanopies(points, new CosineDistanceMeasure(), 0.7, 0.5);
    List<Cluster> clusters = new ArrayList<Cluster>();
    System.out.println(canopies.size());
    for (Canopy canopy : canopies) {
      clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), new CosineDistanceMeasure()));
    }
   
    List<List<Cluster>> finalClusters = KMeansClusterer.clusterPoints(points, clusters,
      new CosineDistanceMeasure(), 10, 0.1);
    for (Cluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
                         + cluster.getCenter().asFormatString());
    }
   
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

    // Pick k points from the input (points and k are declared outside this
    // excerpt) and print how many were chosen.
    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(points, k);
    List<Cluster> clusters = new ArrayList<Cluster>();
    System.out.println(randomPoints.size());
    // Seed one Cluster per chosen point, with sequential ids starting at 0.
    int clusterId = 0;
    for (Vector v : randomPoints) {
      clusters.add(new Cluster(v, clusterId++, new CosineDistanceMeasure()));
    }
   
    // Run k-means in memory from the seeded clusters.
    // NOTE(review): the meaning of the trailing arguments (10, 0.01) is not
    // visible here - confirm against KMeansClusterer.clusterPoints.
    List<List<Cluster>> finalClusters = KMeansClusterer.clusterPoints(points, clusters,
      new CosineDistanceMeasure(), 10, 0.01);
    // Print the id and center of every cluster in the last list returned.
    for (Cluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
                         + cluster.getCenter().asFormatString());
    }
   
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

      VectorBenchmarks mark = new VectorBenchmarks(cardinality, sparsity, numVectors, loop, numOps);
      mark.createBenchmark();
      mark.incrementalCreateBenchmark();
      mark.cloneBenchmark();
      mark.dotBenchmark();
      mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
      mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
      //mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
      mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
     
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

        // be the case. Note, though, that the final clusters with document
        // vectors will be in a different file.
        Path kmeansClusters = new Path(output + "/canopy/clusters-0");

        try {
            CanopyDriver.run(conf, canopyInputPath, canopyOutputPath, new CosineDistanceMeasure(), t1, t2, true, false);
        } catch (Exception e) {
            LOG.error("Failure running mahout canopy.", e);
            return 1;
        }

        // The convergencedelta and maxiterations affect how long kmeans will
        // take to run and how many times we run the algorithm before we give
        // up. The numbers we are using here seem to give reasonably good
        // results.
        try {
            KMeansDriver.run(conf, kmeansInputPath, kmeansClusters, kmeansOutputPath, new CosineDistanceMeasure(), .5, 20, true, false);
        } catch (Exception e) {
            LOG.error("Failure running mahout kmeans.", e);
            return 2;
        }
View Full Code Here

Examples of org.apache.mahout.common.distance.CosineDistanceMeasure

      String vectorPath;
      while((vectorPath = job.get("cluster.results.vector.files" + i)) != null){
        vectorPaths.add(new Path(vectorPath));
        i++;
      }
      measure = new CosineDistanceMeasure();
      t = job.getFloat("cluster.t2.distance", 0.02F);
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.