Examples of org.apache.mahout.common.distance.DistanceMeasure.distance()

Package org.apache.mahout.common.distance

Class org.apache.mahout.common.distance.DistanceMeasure

Examples of org.apache.mahout.common.distance.DistanceMeasure.distance()

org.apache.mahout.common.distance.DistanceMeasure.distance()
Returns the distance metric applied to the arguments @param v1 a Vector defining a multidimensional point in some feature space @param v2 a Vector defining a multidimensional point in some feature space @return a scalar doubles of the distance

    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      for (int j = i + 1; j < clusters.size(); j++) {
        double d = measure.distance(clusters.get(i)
            .getCenter(), clusters.get(j).getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;

View Full Code Here

    // Given the centroid, we can compute \Delta_1^2(X), the total squared distance for the datapoints
    // this accelerates seed selection.
    double radius = 0;
    DistanceMeasure l2 = new SquaredEuclideanDistanceMeasure();
    for (WeightedVector row : datapoints) {
      radius += l2.distance(row, center);
    }


    // Find the first seed c_1 (and conceptually the second, c_2) as might be done in the 2-means clustering so that
    // the probability of selecting c_1 and c_2 is proportional to || c_1 - c_2 ||^2.  This is done
    // by first selecting c_1 with probability:

View Full Code Here

    // Multinomial distribution of vector indices for the selection seeds. These correspond to
    // the indices of the vectors in the original datapoints list.
    Multinomial<Integer> seedSelector = new Multinomial<Integer>();
    for (int i = 0; i < datapoints.size(); ++i) {
      double selectionProbability =
          radius + datapoints.size() * l2.distance(datapoints.get(i), center);
      seedSelector.add(i, selectionProbability);
    }


    Centroid c_1 = new Centroid((WeightedVector)datapoints.get(seedSelector.sample()).clone());
    c_1.setIndex(0);

View Full Code Here

    c_1.setIndex(0);
    // Construct a set of weighted things which can be used for random selection.  Initial weights are
    // set to the squared distance from c_1
    for (int i = 0; i < datapoints.size(); ++i) {
      WeightedVector row = datapoints.get(i);
      final double w = l2.distance(c_1, row) * row.getWeight();
      seedSelector.set(i, w);
    }


    // From here, seeds are selected with probablity proportional to:
    //

View Full Code Here

      // Don't select this one again.
      seedSelector.set(seedIndex, 0);
      // Re-weight everything according to the minimum distance to a seed.
      for (int currSeedIndex : seedSelector) {
        WeightedVector curr = datapoints.get(currSeedIndex);
        double newWeight = nextSeed.getWeight() * l2.distance(nextSeed, curr);
        if (newWeight < seedSelector.getWeight(currSeedIndex)) {
          seedSelector.set(currSeedIndex, newWeight);
        }
      }
    }

View Full Code Here

      // proportional distance threshold for points that should be involved in calculating the
      // centroid.
      closestClusterDistances.clear();
      for (Vector center : centroids) {
        Vector closestOtherCluster = centroids.search(center, 2).get(1).getValue();
        closestClusterDistances.add(l2.distance(center, closestOtherCluster));
      }


      // Copies the current cluster centroids to newClusters and sets their weights to 0. This is
      // so we calculate the new centroids as we go through the datapoints.
      List<Centroid> newCentroids = Lists.newArrayList();

View Full Code Here

    FileWriter writer = new FileWriter(affinities.toString());
    PrintWriter out = new PrintWriter(writer);
    try {
      for (int i = 0; i < SAMPLE_DATA.size(); i++) {
        for (int j = 0; j < SAMPLE_DATA.size(); j++) {
          out.println(i + "," + j + "," + measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()));
        }
      }
    } finally {
      out.close();
    }

View Full Code Here

    Writer writer = null;
    try {
      writer = Files.newWriter(new File(affinities.toString()), Charsets.UTF_8);
      for (int i = 0; i < SAMPLE_DATA.size(); i++) {
        for (int j = 0; j < SAMPLE_DATA.size(); j++) {
          writer.write(i + "," + j + ',' + measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()) + '\n');
        }
      }
    } finally {
      Closeables.close(writer, false);
    }

View Full Code Here

    // Given the centroid, we can compute \Delta_1^2(X), the total squared distance for the datapoints
    // this accelerates seed selection.
    double deltaX = 0;
    DistanceMeasure distanceMeasure = centroids.getDistanceMeasure();
    for (WeightedVector row : datapoints) {
      deltaX += distanceMeasure.distance(row, center);
    }


    // Find the first seed c_1 (and conceptually the second, c_2) as might be done in the 2-means clustering so that
    // the probability of selecting c_1 and c_2 is proportional to || c_1 - c_2 ||^2.  This is done
    // by first selecting c_1 with probability:

View Full Code Here

    // Multinomial distribution of vector indices for the selection seeds. These correspond to
    // the indices of the vectors in the original datapoints list.
    Multinomial<Integer> seedSelector = new Multinomial<Integer>();
    for (int i = 0; i < datapoints.size(); ++i) {
      double selectionProbability =
          deltaX + datapoints.size() * distanceMeasure.distance(datapoints.get(i), center);
      seedSelector.add(i, selectionProbability);
    }


    int selected = random.nextInt(datapoints.size());
    Centroid c_1 = new Centroid(datapoints.get(selected).clone());

View Full Code Here

0 1

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.