// Wrap the cleaned eigenvectors as a distributed row matrix that uses tmp as its second path argument.
final int eigenvectorRowCount = desiredRank - 1;
DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, eigenvectorRowCount, sampleDimension);
svdT.configure(conf);

// Wrap the test data the same way; the row count is taken from the in-memory sample list.
final int sampleRowCount = sampleData.size();
DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleRowCount, sampleDimension);
a.configure(conf);

// Combine the two transposed matrices; the product's rows feed both clustering jobs below.
// NOTE(review): the exact orientation of times() is defined by DistributedRowMatrix -- confirm
// against its documentation before changing which operands are transposed.
DistributedRowMatrix dataTransposed = a.transpose();
DistributedRowMatrix eigenvectorsTransposed = svdT.transpose();
DistributedRowMatrix sData = dataTransposed.times(eigenvectorsTransposed);
sData.configure(conf);

// Canopy pass over the product rows, writing under output, to prime the kMeans canopies.
// NOTE(review): 8 and 4 are presumably the T1/T2 canopy thresholds -- confirm against CanopyDriver.run.
CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);

// KMeans pass over the same rows, seeded from output/clusters-0.
// NOTE(review): assumes the Canopy job above leaves its result in "clusters-0"; 0.001 and 10 look like
// the convergence delta and the iteration cap -- confirm against KMeansDriver.run.
Path initialClusters = new Path(output, "clusters-0");
KMeansDriver.run(sData.getRowPath(), initialClusters, output, measure, 0.001, 10, true, false);