Package org.apache.mahout.math.hadoop

Examples of org.apache.mahout.math.hadoop.DistributedRowMatrix.configure()


    Path seqFiles = new Path(output, "seqfiles-" + (System.nanoTime() & 0xFF));
    runJob(input, seqFiles, dimensions, dimensions);
    DistributedRowMatrix A = new DistributedRowMatrix(seqFiles,
        new Path(seqFiles, "seqtmp-" + (System.nanoTime() & 0xFF)),
        dimensions, dimensions);
    A.configure(new JobConf());
    return A;
  }
}
View Full Code Here


    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);
View Full Code Here

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
View Full Code Here

    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
    // now run the KMeans job
    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);
View Full Code Here

    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5, 0.0, true, conf);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);
View Full Code Here

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
View Full Code Here

    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
    // now run the KMeans job
    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);
View Full Code Here

    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(config, eigenInput, inMemory);
    }
    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tempOut, 1, 1);
    c.configure(config);
    corpus = c;

    // set up eigenverifier and orthoverifier TODO: allow multithreaded execution

    eigenVerifier = new SimpleEigenVerifier();
View Full Code Here

    return eigenMetaData;
  }

  private void prepareEigens(JobConf conf, Path eigenInput, boolean inMemory) {
    DistributedRowMatrix eigens = new DistributedRowMatrix(eigenInput, tmpOut, 1, 1);
    eigens.configure(conf);
    if (inMemory) {
      List<Vector> eigenVectors = new ArrayList<Vector>();
      for (MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }
View Full Code Here

    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(new JobConf(conf), eigenInput, inMemory);
    }

    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1);
    c.configure(new JobConf(conf));
    corpus = c;

    eigenVerifier = new SimpleEigenVerifier();
    //OrthonormalityVerifier orthoVerifier = new OrthonormalityVerifier();
    VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.