Examples of org.apache.mahout.math.hadoop.DistributedRowMatrix.configure()

Class org.apache.mahout.math.hadoop.DistributedRowMatrix

Examples of org.apache.mahout.math.hadoop.DistributedRowMatrix.configure()

org.apache.mahout.math.hadoop.DistributedRowMatrix.configure()

    Path seqFiles = new Path(output, "seqfiles-" + (System.nanoTime() & 0xFF));
    runJob(input, seqFiles, dimensions, dimensions);
    DistributedRowMatrix A = new DistributedRowMatrix(seqFiles, 
        new Path(seqFiles, "seqtmp-" + (System.nanoTime() & 0xFF)), 
        dimensions, dimensions);
    A.configure(new JobConf());
    return A;
  }
}

View Full Code Here

    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);


    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

View Full Code Here

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);


    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);

View Full Code Here

    JobConf conf = new JobConf(config);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);


    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
    // now run the KMeans job
    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);

View Full Code Here

    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5, 0.0, true, conf);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);


    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);

View Full Code Here


    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);


    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);

View Full Code Here

    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);


    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
    // now run the KMeans job
    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);

View Full Code Here


    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(config, eigenInput, inMemory);
    }
    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tempOut, 1, 1);
    c.configure(config);
    corpus = c;


    // set up eigenverifier and orthoverifier TODO: allow multithreaded execution


    eigenVerifier = new SimpleEigenVerifier();

View Full Code Here

    return eigenMetaData;
  }


  private void prepareEigens(JobConf conf, Path eigenInput, boolean inMemory) {
    DistributedRowMatrix eigens = new DistributedRowMatrix(eigenInput, tmpOut, 1, 1);
    eigens.configure(conf);
    if (inMemory) {
      List<Vector> eigenVectors = new ArrayList<Vector>();
      for (MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }

View Full Code Here

    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(new JobConf(conf), eigenInput, inMemory);
    }


    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1);
    c.configure(new JobConf(conf));
    corpus = c;


    eigenVerifier = new SimpleEigenVerifier();
    //OrthonormalityVerifier orthoVerifier = new OrthonormalityVerifier();
    VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.