Package org.apache.mahout.math

Examples of org.apache.mahout.math.DenseMatrix


    pageRank.setConf(conf);
    pageRank.run(new String[] { "--vertices", verticesFile.getAbsolutePath(), "--edges", edgesFile.getAbsolutePath(),
        "--output", outputDir.getAbsolutePath(), "--numIterations", "3", "--stayingProbability", "0.8",
        "--tempDir", tempDir.getAbsolutePath() });

    Matrix expectedAdjacencyMatrix = new DenseMatrix(new double[][] {
        { 0, 1, 1, 1 },
        { 1, 0, 0, 1 },
        { 0, 0, 1, 0 },
        { 0, 1, 1, 0 } });

    int numVertices = HadoopUtil.readInt(new Path(tempDir.getAbsolutePath(), AdjacencyMatrixJob.NUM_VERTICES), conf);
    assertEquals(4, numVertices);
    Matrix actualAdjacencyMatrix = MathHelper.readMatrix(conf, new Path(tempDir.getAbsolutePath(),
        AdjacencyMatrixJob.ADJACENCY_MATRIX + "/part-r-00000"), numVertices, numVertices);

    StringBuilder info = new StringBuilder();
    info.append("\nexpected adjacency matrix\n\n");
    info.append(MathHelper.nice(expectedAdjacencyMatrix));
    info.append("\nactual adjacency matrix \n\n");
    info.append(MathHelper.nice(actualAdjacencyMatrix));
    info.append('\n');
    log.info(info.toString());

    Matrix expectedTransitionMatrix = new DenseMatrix(new double[][] {
        { 0.0,         0.4, 0.0, 0.0 },
        { 0.266666667, 0.0, 0.0, 0.4 },
        { 0.266666667, 0.0, 0.8, 0.4 },
        { 0.266666667, 0.4, 0.0, 0.0 } });
View Full Code Here


    double topicSmoothing = Double.parseDouble(job.get(TOPIC_SMOOTHING_KEY));

    Path dir = new Path(statePath);

    // TODO scalability bottleneck: numWords * numTopics * 8bytes for the driver *and* M/R classes
    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);
    double ll = 0.0;
    if (empty) {
      return new LDAState(numTopics, numWords, topicSmoothing, pWgT, logTotals, ll);
    }
    for (Pair<IntPairWritable,DoubleWritable> record
         : new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(new Path(dir, "part-*"),
                                                                        PathType.GLOB,
                                                                        null,
                                                                        null,
                                                                        true,
                                                                        job)) {
      IntPairWritable key = record.getFirst();
      DoubleWritable value = record.getSecond();
      int topic = key.getFirst();
      int word = key.getSecond();
      if (word == TOPIC_SUM_KEY) {
        logTotals[topic] = value.get();
        Preconditions.checkArgument(!Double.isInfinite(value.get()));
      } else if (topic == LOG_LIKELIHOOD_KEY) {
        ll = value.get();
      } else {
        Preconditions.checkArgument(topic >= 0, "topic should be non-negative, not %d", topic);
        Preconditions.checkArgument(word >= 0, "word should be non-negative not %d", word);
        Preconditions.checkArgument(pWgT.getQuick(topic, word) == 0.0);

        pWgT.setQuick(topic, word, value.get());
        Preconditions.checkArgument(!Double.isInfinite(pWgT.getQuick(topic, word)));
      }
    }

    return new LDAState(numTopics, numWords, topicSmoothing, pWgT, logTotals, ll);
  }
View Full Code Here

  }

  @Override
  public Matrix getDiagonalMatrix() {
    if(diagonalMatrix == null) {
      diagonalMatrix = new DenseMatrix(desiredRank, desiredRank);
    }
    if(diagonalMatrix.get(0, 1) <= 0) {
      try {
        Vector norms = fetchVector(new Path(baseDir, "norms"), 0);
        Vector projections = fetchVector(new Path(baseDir, "projections"), 0);
View Full Code Here

    updatedModel = initialModelCorpusFraction == 0
        ? new TopicModel(numTopics, numTerms, eta, alpha, null, terms, numUpdatingThreads, 1)
        : topicModel;
    updatedModel.setConf(getConf());
    docTopicCounts = new DenseMatrix(numDocuments, numTopics);
    docTopicCounts.assign(1.0/numTopics);
    modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
  }
View Full Code Here

      super.setup(context);
      FileSystem fs = FileSystem.get(context.getConfiguration());
      Path uHatPath = new Path(context.getConfiguration().get(PROP_UHAT_PATH));
      Path sigmaPath = new Path(context.getConfiguration().get(PROP_SIGMA_PATH));

      uHat = new DenseMatrix(SSVDSolver.loadDistributedRowMatrix(fs,
          uHatPath, context.getConfiguration()));
      // since uHat is (k+p) x (k+p)
      kp = uHat.columnSize();
      k = context.getConfiguration().getInt(PROP_K, kp);
      uRow = new DenseVector(k);
View Full Code Here

    this(loadModel(conf, modelpath), eta, alpha, dictionary, numThreads, modelWeight);
  }

  public TopicModel(int numTopics, int numTerms, double eta, double alpha, String[] dictionary,
      int numThreads, double modelWeight) {
    this(new DenseMatrix(numTopics, numTerms), new DenseVector(numTopics), eta, alpha, dictionary,
        numThreads, modelWeight);
  }
View Full Code Here

  public Vector topicSums() {
    return topicSums;
  }

  private static Pair<Matrix,Vector> randomMatrix(int numTopics, int numTerms, Random random) {
    Matrix topicTermCounts = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    if(random != null) {
      for(int x = 0; x < numTopics; x++) {
        for(int term = 0; term < numTerms; term++) {
          topicTermCounts.viewRow(x).set(term, random.nextDouble());
        }
      }
    }
    for(int x = 0; x < numTopics; x++) {
      topicSums.set(x, random == null ? 1.0 : topicTermCounts.viewRow(x).norm(1));
    }
    return Pair.of(topicTermCounts, topicSums);
  }
View Full Code Here

    }
    if(rows.isEmpty()) {
      throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
    }
    numTopics++;
    Matrix model = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    for(Pair<Integer, Vector> pair : rows) {
      model.viewRow(pair.getFirst()).assign(pair.getSecond());
      topicSums.set(pair.getFirst(), pair.getSecond().norm(1));
    }
    return Pair.of(model, topicSums);
  }
View Full Code Here

      }
    });
  }

  public static Matrix randomStructuredModel(int numTopics, int numTerms, DoubleFunction decay) {
    Matrix model = new DenseMatrix(numTopics, numTerms);
    int width = numTerms / numTopics;
    for(int topic = 0; topic < numTopics; topic++) {
      int topicCentroid = width * (1+topic);
      for(int i = 0; i < numTerms; i++) {
        int distance = Math.abs(topicCentroid - i);
        if(distance > numTerms / 2) {
          distance = numTerms - distance;
        }
        double v = decay.apply(distance);
        model.set(topic, i, v);
      }
    }
    return model;
  }
View Full Code Here

    return v;
  }

  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics,numWords);
    double[] logTotals = new double[numTopics];
    for(int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for(int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k,w,Math.log(pseudocount));
      }

      logTotals[k] = Math.log(total);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.DenseMatrix

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.