Package org.apache.mahout.classifier.df.data

Examples of org.apache.mahout.classifier.df.data.Instance


    FullRunningAverage totalRa = new FullRunningAverage();
    double totalSk = 0.0;

    for (int i = 0; i < data.size(); i++) {
      // computes the variance
      Instance instance = data.get(i);
      int value = (int) instance.get(attr);
      double xk = data.getDataset().getLabel(instance);
      if (ra[value].getCount() == 0) {
        ra[value].addDatum(xk);
        sk[value] = 0.0;
      } else {
View Full Code Here


        String line = scanner.nextLine();
        if (line.isEmpty()) {
          continue; // skip empty lines
        }

        Instance instance = converter.convert(line);
        double prediction = forest.classify(dataset, rng, instance);

        if (ofile != null) {
          ofile.writeChars(Double.toString(prediction)); // write the prediction
          ofile.writeChar('\n');
View Full Code Here

    double totalSum = 0;
    double totalSumSquared = 0;

    // sum and sum of squares
    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.get(i);
      int value = (int) instance.get(attr);
      double label = data.getDataset().getLabel(instance);
      double square = label * label;

      sums[value] += label;
      sumSquared[value] += square;
View Full Code Here

    Dataset dataset = data.getDataset();

    // compute frequencies
    for (int index = 0; index < data.size(); index++) {
      Instance instance = data.get(index);
      counts[ArrayUtils.indexOf(values, instance.get(attr))][(int) dataset.getLabel(instance)]++;
      countAll[(int) dataset.getLabel(instance)]++;
    }

    int size = data.size();
    double hy = entropy(countAll, size); // H(Y)
View Full Code Here

  void computeFrequencies(Data data, int attr, double[] values) {
    Dataset dataset = data.getDataset();

    for (int index = 0; index < data.size(); index++) {
      Instance instance = data.get(index);
      counts[ArrayUtils.indexOf(values, instance.get(attr))][(int) dataset.getLabel(instance)]++;
      countAll[(int) dataset.getLabel(instance)]++;
    }
  }
View Full Code Here

        first = false;
      }

      String line = value.toString();
      if (!line.isEmpty()) {
        Instance instance = converter.convert(line);
        double prediction = forest.classify(dataset, rng, instance);
        lkey.set(dataset.getLabel(instance));
        lvalue.set(Double.toString(prediction));
        context.write(lkey, lvalue);
      }
View Full Code Here

  private boolean isIdentical(Data data) {
    if (data.isEmpty()) {
      return true;
    }

    Instance instance = data.get(0);
    for (int attr = 0; attr < selected.length; attr++) {
      if (selected[attr]) {
        continue;
      }

      for (int index = 1; index < data.size(); index++) {
        if (data.get(index).get(attr) != instance.get(attr)) {
          return false;
        }
      }
    }
View Full Code Here

      if (line.isEmpty()) {
        continue; // skip empty lines
      }
     
      // write the tuple in files[tuple.label]
      Instance instance = converter.convert(line);
      int label = (int) dataset.getLabel(instance);
      files[currents[label]].writeBytes(line);
      files[currents[label]].writeChar('\n');
     
      // update currents
View Full Code Here

        String line = scanner.nextLine();
        if (line.isEmpty()) {
          continue; // skip empty lines
        }

        Instance instance = converter.convert(line);
        double prediction = forest.classify(dataset, rng, instance);

        if (ofile != null) {
          ofile.writeChars(Double.toString(prediction)); // write the prediction
          ofile.writeChar('\n');
View Full Code Here

                                 int[][] counts,
                                 int[] countAll) {
    Dataset dataset = data.getDataset();

    for (int index = 0; index < data.size(); index++) {
      Instance instance = data.get(index);
      int label = (int) dataset.getLabel(instance);
      double value = instance.get(attr);
      int split = 0;
      while (split < splitPoints.length && value > splitPoints[split]) {
        split++;
      }
      if (split < splitPoints.length) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.df.data.Instance

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.