Package org.apache.mahout.classifier.df.data

Examples of org.apache.mahout.classifier.df.data.Data


  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // prepare the data
    log.debug("partition: {} numInstances: {}", partition, instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);
   
    TreeID key = new TreeID();
   
    log.debug("Building {} trees", nbTrees);
View Full Code Here


  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // prepare the data
    log.debug("partition: {} numInstances: {}", partition, instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);
   
    TreeID key = new TreeID();
   
    log.debug("Building {} trees", nbTrees);
View Full Code Here

    super.setup(context);
   
    Configuration conf = context.getConfiguration();
   
    log.info("Loading the data...");
    Data data = loadData(conf, getDataset());
    log.info("Data loaded : {} instances", data.size());
   
    bagging = new Bagging(getTreeBuilder(), data);
  }
View Full Code Here

    Node childNode;
    if (data.getDataset().isNumerical(best.getAttr())) {
      boolean[] temp = null;

      Data loSubset = data.subset(Condition.lesser(best.getAttr(), best.getSplit()));
      Data hiSubset = data.subset(Condition.greaterOrEquals(best.getAttr(), best.getSplit()));

      if (loSubset.isEmpty() || hiSubset.isEmpty()) {
        // the selected attribute did not change the data, avoid using it in the child nodes
        selected[best.getAttr()] = true;
      } else {
        // the data changed, so we can unselect all previously selected NUMERICAL attributes
        temp = selected;
        selected = cloneCategoricalAttributes(data.getDataset(), selected);
      }

      // size of the subset is less than the minSplitNum
      if (loSubset.size() < minSplitNum || hiSubset.size() < minSplitNum) {
        // branch is not split
        double label;
        if (data.getDataset().isNumerical(data.getDataset().getLabelId())) {
          label = sum / data.size();
        } else {
View Full Code Here

   *          number of trees to grow
   */
  private void runIteration(Random rng, Data data, int m, int nbtrees) {
   
    log.info("Splitting the data");
    Data train = data.clone();
    Data test = train.rsplit(rng, (int) (data.size() * 0.1));
   
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
   
    SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);
   
    // grow a forest with m = log2(M)+1
    treeBuilder.setM(m);
   
    long time = System.currentTimeMillis();
    log.info("Growing a forest with m={}", m);
    DecisionForest forestM = forestBuilder.build(nbtrees);
    sumTimeM += System.currentTimeMillis() - time;
    numNodesM += forestM.nbNodes();
   
    // grow a forest with m=1
    treeBuilder.setM(1);
   
    time = System.currentTimeMillis();
    log.info("Growing a forest with m=1");
    DecisionForest forestOne = forestBuilder.build(nbtrees);
    sumTimeOne += System.currentTimeMillis() - time;
    numNodesOne += forestOne.nbNodes();
   
    // compute the test set error (Selection Error), and mean tree error (One Tree Error),
    double[] testLabels = test.extractLabels();
    double[] predictions = new double[test.size()];
   
    forestM.classify(test, predictions);
    sumTestErrM += ErrorEstimate.errorRate(testLabels, predictions);
   
    forestOne.classify(test, predictions);
View Full Code Here

    }
   
    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
   
    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);
   
    Random rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < nbIterations; iteration++) {
      log.info("Iteration {}", iteration);
      runIteration(rng, data, m, nbTrees);
View Full Code Here

  }
 
  protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
    log.info("Loading the data...");
    FileSystem fs = dataPath.getFileSystem(conf);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
    log.info("Data Loaded");
   
    return data;
  }
View Full Code Here

  }
 
  protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
    log.info("Loading the data...");
    FileSystem fs = dataPath.getFileSystem(conf);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
    log.info("Data Loaded");
   
    return data;
  }
View Full Code Here

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // prepare the data
    log.debug("partition: {} numInstances: {}", partition, instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);
   
    TreeID key = new TreeID();
   
    log.debug("Building {} trees", nbTrees);
View Full Code Here

  private static Data[] generateTrainingDataA() throws DescriptorException {
    // Dataset
    Dataset dataset = DataLoader.generateDataset("C N N C L", false, TRAIN_DATA);
   
    // Training data
    Data data = DataLoader.loadData(dataset, TRAIN_DATA);
    @SuppressWarnings("unchecked")
    List<Instance>[] instances = new List[3];
    for (int i = 0; i < instances.length; i++) {
      instances[i] = Lists.newArrayList();
    }
    for (int i = 0; i < data.size(); i++) {
      if (data.get(i).get(0) == 0.0d) {
        instances[0].add(data.get(i));
      } else {
        instances[1].add(data.get(i));
      }
    }
    Data[] datas = new Data[instances.length];
    for (int i = 0; i < datas.length; i++) {
      datas[i] = new Data(dataset, instances[i]);
    }

    return datas;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.df.data.Data

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.