Package org.apache.mahout.classifier.df.data

Examples of org.apache.mahout.classifier.df.data.DataConverter


    // foreach tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input);
    DataConverter converter = new DataConverter(dataset);
    int nbInstances = dataset.nbInstances();
   
    int id = 0;
    while (scanner.hasNextLine()) {
      if (id % 1000 == 0) {
        log.info("progress : {} / {}", id, nbInstances);
      }
     
      String line = scanner.nextLine();
      if (line.isEmpty()) {
        continue; // skip empty lines
      }
     
      // write the tuple in files[tuple.label]
      Instance instance = converter.convert(line);
      int label = (int) dataset.getLabel(instance);
      files[currents[label]].writeBytes(line);
      files[currents[label]].writeChar('\n');
     
      // update currents
View Full Code Here


      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
View Full Code Here

   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare random-numders generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.df.data.DataConverter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.