Examples of FileLineIterable


Examples of net.myrrix.common.iterator.FileLineIterable

    // Walk the matching data files in dataDirectory and gather the IDs found on each line
    // into two sets.
    log.info("Reading IDs...");   
    FastIDSet userIDsSet = new FastIDSet();
    FastIDSet itemIDsSet = new FastIDSet();
    Splitter comma = Splitter.on(',');
    // Only files whose names end in ".csv", optionally followed by ".zip" or ".gz", are read.
    for (File f : dataDirectory.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
      for (CharSequence line : new FileLineIterable(f)) {
        // Only the first two comma-separated fields are consumed; each is parsed as a long.
        // Presumably field 1 is the user ID and field 2 the item ID (per the set names) -- confirm.
        Iterator<String> it = comma.split(line).iterator();
        userIDsSet.add(Long.parseLong(it.next()));
        itemIDsSet.add(Long.parseLong(it.next()));
      }
    }
View Full Code Here

Examples of org.apache.mahout.cf.taste.impl.common.FileLineIterable

  @Override
  protected void reload() {
    bookMap = new FastMap<String, Book>(5001);
    userDataMap = new FastMap<String, String[]>(5001);

    for (String line : new FileLineIterable(booksFile, true)) {
      String[] tokens = tokenizeLine(line, 5);
      if (tokens != null) {
        String id = tokens[0];
        bookMap.put(id, new Book(id, tokens[1], tokens[2], Integer.parseInt(tokens[3]), tokens[4]));
      }
    }
    for (String line : new FileLineIterable(usersFile, true)) {
      String[] tokens = tokenizeLine(line, 3);
      if (tokens != null) {
        String id = tokens[0];
        userDataMap.put(id, new String[] { tokens[1], tokens[2] });
      }
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

  }

  private static void loadPopulation(FileSystem fs, Path f,
                                     Collection<DummyCandidate> population) throws IOException {
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add(StringUtils.<DummyCandidate>fromString(line));
    }
  }
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (String line : new FileLineIterable(new File("target/test-classes/wdbc/wdbc.data"))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          assertInRange(dl.getAttribut(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

    // Writer for the output sequence file: Text keys, TopKStringPatterns values.
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
   
    FPGrowth<String> fp = new FPGrowth<String>();
    Set<String> features = new HashSet<String>();
   
    // The input file is opened twice through two independent FileLineIterable instances:
    // the inner one feeds generateFList(..., minSupport), and the outer one feeds
    // generateTopKFrequentPatterns together with that result. Output goes through
    // StringOutputConverter into the sequence-file writer above.
    // NOTE(review): writer.close() is not in a finally block, so it is skipped if the call
    // below throws -- confirm whether the enclosing method handles that.
    fp.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(new File(input), encoding,
        false), pattern), fp.generateFList(new StringRecordIterator(new FileLineIterable(new File(input),
        encoding, false), pattern), minSupport), minSupport, maxHeapSize, features,
      new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
      new ContextStatusUpdater(null));
    writer.close();
   
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

    // Run BayesFileFormatter.collapse with the label "animal", targeting a file named "animal"
    // under the out directory, then check what was written.
    Charset charset = Charset.forName("UTF-8");
    BayesFileFormatter.collapse("animal", analyzer, input, charset, new File(out, "animal"));
    files = out.listFiles();
    // Exactly one file is expected in the output directory.
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    // Every line of that file must begin with the label; lines are counted as they are read.
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      System.out.println("Line: " + line);
      count++;
    }
    // The number of lines read must equal words.length.
    assertEquals(count + " does not equal: " + words.length, count, words.length);
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

      resultFile.delete();
    }
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, false)) {
        String convertedLine = COLON_DELIMITER_PATTERN.matcher(line.substring(0, line.lastIndexOf(COLON_DELIMTER))).replaceAll(",");
        writer.println(convertedLine);
      }
      writer.flush();
    } catch (IOException ioe) {
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, true)) {
        // 0 ratings are basically "no rating", ignore them (thanks h.9000)
        if (line.endsWith("\"0\"")) {
          continue;
        }
        // Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.