Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable
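
FileLineIterable exposes a text file (or an InputStream) as an Iterable<String> over its lines, which is why the examples below can feed it straight into a for-each loop or wrap it in other iterators. As a starting point, here is a minimal sketch of the basic pattern; the class name and the transactions.txt path are placeholders, and the only assumption is that Mahout is on the classpath:

  import java.io.File;
  import java.io.IOException;

  import org.apache.mahout.common.iterator.FileLineIterable;

  public class FileLineIterableSketch {
    public static void main(String[] args) throws IOException {
      File input = new File("transactions.txt"); // placeholder path, not taken from the examples
      for (String line : new FileLineIterable(input)) {
        System.out.println(line); // each line of the file, in order
      }
    }
  }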


  public static FastIDSet[] parseMenWomen(File genderFile)
      throws IOException {
    FastIDSet men = new FastIDSet(50000);
    FastIDSet women = new FastIDSet(50000);
    for (String line : new FileLineIterable(genderFile)) {
      int comma = line.indexOf(',');
      char gender = line.charAt(comma + 1);
      if (gender == 'U') {
        continue;
      }
      // ...


   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (CharSequence line : new FileLineIterable(new File(Resources.getResource("wdbc/wdbc.data").toURI()))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          CDMutationTest.assertInRange(dl.getAttribute(index), dataset.getMin(index), dataset
              .getMax(index));
          // ...

    FPGrowth<String> fp1 = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();

    final Map<Set<String>,Long> results1 = Maps.newHashMap();
   
    fp1.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000,
      returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
         
          for (Pair<List<String>,Long> v : value) {
            List<String> l = v.getFirst();
            results1.put(new HashSet<String>(l), v.getSecond());
            log.info("found pat ["+v.getSecond()+"]: "+ v.getFirst());
          }
        }
       
      }, new StatusUpdater() {
       
        @Override
        public void update(String status) {}
      });

    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000,
      new HashSet<String>(),
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        // ...
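
A pattern that recurs throughout these FP-Growth tests is wrapping a FileLineIterable in a StringRecordIterator, which splits each line on the given regular expression ("\s+" above) and yields one Pair<List<String>,Long> per transaction. The sketch below isolates just that step; it assumes the same classpath and imports as the tests above (Resources, Pair, FileLineIterable, StringRecordIterator) and a whitespace-delimited resource such as retail.dat:

  // Sketch only: iterate a transaction file the way the tests above do.
  public static void printTransactions() throws IOException {
    StringRecordIterator it = new StringRecordIterator(
        new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+");
    while (it.hasNext()) {
      Pair<List<String>,Long> transaction = it.next();
      List<String> items = transaction.getFirst(); // the tokens of one input line
      Long count = transaction.getSecond();        // the count attached to that line
      System.out.println(items + " -> " + count);
    }
  }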

  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
   
    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
   
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),

      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
          // ...

    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        // ...

  /**
   * Test Parallel FPGrowth on retail data using top-level runPFPGrowth() method
   */
  @Test
  public void testRetailDataMinSup100() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      // ...
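
As its comment says, this test drives Parallel FP-Growth through the top-level runPFPGrowth() method rather than running the stages one by one. Below is a rough fragment of how that call is wired up, using the input and outputDir variables and the PFPGrowth.INPUT / PFPGrowth.OUTPUT keys that appear in the other snippets on this page; any further Parameters keys, such as a minimum-support setting, are assumptions and omitted here:

    // Sketch of the top-level call; the Parameters keys mirror the snippets above.
    Parameters params = new Parameters();
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    PFPGrowth.runPFPGrowth(params); // runs the whole parallel FP-Growth pipeline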

  /**
   * Test Parallel FPGrowth on retail data, running the various stages individually
   */
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");   
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      // ...

    MathHelper.assertMatrixEquals(expectedAdjacencyMatrix, actualAdjacencyMatrix);
    MathHelper.assertMatrixEquals(expectedTransitionMatrix, actualTransitionMatrix);

    Map<Long,Double> steadyStateProbabilities = Maps.newHashMap();
    for (CharSequence line : new FileLineIterable(new File(outputDir, "part-m-00000"))) {
      String[] tokens = Iterables.toArray(Splitter.on("\t").split(line), String.class);
      steadyStateProbabilities.put(Long.parseLong(tokens[0]), Double.parseDouble(tokens[1]));
    }

    assertEquals(4, steadyStateProbabilities.size());
    // ...

    File input = new File(inputDir, "synth_test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "FPGsynth.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        // ...

    final Map<Set<String>,Long> seqResult = Maps.newHashMap();
   
    FPGrowthObj<String> fpSeq = new FPGrowthObj<String>();
    fpSeq.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fpSeq.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 1000000,
      null,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        // ...
