Examples of readTextFile()


Examples of org.apache.crunch.Pipeline.readTextFile()

  }

  @Test
  public void testAvroReflectSortTable() throws IOException {
    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
    PTable<String, StringWrapper> unsorted = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt")).parallelDo(
        new MapFn<String, Pair<String, StringWrapper>>() {

          @Override
          public Pair<String, StringWrapper> map(String input) {
            return Pair.of(input, wrap(input));
View Full Code Here

Examples of org.apache.crunch.Pipeline.readTextFile()

    new File(infilename).getParentFile().mkdirs();

    writeFile(expected, infilename);

    Pipeline memPipeline = MemPipeline.getInstance();
    PCollection<String> memPColl = memPipeline.readTextFile(infilename);
    Target memTarget = new TextFileTarget(memOutFilename);
    memPipeline.write(memPColl, memTarget, WriteMode.OVERWRITE);
    memPipeline.run();
    File outDir = new File(memOutFilename);
    File actualMemOut = null;
View Full Code Here

Examples of org.apache.crunch.Pipeline.readTextFile()

      }
    }
    String actualMemText = Files.readFirstLine(actualMemOut, Charsets.UTF_8);

    Pipeline mrPipeline = new MRPipeline(getClass());
    PCollection<String> mrPColl = mrPipeline.readTextFile(infilename);
    Target mrTarget = new TextFileTarget(mrOutFilename);
    mrPipeline.write(mrPColl, mrTarget, WriteMode.OVERWRITE);
    mrPipeline.run();
    String actualMrText = Files.readFirstLine(new File(mrOutFilename + "/part-m-00000"), Charsets.UTF_8);
View Full Code Here

Examples of org.apache.crunch.Pipeline.readTextFile()

  @Test
  public void testWritables() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, WritableTypeFamily.getInstance());
    pipeline.done();
  }

  @Test
View Full Code Here

Examples of org.apache.crunch.Pipeline.readTextFile()

  @Test
  public void testAvro() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, AvroTypeFamily.getInstance());
    pipeline.done();
  }

  @Test
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()

    out2.flush();
    out2.close();

    final MRPipeline pipeline = new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration());

    final PCollection<String> values1 = pipeline.readTextFile(path1.toString());
    final PCollection<String> values2 = pipeline.readTextFile(path2.toString());

    final PTable<Text, Text> convertedValues1 = convertStringToText(values1);
    final PTable<Text, Text> convertedValues2 = convertStringToText(values2);
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()

    out2.close();

    final MRPipeline pipeline = new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration());

    final PCollection<String> values1 = pipeline.readTextFile(path1.toString());
    final PCollection<String> values2 = pipeline.readTextFile(path2.toString());

    final PTable<Text, Text> convertedValues1 = convertStringToText(values1);
    final PTable<Text, Text> convertedValues2 = convertStringToText(values2);

    // for map side join
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()

  @Test
  public void materializedColShouldBeWritten() throws Exception {
    File textFile = tmpDir.copyResourceFile("shakes.txt");
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> genericCollection = pipeline.readTextFile(textFile.getAbsolutePath());
    pipeline.run();
    PCollection<String> filter = genericCollection.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
    filter.materialize();
    pipeline.run();
    File file = tmpDir.getFile("output.txt");
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()

 
 
  @Test
  public void testPGroupedTableToMultipleOutputs() throws IOException{
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PGroupedTable<String, String> groupedLineTable = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt")).by(IdentityFn.<String>getInstance(), Writables.strings()).groupByKey();
   
    PTable<String, String> ungroupedTableA = groupedLineTable.ungroup();
    PTable<String, String> ungroupedTableB = groupedLineTable.ungroup();
   
    File outputDirA = tmpDir.getFile("output_a");
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()

 
  public PipelineExecution run() throws IOException {
    String shakes = tmpDir.copyResourceFileName("shakes.txt");
    String out = tmpDir.getFileName("cancel");
    Pipeline p = new MRPipeline(CancelJobsIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> words = p.readTextFile(shakes);
    p.write(words.count().top(20), To.textFile(out));
    return p.runAsync(); // need to hack to slow down job start up if this test becomes flaky.
  }
}
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.