Package org.apache.crunch.impl.mr

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()


      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Combiner used for summing up response size
    CombineFn<String, Long> longSumCombiner = CombineFn.SUM_LONGS();

    // Table of (ip, sum(response size))
View Full Code Here


  @Test
  public void testAvroReflectSortPair() throws IOException {
    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
    pipeline.enableDebug();
    String rsrc = tmpDir.copyResourceFileName("set2.txt");
    PCollection<Pair<String, StringWrapper>> in = pipeline.readTextFile(rsrc)
        .parallelDo(new MapFn<String, Pair<String, StringWrapper>>() {

          @Override
          public Pair<String, StringWrapper> map(String input) {
            return Pair.of(input, wrap(input));
View Full Code Here

  }

  @Test
  public void testAvroReflectSortTable() throws IOException {
    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
    PTable<String, StringWrapper> unsorted = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt")).parallelDo(
        new MapFn<String, Pair<String, StringWrapper>>() {

          @Override
          public Pair<String, StringWrapper> map(String input) {
            return Pair.of(input, wrap(input));
View Full Code Here

      }
    }
    String actualMemText = Files.readFirstLine(actualMemOut, Charsets.UTF_8);

    Pipeline mrPipeline = new MRPipeline(getClass());
    PCollection<String> mrPColl = mrPipeline.readTextFile(infilename);
    Target mrTarget = new TextFileTarget(mrOutFilename);
    mrPipeline.write(mrPColl, mrTarget, WriteMode.OVERWRITE);
    mrPipeline.run();
    String actualMrText = Files.readFirstLine(new File(mrOutFilename + "/part-m-00000"), Charsets.UTF_8);
View Full Code Here

  @Test
  public void testMaterializeAvroPersonAndReflectsPair_GroupedTable() throws IOException {
    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
    Pipeline pipeline = new MRPipeline(MaterializeIT.class);
    MaterializableIterable<Pair<StringWrapper, Person>> mi = (MaterializableIterable) pipeline
        .readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(new StringToStringWrapperPersonPairMapFn(),
            Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)))
        .materialize();
   
View Full Code Here

  @Test
  public void testCompressText() throws Exception {
    String urlsFile = tmpDir.copyResourceFileName("urls.txt");
    String out = tmpDir.getFileName("out");
    MRPipeline p = new MRPipeline(CompressIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> in = p.readTextFile(urlsFile);
    in.write(Compress.gzip(To.textFile(out)));
    p.done();
    assertTrue(checkDirContainsExt(out, ".gz"));
  }
View Full Code Here

    File src2 = tmpDir.copyResourceFile(Tests.resource(this, "src2.txt"));
    Files.copy(src1, new File(srcFiles, "src1.txt"));
    Files.copy(src2, new File(srcFiles, "src2.txt"));

    MRPipeline p = new MRPipeline(CombineFileIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> in = p.readTextFile(srcFiles.getAbsolutePath());
    in.write(To.textFile(outputFiles.getAbsolutePath()));
    p.done();
    assertEquals(4, outputFiles.listFiles().length);
  }
View Full Code Here

    out2.flush();
    out2.close();

    final MRPipeline pipeline = new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration());

    final PCollection<String> values1 = pipeline.readTextFile(path1.toString());
    final PCollection<String> values2 = pipeline.readTextFile(path2.toString());

    final PTable<Text, Text> convertedValues1 = convertStringToText(values1);
    final PTable<Text, Text> convertedValues2 = convertStringToText(values2);
View Full Code Here

    out2.close();

    final MRPipeline pipeline = new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration());

    final PCollection<String> values1 = pipeline.readTextFile(path1.toString());
    final PCollection<String> values2 = pipeline.readTextFile(path2.toString());

    final PTable<Text, Text> convertedValues1 = convertStringToText(values1);
    final PTable<Text, Text> convertedValues2 = convertStringToText(values2);

    // for map side join
View Full Code Here

  @Test
  public void testWritables() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, WritableTypeFamily.getInstance());
    pipeline.done();
  }

  @Test
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.