Package org.apache.crunch.impl.mr

Examples of org.apache.crunch.impl.mr.MRPipeline.readTextFile()
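MRPipeline.readTextFile(String) reads a text file (or a directory of text files) at the given path and returns a PCollection<String> with one element per line. A minimal, hedged sketch of its use (the input and output paths are placeholders):

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.To;

public class ReadTextFileExample {
  public static void main(String[] args) {
    // The class argument is used to locate the job jar on the cluster.
    Pipeline pipeline = new MRPipeline(ReadTextFileExample.class);

    // One PCollection element per line of the input file.
    PCollection<String> lines = pipeline.readTextFile("/path/to/input.txt");

    // Write the lines to a text-file target and execute the pipeline.
    pipeline.write(lines, To.textFile("/path/to/output"));
    pipeline.done();
  }
}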


  // JUnit rule that supplies a per-test temporary directory and Hadoop configuration
  @Rule
  public transient TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void testReflection() throws IOException {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<StringWrapper> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(new MapFn<String, StringWrapper>() {

          @Override
          public StringWrapper map(String input) {
            StringWrapper stringWrapper = new StringWrapper();
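The excerpt above stops inside the anonymous MapFn. A self-contained sketch of the same pattern, reading lines into a reflection-serialized bean; LineWrapper and its accessors are hypothetical stand-ins for the StringWrapper test class:

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.To;
import org.apache.crunch.types.avro.Avros;

public class ReflectReadExample {
  // Hypothetical bean serialized via Avro reflection (stand-in for StringWrapper).
  public static class LineWrapper {
    private String value;
    public String getValue() { return value; }
    public void setValue(String value) { this.value = value; }
  }

  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(ReflectReadExample.class);
    PCollection<LineWrapper> wrapped = pipeline
        .readTextFile("/path/to/input.txt")
        .parallelDo(new MapFn<String, LineWrapper>() {
          @Override
          public LineWrapper map(String input) {
            LineWrapper w = new LineWrapper();
            w.setValue(input);
            return w;
          }
        }, Avros.reflects(LineWrapper.class)); // reflection-based Avro PType

    pipeline.write(wrapped, To.avroFile("/path/to/output"));
    pipeline.done();
  }
}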


  // Verifies that reflection-based and Avro specific types can be combined in one pipeline without crashing.
  @Test
  public void testCombinationOfReflectionAndSpecific() throws IOException {
    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Pair<StringWrapper, Person>> hybridPairCollection = pipeline.readTextFile(
        tmpDir.copyResourceFileName("set1.txt")).parallelDo(new MapFn<String, Pair<StringWrapper, Person>>() {

      @Override
      public Pair<StringWrapper, Person> map(String input) {
        Person person = new Person();
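The excerpt stops inside the MapFn; the key idea is the PType that lets a reflection-serialized bean and an Avro specific record travel together as a Pair. A hedged sketch of building such a pair PType (the wrapper bean is a hypothetical stand-in for StringWrapper; the specific-record class is passed in by the caller):

import org.apache.avro.specific.SpecificRecord;
import org.apache.crunch.Pair;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.avro.Avros;

public class HybridPTypeExample {
  // Hypothetical reflection-serialized bean, standing in for StringWrapper.
  public static class Wrapper {
    public String value;
  }

  // Builds a pair PType that combines a reflection-based type with an Avro
  // specific-record type, so both halves can flow through one parallelDo.
  public static <R extends SpecificRecord> PType<Pair<Wrapper, R>> hybridType(Class<R> specificClass) {
    return Avros.pairs(Avros.reflects(Wrapper.class), Avros.specifics(specificClass));
  }
}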

  // Tail of the truncated STRING_PTYPE definition used by testDerivedReflection below:
      Avros.reflects(StringWrapper.class));

  @Test
  public void testDerivedReflection() throws Exception {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(IdentityFn.<String>getInstance(), STRING_PTYPE);
    List<String> strings = Lists.newArrayList(stringWrapperCollection.materialize());
    pipeline.done();
    assertEquals(Lists.newArrayList("b", "c", "a", "e"), strings);
  }
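STRING_PTYPE above is a derived PType whose definition is truncated in this excerpt (only its closing Avros.reflects(StringWrapper.class) argument is visible). A hedged sketch of how such a derived type is typically assembled with Avros.derived; the wrapper class and MapFns here are hypothetical stand-ins:

import org.apache.crunch.MapFn;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.avro.Avros;

public class DerivedPTypeExample {
  // Hypothetical reflection-serialized wrapper, standing in for StringWrapper.
  public static class Wrapper {
    public String value;
  }

  // Unwraps the stored Wrapper back into a String.
  static class UnwrapFn extends MapFn<Wrapper, String> {
    @Override
    public String map(Wrapper input) { return input.value; }
  }

  // Wraps a String so it can be serialized via Avro reflection.
  static class WrapFn extends MapFn<String, Wrapper> {
    @Override
    public Wrapper map(String input) {
      Wrapper w = new Wrapper();
      w.value = input;
      return w;
    }
  }

  // derived(targetClass, inputFn: base -> target, outputFn: target -> base, basePType)
  public static final PType<String> STRING_PTYPE =
      Avros.derived(String.class, new UnwrapFn(), new WrapFn(), Avros.reflects(Wrapper.class));
}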


  @Test
  public void testWrappedDerivedReflection() throws Exception {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Pair<Long, String>> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(new MapFn<String, Pair<Long, String>>() {
          @Override
          public Pair<Long, String> map(String input) {
            return Pair.of(1L, input);
          }
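The excerpt cuts off before the PType argument for the Pair<Long, String> collection. A short sketch of the usual shape of that argument, using plain Avros.strings(); a derived PType such as STRING_PTYPE could be substituted for the String side:

import org.apache.crunch.Pair;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.avro.Avros;

public class PairPTypeExample {
  // Pair PType for Pair<Long, String> elements.
  public static final PType<Pair<Long, String>> LONG_STRING_PAIRS =
      Avros.pairs(Avros.longs(), Avros.strings());
}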

  @Test
  public void materializedColShouldBeWritten() throws Exception {
    File textFile = tmpDir.copyResourceFile("shakes.txt");
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> genericCollection = pipeline.readTextFile(textFile.getAbsolutePath());
    pipeline.run();
    PCollection<String> filter = genericCollection.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
    filter.materialize();
    pipeline.run();
    File file = tmpDir.getFile("output.txt");
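The excerpt ends right after obtaining a handle to an output file. As background for the test's intent, a minimal hedged sketch of how materialize() interacts with pipeline execution (paths are placeholders; this is not the original test body):

import java.util.List;

import com.google.common.collect.Lists;

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;

public class MaterializeExample {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(MaterializeExample.class);
    PCollection<String> lines = pipeline.readTextFile("/path/to/input.txt");

    // materialize() plans a write to a temporary location; running the
    // pipeline executes that write, and iterating reads the results back.
    Iterable<String> materialized = lines.materialize();
    pipeline.run();

    List<String> asList = Lists.newArrayList(materialized);
    System.out.println("read " + asList.size() + " lines");
    pipeline.done();
  }
}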

 
 
  @Test
  public void testPGroupedTableToMultipleOutputs() throws IOException{
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PGroupedTable<String, String> groupedLineTable = pipeline
        .readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .by(IdentityFn.<String>getInstance(), Writables.strings())
        .groupByKey();
   
    PTable<String, String> ungroupedTableA = groupedLineTable.ungroup();
    PTable<String, String> ungroupedTableB = groupedLineTable.ungroup();
   
    File outputDirA = tmpDir.getFile("output_a");
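The excerpt stops after creating the first output directory. A hedged continuation sketch (not the original test body) of how the two ungrouped views of the grouped table are typically written to separate targets:

    // Illustrative continuation: write each ungrouped view to its own text-file
    // target and execute everything in a single run. outputDirB is introduced here.
    File outputDirB = tmpDir.getFile("output_b");
    pipeline.write(ungroupedTableA, To.textFile(outputDirA.getAbsolutePath()));
    pipeline.write(ungroupedTableB, To.textFile(outputDirB.getAbsolutePath()));
    pipeline.done();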

  @Test
  public void testWritingOfDotfile() throws IOException {
    File dotfileDir = Files.createTempDir();
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    pipeline.getConfiguration().set(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR, dotfileDir.getAbsolutePath());

    PCollection<String> lines = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
    pipeline.write(
        lines.parallelDo(IdentityFn.<String>getInstance(), Writables.strings()),
        To.textFile(tmpDir.getFile("output").getAbsolutePath()));
    pipeline.done();
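The excerpt ends at pipeline.done(). An illustrative follow-up check (not the original assertions): with PIPELINE_DOTFILE_OUTPUT_DIR set, the planner is expected to write .dot plan files into the configured directory.

    // Illustrative check: after done(), the configured directory should contain
    // one or more .dot files describing the planned MapReduce jobs.
    File[] dotfiles = dotfileDir.listFiles();
    assertTrue(dotfiles != null && dotfiles.length > 0);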

  @Test
  public void testMRMaterializeToMap() throws IOException {
    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
    String inputFile = tmpDir.copyResourceFileName("set1.txt");
    PCollection<String> c = p.readTextFile(inputFile);
    PTypeFamily tf = c.getTypeFamily();
    PTable<Integer, String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
    Map<Integer, String> m = t.materializeToMap();
    assertMatches(m);
  }
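Set1Mapper is not shown in this excerpt. A self-contained hedged sketch of the same materializeToMap() pattern, using a hypothetical mapper that keys each line by its length:

import java.util.Map;

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.PTypeFamily;

public class MaterializeToMapExample {
  // Hypothetical mapper: keys each line by its length.
  static class LineLengthMapFn extends MapFn<String, Pair<Integer, String>> {
    @Override
    public Pair<Integer, String> map(String input) {
      return Pair.of(input.length(), input);
    }
  }

  public static void main(String[] args) {
    Pipeline p = new MRPipeline(MaterializeToMapExample.class);
    PCollection<String> c = p.readTextFile("/path/to/input.txt");
    PTypeFamily tf = c.getTypeFamily();

    // parallelDo with a table PType yields a PTable, which can be pulled
    // client-side as a Map once the pipeline has run.
    PTable<Integer, String> t = c.parallelDo(new LineLengthMapFn(), tf.tableOf(tf.ints(), tf.strings()));
    Map<Integer, String> m = t.materializeToMap();
    System.out.println(m);
    p.done();
  }
}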

 
  public PipelineExecution run() throws IOException {
    String shakes = tmpDir.copyResourceFileName("shakes.txt");
    String out = tmpDir.getFileName("cancel");
    Pipeline p = new MRPipeline(CancelJobsIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> words = p.readTextFile(shakes);
    p.write(words.count().top(20), To.textFile(out));
    return p.runAsync(); // may need a hack to slow down job start-up if this test becomes flaky.
  }
}
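runAsync() returns a PipelineExecution handle that the cancellation test drives. A hedged sketch of using that handle, assuming the kill(), waitUntilDone(), and getStatus() methods of org.apache.crunch.PipelineExecution (this is not the original test body):

import org.apache.crunch.PipelineExecution;

public class CancelSketch {
  // Illustrative use of the handle returned by runAsync(): kill the
  // asynchronous execution and wait for it to settle before inspecting it.
  static void cancel(PipelineExecution execution) throws InterruptedException {
    execution.kill();
    execution.waitUntilDone();
    // After a successful kill, the status is expected to be KILLED.
    System.out.println("final status: " + execution.getStatus());
  }
}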

   * another. (Javadoc truncated in this excerpt.)
   */
  @Test
  public void testFusedMappersObjectReuseBug() throws IOException {
    Pipeline pipeline = new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration());
    PCollection<StringWrapper> stringWrappers = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
        .parallelDo(new StringWrapper.StringToStringWrapperMapFn(), Avros.reflects(StringWrapper.class));

    PCollection<String> stringsA = stringWrappers.parallelDo(new AppendFn("A"), stringWrappers.getPType())
        .parallelDo(new StringWrapper.StringWrapperToStringMapFn(), Writables.strings());
    PCollection<String> stringsB = stringWrappers.parallelDo(new AppendFn("B"), stringWrappers.getPType())
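AppendFn is not shown in this excerpt. A hedged sketch of what such a function might look like, written defensively so that the two fused downstream mappers never observe the same mutated object (the getValue()/setValue() accessors on StringWrapper are an assumption):

  // Hypothetical sketch of AppendFn: appends a fixed suffix to the wrapped string.
  // A new StringWrapper is created on every call so that fused mappers sharing
  // this output do not see a reused, mutated instance.
  static class AppendFn extends MapFn<StringWrapper, StringWrapper> {
    private final String suffix;

    AppendFn(String suffix) {
      this.suffix = suffix;
    }

    @Override
    public StringWrapper map(StringWrapper input) {
      StringWrapper out = new StringWrapper();
      out.setValue(input.getValue() + suffix); // assumes simple bean accessors
      return out;
    }
  }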
