Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily.tableOf()


            @Override
            public Pair<String, Pair<String, Long>> map(Pair<Pair<String, String>, Long> input) {
              Pair<String, String> wordDocumentPair = input.first();
              return Pair.of(wordDocumentPair.first(), Pair.of(wordDocumentPair.second(), input.second()));
            }
          }, ptf.tableOf(ptf.strings(), ptf.pairs(ptf.strings(), ptf.longs())));

      pipeline.writeTextFile(wordDocumentCountPair, transformedOutput.getAbsolutePath());
    }

    SourceTarget<String> st = At.textFile(tfOutput.getAbsolutePath());
View Full Code Here


        PageRankData prd = input.second();
        for (String link : prd.urls) {
          emitter.emit(Pair.of(link, prd.propagatedScore()));
        }
      }
    }, ptf.tableOf(ptf.strings(), ptf.floats()));

    return input.cogroup(outbound).mapValues(
        new MapFn<Pair<Collection<PageRankData>, Collection<Float>>, PageRankData>() {
          @Override
          public PageRankData map(Pair<Collection<PageRankData>, Collection<Float>> input) {
View Full Code Here

   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSourceTarget<K, V>} instance
   */
  public static <K, V> TableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType));
  }

  /**
   * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given path name.
   *
 
View Full Code Here

  public void testMRMaterializeToMap() throws IOException {
    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
    String inputFile = tmpDir.copyResourceFileName("set1.txt");
    PCollection<String> c = p.readTextFile(inputFile);
    PTypeFamily tf = c.getTypeFamily();
    PTable<Integer, String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
    Map<Integer, String> m = t.materializeToMap();
    assertMatches(m);
  }

}
View Full Code Here

    return coll.parallelDo(new DoFn<T, Pair<T, Boolean>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Boolean>> emitter) {
        emitter.emit(Pair.of(input, Boolean.TRUE));
      }
    }, typeFamily.tableOf(coll.getPType(), typeFamily.booleans()));
  }

}
View Full Code Here

   * @return A {@code PTable} that contains the same data as the input {@code PCollection}
   */
  public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
    PType<Pair<K, V>> pt = pcollect.getPType();
    PTypeFamily ptf = pt.getFamily();
    PTableType<K, V> ptt = ptf.tableOf(pt.getSubTypes().get(0), pt.getSubTypes().get(1));
    DoFn<Pair<K, V>, Pair<K, V>> id = IdentityFn.getInstance();
    return pcollect.parallelDo("asPTable", id, ptt);
  }

  /**
 
View Full Code Here

  public static <K1, K2, V> PTable<K2, V> mapKeys(String name, PTable<K1, V> ptable, MapFn<K1, K2> mapFn,
      PType<K2> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K1, V, K2, V>(mapFn, IdentityFn.<V>getInstance()),
        ptf.tableOf(ptype, ptable.getValueType()));
  }
 
  /**
   * Maps a {@code PTable<K, U>} to a {@code PTable<K, V>} using the given {@code MapFn<U, V>} on
   * the values of the {@code PTable}.
View Full Code Here

  public static <K, U, V> PTable<K, V> mapValues(String name, PTable<K, U> ptable, MapFn<U, V> mapFn,
      PType<V> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K, U, K, V>(IdentityFn.<K>getInstance(), mapFn),
        ptf.tableOf(ptable.getKeyType(), ptype));
  }
 
  /**
   * An analogue of the {@code mapValues} function for {@code PGroupedTable<K, U>} collections.
   *
 
View Full Code Here

      MapFn<Iterable<U>, V> mapFn,
      PType<V> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K, Iterable<U>, K, V>(IdentityFn.<K>getInstance(), mapFn),
        ptf.tableOf((PType<K>) ptable.getPType().getSubTypes().get(0), ptype));
  }
 
  /**
   * Extract the keys from the given {@code PTable<K, V>} as a {@code PCollection<K>}.
   * @param ptable The {@code PTable}
View Full Code Here

   *
   * @return a {@code PCollection} representing the sorted collection.
   */
  public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
    PTypeFamily tf = collection.getTypeFamily();
    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
    Configuration conf = collection.getPipeline().getConfiguration();
    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.