Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily


  @Test
  public void testMRMaterializeToMap() throws IOException {
    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
    String inputFile = tmpDir.copyResourceFileName("set1.txt");
    PCollection<String> c = p.readTextFile(inputFile);
    PTypeFamily tf = c.getTypeFamily();
    PTable<Integer, String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
    Map<Integer, String> m = t.materializeToMap();
    assertMatches(m);
  }
View Full Code Here


   */
  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(
      int numReducers,
      PTable<K, U> left,
      PTable<K, V> right) {
    PTypeFamily tf = left.getTypeFamily();
    return cogroup(
        tf.pairs(tf.collections(left.getValueType()),
                 tf.collections(right.getValueType())),
        TupleFactory.PAIR,
        numReducers,
        left, right);
  }
View Full Code Here

   */
  public static <K, U, V> PTable<K, TupleN> cogroup(
      int numReducers,
      PTable<K, ?> first,
      PTable<K, ?>... rest) {
    PTypeFamily tf = first.getTypeFamily();
    PType[] components = new PType[1 + rest.length];
    components[0] = tf.collections(first.getValueType());
    for (int i = 0; i < rest.length; i++) {
      components[i + 1] = tf.collections(rest[i].getValueType());
    }
    return cogroup(
        tf.tuples(components),
        TupleFactory.TUPLEN,
        numReducers,
        first, rest);
  }
View Full Code Here

  private static <K, T extends Tuple> PTable<K, T> cogroup(
      PType<T> outputType,
      TupleFactory tupleFactory,
      int numReducers,
      PTable<K, ?> first, PTable<K, ?>... rest) {
    PTypeFamily ptf = first.getTypeFamily();
    PType[] ptypes = new PType[1 + rest.length];
    ptypes[0] = first.getValueType();
    for (int i = 0; i < rest.length; i++) {
      ptypes[i + 1] = rest[i].getValueType();
    }
    PType<Union> itype = ptf.unionOf(ptypes);
   
    PTable<K, Union> firstInter = first.mapValues("coGroupTag1",
        new CogroupFn(0), itype);
    PTable<K, Union>[] inter = new PTable[rest.length];
    for (int i = 0; i < rest.length; i++) {
View Full Code Here

   * Tuples are otherwise filled with <code>null</code>.
   *
   * @return a collection of {@link Tuple3} objects
   */
  public static <T> PCollection<Tuple3<T, T, T>> comm(PCollection<T> coll1, PCollection<T> coll2) {
    PTypeFamily typeFamily = coll1.getTypeFamily();
    PType<T> type = coll1.getPType();
    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
        "Calculate common values of sets",
        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, Tuple3<T, T, T>>() {
          @Override
          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input,
              Emitter<Tuple3<T, T, T>> emitter) {
            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
            boolean inFirst = !groups.first().isEmpty();
            boolean inSecond = !groups.second().isEmpty();
            T t = input.first();
            emitter.emit(Tuple3.of(inFirst && !inSecond ? t : null, !inFirst && inSecond ? t : null, inFirst
                && inSecond ? t : null));
          }
        }, typeFamily.triples(type, type, type));
  }
View Full Code Here

          }
        }, typeFamily.triples(type, type, type));
  }

  private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
    PTypeFamily typeFamily = coll.getTypeFamily();
    return coll.parallelDo(new DoFn<T, Pair<T, Boolean>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Boolean>> emitter) {
        emitter.emit(Pair.of(input, Boolean.TRUE));
      }
    }, typeFamily.tableOf(coll.getPType(), typeFamily.booleans()));
  }
View Full Code Here

   * @param pcollect The {@code PCollection} to convert
   * @return A {@code PTable} that contains the same data as the input {@code PCollection}
   */
  public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
    PType<Pair<K, V>> pt = pcollect.getPType();
    PTypeFamily ptf = pt.getFamily();
    PTableType<K, V> ptt = ptf.tableOf(pt.getSubTypes().get(0), pt.getSubTypes().get(1));
    DoFn<Pair<K, V>, Pair<K, V>> id = IdentityFn.getInstance();
    return pcollect.parallelDo("asPTable", id, ptt);
  }
View Full Code Here

   * @param ptype The PType for the returned keys
   * @return A new {@code PTable<K2, V>} instance
   */
  public static <K1, K2, V> PTable<K2, V> mapKeys(String name, PTable<K1, V> ptable, MapFn<K1, K2> mapFn,
      PType<K2> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K1, V, K2, V>(mapFn, IdentityFn.<V>getInstance()),
        ptf.tableOf(ptype, ptable.getValueType()));
  }
View Full Code Here

   * @param ptype The PType for the returned values
   * @return A new {@code PTable<K, V>} instance
   */
  public static <K, U, V> PTable<K, V> mapValues(String name, PTable<K, U> ptable, MapFn<U, V> mapFn,
      PType<V> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K, U, K, V>(IdentityFn.<K>getInstance(), mapFn),
        ptf.tableOf(ptable.getKeyType(), ptype));
  }
View Full Code Here

   */
  public static <K, U, V> PTable<K, V> mapValues(String name,
      PGroupedTable<K, U> ptable,
      MapFn<Iterable<U>, V> mapFn,
      PType<V> ptype) {
    PTypeFamily ptf = ptable.getTypeFamily();
    return ptable.parallelDo(name,
        new PairMapFn<K, Iterable<U>, K, V>(IdentityFn.<K>getInstance(), mapFn),
        ptf.tableOf((PType<K>) ptable.getPType().getSubTypes().get(0), ptype));
  }
View Full Code Here

TOP

Related Classes of org.apache.crunch.types.PTypeFamily

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.