Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily.pairs()


    PTable<K, Pair<U, V>> cgRight = right.parallelDo("coGroupTag2", new CogroupFn2<K, U, V>(),
        ptf.tableOf(keyType, itype));

    PTable<K, Pair<U, V>> both = cgLeft.union(cgRight);

    PType<Pair<Collection<U>, Collection<V>>> otype = ptf.pairs(ptf.collections(leftType), ptf.collections(rightType));
    return both.groupByKey().parallelDo("cogroup", new PostGroupFn<K, U, V>(), ptf.tableOf(keyType, otype));
  }

  private static class CogroupFn1<K, V, U> extends MapValuesFn<K, V, Pair<V, U>> {
    @Override
View Full Code Here


    // put U and V into a pair/tuple in the key so we can do grouping and
    // sorting
    PTypeFamily tf = collection.getTypeFamily();
    PType<Pair<U, V>> pType = collection.getPType();
    @SuppressWarnings("unchecked")
    PTableType<Pair<U, V>, Void> type = tf.tableOf(tf.pairs(pType.getSubTypes().get(0), pType.getSubTypes().get(1)),
        tf.nulls());
    PTable<Pair<U, V>, Void> pt = collection.parallelDo(new DoFn<Pair<U, V>, Pair<Pair<U, V>, Void>>() {
      @Override
      public void process(Pair<U, V> input, Emitter<Pair<Pair<U, V>, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
View Full Code Here

  private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
      PTable<K, Pair<V1, V2>> input) {
    PTypeFamily ptf = input.getTypeFamily();
    PType<Pair<V1, V2>> valueType = input.getValueType();
    PTableType<Pair<K, V1>, Pair<V1, V2>> inter = ptf.tableOf(
        ptf.pairs(input.getKeyType(), valueType.getSubTypes().get(0)),
        valueType);
    PTableType<K, Collection<Pair<V1, V2>>> out = ptf.tableOf(input.getKeyType(),
        ptf.collections(input.getValueType()));
    return input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), inter)
        .groupByKey(
View Full Code Here

    PTypeFamily rtf = right.getTypeFamily();

    PTable<Pair<Integer, Integer>, Pair<K1, U>> leftCross = left.parallelDo(new GFCross<Pair<K1, U>>(0, parallelism),
        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), ltf.pairs(left.getKeyType(), left.getValueType())));
    PTable<Pair<Integer, Integer>, Pair<K2, V>> rightCross = right.parallelDo(new GFCross<Pair<K2, V>>(1, parallelism),
        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), rtf.pairs(right.getKeyType(), right.getValueType())));

    PTable<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> cg = leftCross.join(rightCross);

    PTypeFamily ctf = cg.getTypeFamily();
View Full Code Here

    PTypeFamily rtf = right.getTypeFamily();

    PTable<Pair<Integer, Integer>, Pair<K1, U>> leftCross = left.parallelDo(new GFCross<Pair<K1, U>>(0, parallelism),
        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), ltf.pairs(left.getKeyType(), left.getValueType())));
    PTable<Pair<Integer, Integer>, Pair<K2, V>> rightCross = right.parallelDo(new GFCross<Pair<K2, V>>(1, parallelism),
        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), rtf.pairs(right.getKeyType(), right.getValueType())));

    PTable<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> cg = leftCross.join(rightCross);

    PTypeFamily ctf = cg.getTypeFamily();
View Full Code Here

            Pair<Pair<K1, U>, Pair<K2, V>> valuePair = input.second();
            return Pair.of(Pair.of(valuePair.first().first(), valuePair.second().first()),
                Pair.of(valuePair.first().second(), valuePair.second().second()));
          }
        },
        ctf.tableOf(ctf.pairs(left.getKeyType(), right.getKeyType()),
            ctf.pairs(left.getValueType(), right.getValueType())));
  }

  /**
   * Performs a full cross join on the specified {@link PCollection}s (using the
View Full Code Here

            return Pair.of(Pair.of(valuePair.first().first(), valuePair.second().first()),
                Pair.of(valuePair.first().second(), valuePair.second().second()));
          }
        },
        ctf.tableOf(ctf.pairs(left.getKeyType(), right.getKeyType()),
            ctf.pairs(left.getValueType(), right.getValueType())));
  }

  /**
   * Performs a full cross join on the specified {@link PCollection}s (using the
   * same strategy as Pig's CROSS operator).
View Full Code Here

   * @return table containing the top N values from the incoming table
   */
  public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
    PTypeFamily ptf = ptable.getTypeFamily();
    PTableType<K, V> base = ptable.getPTableType();
    PType<Pair<K, V>> pairType = ptf.pairs(base.getKeyType(), base.getValueType());
    PTableType<Integer, Pair<K, V>> inter = ptf.tableOf(ptf.ints(), pairType);
    return ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize, pairType), inter)
        .groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize, pairType))
        .parallelDo("top" + limit + "reduce", new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
          public void process(Pair<Integer, Pair<K, V>> input, Emitter<Pair<K, V>> emitter) {
View Full Code Here

    DistributedCache.addCacheFile(path.toUri(), pipeline.getConfiguration());

    MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.getName(), right.getPType());
    PTypeFamily typeFamily = left.getTypeFamily();
    return left.parallelDo("mapjoin", mapJoinDoFn,
        typeFamily.tableOf(left.getKeyType(), typeFamily.pairs(left.getValueType(), right.getValueType())));

  }

  static class MapsideJoinDoFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
View Full Code Here

    PTypeFamily rtf = right.getTypeFamily();

    PTable<Pair<Integer, Integer>, Pair<K1, U>> leftCross = left.parallelDo(new GFCross<Pair<K1, U>>(0, parallelism),
        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), ltf.pairs(left.getKeyType(), left.getValueType())));
    PTable<Pair<Integer, Integer>, Pair<K2, V>> rightCross = right.parallelDo(new GFCross<Pair<K2, V>>(1, parallelism),
        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), rtf.pairs(right.getKeyType(), right.getValueType())));

    PTable<Pair<Integer, Integer>, Pair<Collection<Pair<K1, U>>, Collection<Pair<K2, V>>>> cg = leftCross
        .cogroup(rightCross);

    PTypeFamily ctf = cg.getTypeFamily();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.