Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily


   * the order specified using the given number of reducers.
   *
   * @return a {@code PCollection} representing the sorted collection.
   */
  public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
    PTypeFamily tf = collection.getTypeFamily();
    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
    Configuration conf = collection.getPipeline().getConfiguration();
    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
View Full Code Here


  // TODO: move to type family?
  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
      int numReducers, Order order) {
    PType<K> ptype = ptable.getKeyType();
    PTypeFamily tf = ptable.getTypeFamily();
    GroupingOptions.Builder builder = GroupingOptions.builder();
    if (order == Order.DESCENDING) {
      if (tf == WritableTypeFamily.getInstance()) {
        builder.sortComparatorClass(ReverseWritableComparator.class);
      } else if (tf == AvroTypeFamily.getInstance()) {
View Full Code Here

    return builder.build();
  }

  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
      int numReducers, ColumnOrder[] columnOrders) {
    PTypeFamily tf = ptable.getTypeFamily();
    PType<K> keyType = ptable.getKeyType();
    GroupingOptions.Builder builder = GroupingOptions.builder();
    if (tf == WritableTypeFamily.getInstance()) {
      if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
        builder.sortComparatorClass(ReverseWritableComparator.class);
View Full Code Here

      init();
    }
   
    private void init() {
      List<PType> pt = ptype.getSubTypes();
      PTypeFamily ptf = ptype.getFamily();
      if (cols.length == 1) {
        byFn = new SingleKeyFn(cols[0]);
        keyPType = pt.get(cols[0]);
      } else {
        TupleFactory tf;
        switch (cols.length) {
        case 2:
          tf = TupleFactory.PAIR;
          keyPType = ptf.pairs(pt.get(cols[0]), pt.get(cols[1]));
          break;
        case 3:
          tf = TupleFactory.TUPLE3;
          keyPType = ptf.triples(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]));
          break;
        case 4:
          tf = TupleFactory.TUPLE4;
          keyPType = ptf.quads(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]), pt.get(cols[3]));
          break;
        default:
          PType[] pts = new PType[cols.length];
          for (int i = 0; i < pts.length; i++) {
            pts[i] = pt.get(cols[i]);
          }
          tf = TupleFactory.TUPLEN;
          keyPType = (PType<Object>) (PType<?>) ptf.tuples(pts);
        }
       
        if (ptf == AvroTypeFamily.getInstance()) {
          Schema s = createOrderedTupleSchema(keyPType, columnOrder);
          keyPType = (PType<Object>) (PType<?>) Avros.generics(s);
View Full Code Here

   */
  public static <T> PCollection<T> reservorSample(
      PCollection<T> input,
      int sampleSize,
      Long seed) {
    PTypeFamily ptf = input.getTypeFamily();
    PType<Pair<T, Integer>> ptype = ptf.pairs(input.getPType(), ptf.ints());
    return weightedReservoirSample(
        input.parallelDo("Map to pairs for reservoir sampling", new MapFn<T, Pair<T, Integer>>() {
          @Override
          public Pair<T, Integer> map(T t) { return Pair.of(t, 1); }
        }, ptype),
View Full Code Here

   */
  public static <T, N extends Number> PCollection<T> weightedReservoirSample(
      PCollection<Pair<T, N>> input,
      int sampleSize,
      Long seed) {
    PTypeFamily ptf = input.getTypeFamily();
    PTable<Integer, Pair<T, N>> groupedIn = input.parallelDo(
        new MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>>() {
          @Override
          public Pair<Integer, Pair<T, N>> map(Pair<T, N> p) {
            return Pair.of(0, p);
          }
        }, ptf.tableOf(ptf.ints(), input.getPType()));
    int[] ss = { sampleSize };
    return groupedWeightedReservoirSample(groupedIn, ss, seed)
        .parallelDo("Extract sampled value from pair", new MapFn<Pair<Integer, T>, T>() {
          @Override
          public T map(Pair<Integer, T> p) {
View Full Code Here

   */
  public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
      PTable<Integer, Pair<T, N>> input,
      int[] sampleSizes,
      Long seed) {
    PTypeFamily ptf = input.getTypeFamily();
    PType<T> ttype = (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);
    PTableType<Integer, Pair<Double, T>> ptt = ptf.tableOf(ptf.ints(),
        ptf.pairs(ptf.doubles(), ttype));
   
    return input.parallelDo("Initial reservoir sampling", new ReservoirSampleFn<T, N>(sampleSizes, seed, ttype), ptt)
        .groupByKey(1)
        .combineValues(new WRSCombineFn<T>(sampleSizes, ttype))
        .parallelDo("Extract sampled values", new MapFn<Pair<Integer, Pair<Double, T>>, Pair<Integer, T>>() {
          @Override
          public Pair<Integer, T> map(Pair<Integer, Pair<Double, T>> p) {
            return Pair.of(p.first(), p.second().second());
          }
        }, ptf.pairs(ptf.ints(), ttype));
  }
View Full Code Here

        .parallelDo("SecondarySort.apply", new SSWrapFn<K, V1, V2, Pair<U, V>>(doFn), ptype);
  }
 
  private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
      PTable<K, Pair<V1, V2>> input, int numReducers) {
    PTypeFamily ptf = input.getTypeFamily();
    PType<Pair<V1, V2>> valueType = input.getValueType();
    PTableType<Pair<K, V1>, Pair<V1, V2>> inter = ptf.tableOf(
        ptf.pairs(input.getKeyType(), valueType.getSubTypes().get(0)),
        valueType);
    PTableType<K, Collection<Pair<V1, V2>>> out = ptf.tableOf(input.getKeyType(),
        ptf.collections(input.getValueType()));
    GroupingOptions.Builder gob = GroupingOptions.builder()
        .requireSortedKeys()
        .groupingComparatorClass(JoinUtils.getGroupingComparator(ptf))
        .partitionerClass(JoinUtils.getPartitionerClass(ptf));
    if (numReducers > 0) {
View Full Code Here

   * @param right right table to be joined
   * @param joinFn The user-specified implementation of the {@code JoinFn} class
   * @return joined tables
   */
  public PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
    PTypeFamily ptf = left.getTypeFamily();
    PGroupedTable<Pair<K, Integer>, Pair<U, V>> grouped = preJoin(left, right, numReducers);
    PTableType<K, Pair<U, V>> ret = ptf
        .tableOf(left.getKeyType(), ptf.pairs(left.getValueType(), right.getValueType()));

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }
View Full Code Here

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }

  static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right,
                                                                       int numReducers) {
    PTypeFamily ptf = left.getTypeFamily();
    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
        ptf.pairs(left.getValueType(), right.getValueType()));

    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
          @Override
          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
View Full Code Here

TOP

Related Classes of org.apache.crunch.types.PTypeFamily

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.