Package org.apache.crunch

Examples of org.apache.crunch.GroupingOptions$Builder


   *
   * @return a {@code PTable} representing the sorted collection.
   */
  public static <K, V> PTable<K, V> sort(PTable<K, V> table, int numReducers, Order key) {
    Configuration conf = table.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(table, conf, numReducers, key);
    return table.groupByKey(options).ungroup();
  }
View Full Code Here


      ColumnOrder... columnOrders) {
    PType<T> pType = collection.getPType();
    SortFns.KeyExtraction<T> ke = new SortFns.KeyExtraction<T>(pType, columnOrders);
    PTable<Object, T> pt = collection.by(ke.getByFn(), ke.getKeyType());
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(pt, conf, numReducers, columnOrders);
    return pt.groupByKey(options).ungroup().values();
  }
View Full Code Here

      }
    }, tableOf(writables(KeyValue.class), nulls()));
    List <KeyValue> splitPoints = getSplitPoints(table);
    Path partitionFile = new Path(((MRPipeline) kvs.getPipeline()).createTempPath(), "partition");
    writePartitionInfo(conf, partitionFile, splitPoints);
    GroupingOptions options = GroupingOptions.builder()
        .partitionerClass(TotalOrderPartitioner.class)
        .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString())
        .numReducers(splitPoints.size() + 1)
        .sortComparatorClass(KeyValueComparator.class)
        .build();
View Full Code Here

   */
  public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
    PTypeFamily tf = collection.getTypeFamily();
    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, collection.getPType(), order);
    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
      }
View Full Code Here

   * @return a {@link PTable} representing the sorted collection.
   */
  public static <K, V> PTable<K, V> sort(PTable<K, V> table, Order key) {
    PTypeFamily tf = table.getTypeFamily();
    Configuration conf = table.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, table.getKeyType(), key);
    return table.groupByKey(options).ungroup();
  }
View Full Code Here

      public void process(Pair<U, V> input, Emitter<Pair<Pair<U, V>, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
      }
    }, type);
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
    PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();
    return sortedPt.parallelDo(new DoFn<Pair<Pair<U, V>, Void>, Pair<U, V>>() {
      @Override
      public void process(Pair<Pair<U, V>, Void> input, Emitter<Pair<U, V>> emitter) {
        emitter.emit(input.first());
View Full Code Here

          public void process(Tuple3<V1, V2, V3> input, Emitter<Pair<Tuple3<V1, V2, V3>, Void>> emitter) {
            emitter.emit(Pair.of(input, (Void) null));
          }
        }, type);
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
    PTable<Tuple3<V1, V2, V3>, Void> sortedPt = pt.groupByKey(options).ungroup();
    return sortedPt.parallelDo(new DoFn<Pair<Tuple3<V1, V2, V3>, Void>, Tuple3<V1, V2, V3>>() {
      @Override
      public void process(Pair<Tuple3<V1, V2, V3>, Void> input, Emitter<Tuple3<V1, V2, V3>> emitter) {
        emitter.emit(input.first());
View Full Code Here

          public void process(Tuple4<V1, V2, V3, V4> input, Emitter<Pair<Tuple4<V1, V2, V3, V4>, Void>> emitter) {
            emitter.emit(Pair.of(input, (Void) null));
          }
        }, type);
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
    PTable<Tuple4<V1, V2, V3, V4>, Void> sortedPt = pt.groupByKey(options).ungroup();
    return sortedPt.parallelDo(new DoFn<Pair<Tuple4<V1, V2, V3, V4>, Void>, Tuple4<V1, V2, V3, V4>>() {
      @Override
      public void process(Pair<Tuple4<V1, V2, V3, V4>, Void> input, Emitter<Tuple4<V1, V2, V3, V4>> emitter) {
        emitter.emit(input.first());
View Full Code Here

      public void process(TupleN input, Emitter<Pair<TupleN, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
      }
    }, type);
    Configuration conf = collection.getPipeline().getConfiguration();
    GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
    PTable<TupleN, Void> sortedPt = pt.groupByKey(options).ungroup();
    return sortedPt.parallelDo(new DoFn<Pair<TupleN, Void>, TupleN>() {
      @Override
      public void process(Pair<TupleN, Void> input, Emitter<TupleN> emitter) {
        emitter.emit(input.first());
View Full Code Here

TOP

Related Classes of org.apache.crunch.GroupingOptions$Builder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.