Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily.pairs()


      return left.parallelDo("mapjoin", mapJoinDoFn,
          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())),
          optionsBuilder.build());
    } else { // in-memory pipeline
      return left.parallelDo(new InMemoryJoinFn<K, U, V>(iterable),
          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())));
    }
  }

  static class InMemoryJoinFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
View Full Code Here


                Pair<String, String> pair = Pair.of(word.toLowerCase(), title);
                emitter.emit(pair);
              }
            }
          }
        }, ptf.pairs(ptf.strings(), ptf.strings())));

    if (transformTF) {
      /*
       * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count
       * in title>
View Full Code Here

            @Override
            public Pair<String, Pair<String, Long>> map(Pair<Pair<String, String>, Long> input) {
              Pair<String, String> wordDocumentPair = input.first();
              return Pair.of(wordDocumentPair.first(), Pair.of(wordDocumentPair.second(), input.second()));
            }
          }, ptf.tableOf(ptf.strings(), ptf.pairs(ptf.strings(), ptf.longs())));

      pipeline.writeTextFile(wordDocumentCountPair, transformedOutput.getAbsolutePath());
    }

    SourceTarget<String> st = At.textFile(tfOutput.getAbsolutePath());
View Full Code Here

      throw new Error();

    PTable<Pair<Integer, Integer>, U> leftCross = left.parallelDo(new GFCross<U>(0, parallelism),
        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), left.getPType()));
    PTable<Pair<Integer, Integer>, V> rightCross = right.parallelDo(new GFCross<V>(1, parallelism),
        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), right.getPType()));

    PTable<Pair<Integer, Integer>, Pair<U, V>> cg = leftCross.join(rightCross);

    PTypeFamily ctf = cg.getTypeFamily();
View Full Code Here

    return cg.parallelDo(new MapFn<Pair<Pair<Integer, Integer>, Pair<U, V>>, Pair<U, V>>() {
      @Override
      public Pair<U, V> map(Pair<Pair<Integer, Integer>, Pair<U, V>> input) {
        return input.second();
      }
    }, ctf.pairs(left.getPType(), right.getPType()));
  }

}
View Full Code Here

  }

  public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
    PTypeFamily ptf = ptable.getTypeFamily();
    PTableType<K, V> base = ptable.getPTableType();
    PType<Pair<K, V>> pairType = ptf.pairs(base.getKeyType(), base.getValueType());
    PTableType<Integer, Pair<K, V>> inter = ptf.tableOf(ptf.ints(), pairType);
    return ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize), inter)
        .groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize))
        .parallelDo("top" + limit + "reduce", new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
          public void process(Pair<Integer, Pair<K, V>> input, Emitter<Pair<K, V>> emitter) {
View Full Code Here

  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
    PTypeFamily ptf = left.getTypeFamily();
    PGroupedTable<Pair<K, Integer>, Pair<U, V>> grouped = preJoin(left, right);
    PTableType<K, Pair<U, V>> ret = ptf
        .tableOf(left.getKeyType(), ptf.pairs(left.getValueType(), right.getValueType()));

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }

  private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
View Full Code Here

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }

  private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
    PTypeFamily ptf = left.getTypeFamily();
    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
        ptf.pairs(left.getValueType(), right.getValueType()));

    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
          @Override
View Full Code Here

  }

  private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
    PTypeFamily ptf = left.getTypeFamily();
    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
        ptf.pairs(left.getValueType(), right.getValueType()));

    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
          @Override
          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
View Full Code Here

  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(PTable<K, U> left, PTable<K, V> right) {
    PTypeFamily ptf = left.getTypeFamily();
    PType<K> keyType = left.getPTableType().getKeyType();
    PType<U> leftType = left.getPTableType().getValueType();
    PType<V> rightType = right.getPTableType().getValueType();
    PType<Pair<U, V>> itype = ptf.pairs(leftType, rightType);

    PTable<K, Pair<U, V>> cgLeft = left.parallelDo("coGroupTag1", new CogroupFn1<K, U, V>(),
        ptf.tableOf(keyType, itype));
    PTable<K, Pair<U, V>> cgRight = right.parallelDo("coGroupTag2", new CogroupFn2<K, U, V>(),
        ptf.tableOf(keyType, itype));
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.