Examples of GroupbyNode

org.apache.tajo.engine.planner.logical.GroupbyNode

Examples of org.apache.tajo.engine.planner.logical.GroupbyNode

    public static int calculatePartitionNum(SubQuery subQuery, DataChannel channel) {
      TajoConf conf = subQuery.context.getConf();
      MasterPlan masterPlan = subQuery.getMasterPlan();
      ExecutionBlock parent = masterPlan.getParent(subQuery.getBlock());


      GroupbyNode grpNode = null;
      if (parent != null) {
        grpNode = PlannerUtil.findTopNode(parent.getPlan(), NodeType.GROUP_BY);
      }


      // Is this subquery the first step of join?
      if (parent != null && parent.getScanNodes().length == 2) {
        List<ExecutionBlock> childs = masterPlan.getChilds(parent);


        // for inner
        ExecutionBlock outer = childs.get(0);
        long outerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, outer);


        // for inner
        ExecutionBlock inner = childs.get(1);
        long innerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, inner);
        LOG.info("Outer volume: " + Math.ceil((double)outerVolume / 1048576));
        LOG.info("Inner volume: " + Math.ceil((double)innerVolume / 1048576));


        long smaller = Math.min(outerVolume, innerVolume);


        int mb = (int) Math.ceil((double)smaller / 1048576);
        LOG.info("Smaller Table's volume is approximately " + mb + " MB");
        // determine the number of task
        int taskNum = (int) Math.ceil((double)mb /
            conf.getIntVar(ConfVars.DIST_QUERY_JOIN_PARTITION_VOLUME));
        LOG.info("The determined number of join partitions is " + taskNum);
        return taskNum;


        // Is this subquery the first step of group-by?
      } else if (grpNode != null) {


        if (grpNode.getGroupingColumns().length == 0) {
          return 1;
        } else {
          long volume = getInputVolume(subQuery.masterPlan, subQuery.context, subQuery.block);


          int mb = (int) Math.ceil((double)volume / 1048576);

View Full Code Here

Examples of org.apache.tajo.engine.planner.logical.GroupbyNode

    public static int calculateShuffleOutputNum(SubQuery subQuery, DataChannel channel) {
      TajoConf conf = subQuery.context.getConf();
      MasterPlan masterPlan = subQuery.getMasterPlan();
      ExecutionBlock parent = masterPlan.getParent(subQuery.getBlock());


      GroupbyNode grpNode = null;
      if (parent != null) {
        grpNode = PlannerUtil.findMostBottomNode(parent.getPlan(), NodeType.GROUP_BY);
      }


      // Is this subquery the first step of join?
      if (parent != null && parent.getScanNodes().length == 2) {
        List<ExecutionBlock> childs = masterPlan.getChilds(parent);


        // for outer
        ExecutionBlock outer = childs.get(0);
        long outerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, outer);


        // for inner
        ExecutionBlock inner = childs.get(1);
        long innerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, inner);
        LOG.info(subQuery.getId() + ", Outer volume: " + Math.ceil((double) outerVolume / 1048576) + "MB, "
            + "Inner volume: " + Math.ceil((double) innerVolume / 1048576) + "MB");


        long bigger = Math.max(outerVolume, innerVolume);


        int mb = (int) Math.ceil((double) bigger / 1048576);
        LOG.info(subQuery.getId() + ", Bigger Table's volume is approximately " + mb + " MB");


        int taskNum = (int) Math.ceil((double) mb /
            conf.getIntVar(ConfVars.DIST_QUERY_JOIN_PARTITION_VOLUME));


        int totalMem = getClusterTotalMemory(subQuery);
        LOG.info(subQuery.getId() + ", Total memory of cluster is " + totalMem + " MB");
        int slots = Math.max(totalMem / conf.getIntVar(ConfVars.TASK_DEFAULT_MEMORY), 1);


        // determine the number of task
        taskNum = Math.min(taskNum, slots);
        LOG.info(subQuery.getId() + ", The determined number of join partitions is " + taskNum);


        // The shuffle output numbers of join may be inconsistent by execution block order.
        // Thus, we need to compare the number with DataChannel output numbers.
        // If the number is right, the number and DataChannel output numbers will be consistent.
        int outerShuffleOutptNum = 0, innerShuffleOutputNum = 0;
        for (DataChannel eachChannel : masterPlan.getOutgoingChannels(outer.getId())) {
          outerShuffleOutptNum = Math.max(outerShuffleOutptNum, eachChannel.getShuffleOutputNum());
        }


        for (DataChannel eachChannel : masterPlan.getOutgoingChannels(inner.getId())) {
          innerShuffleOutputNum = Math.max(innerShuffleOutputNum, eachChannel.getShuffleOutputNum());
        }


        if (outerShuffleOutptNum != innerShuffleOutputNum
            && taskNum != outerShuffleOutptNum
            && taskNum != innerShuffleOutputNum) {
          taskNum = Math.max(outerShuffleOutptNum, innerShuffleOutputNum);
        }


        return taskNum;


        // Is this subquery the first step of group-by?
      } else if (grpNode != null) {


        if (grpNode.getGroupingColumns().length == 0) {
          return 1;
        } else {
          long volume = getInputVolume(subQuery.masterPlan, subQuery.context, subQuery.block);


          int mb = (int) Math.ceil((double) volume / 1048576);

View Full Code Here

Examples of org.apache.tajo.engine.planner.logical.GroupbyNode

  public void testEquals() {
    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("name", Type.TEXT);
    schema.addColumn("age", Type.INT2);
    GroupbyNode groupbyNode = new GroupbyNode(0);
    groupbyNode.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)});
    ScanNode scanNode = new ScanNode(0);
    scanNode.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta(StoreType.CSV), new Path("in")));


    GroupbyNode groupbyNode2 = new GroupbyNode(0);
    groupbyNode2.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)});
    JoinNode joinNode = new JoinNode(0);
    ScanNode scanNode2 = new ScanNode(0);
    scanNode2.init(CatalogUtil.newTableDesc("in2", schema, CatalogUtil.newTableMeta(StoreType.CSV), new Path("in2")));


    groupbyNode.setChild(scanNode);
    groupbyNode2.setChild(joinNode);
    joinNode.setLeftChild(scanNode);
    joinNode.setRightChild(scanNode2);


    assertTrue(groupbyNode.equals(groupbyNode2));
    assertFalse(groupbyNode.deepEquals(groupbyNode2));


    ScanNode scanNode3 = new ScanNode(0);
    scanNode3.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta(StoreType.CSV), new Path("in")));
    groupbyNode2.setChild(scanNode3);


    assertTrue(groupbyNode.equals(groupbyNode2));
    assertTrue(groupbyNode.deepEquals(groupbyNode2));
  }

View Full Code Here

Examples of org.apache.tajo.engine.planner.logical.GroupbyNode

  @Test
  public final void testFindDistinctAggFunctions() throws PlanningException {
    String query = "select sum(score) + max(age) from people";
    Expr expr = analyzer.parse(query);
    LogicalPlan plan = planner.createPlan(session, expr);
    GroupbyNode groupByNode = plan.getRootBlock().getNode(NodeType.GROUP_BY);
    EvalNode [] aggEvals = groupByNode.getAggFunctions();


    List<AggregationFunctionCallEval> list = new ArrayList<AggregationFunctionCallEval>();
    for (int i = 0; i < aggEvals.length; i++) {
      list.addAll(EvalTreeUtil.findDistinctAggFunction(aggEvals[i]));
    }

View Full Code Here

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.