Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.GroupByDesc
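GroupByDesc is the plan descriptor carried by a GroupByOperator. It holds the evaluation mode (for example HASH for map-side aggregation, or MERGEPARTIAL/FINAL on the reduce side), the group-by key expressions, the AggregationDesc list, the output column names, the memory settings used for map-side hash aggregation, and grouping-set metadata. The snippets below show it being constructed directly in tests and planners, and inspected or rewritten by optimizer rules such as column pruning, constant propagation, statistics annotation, the sorted/bucketed group-by optimizer, and the aggregate index rewrite.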


    mapColumnNames.put("Key", 0);
    mapColumnNames.put("Value", 1);
    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
    Set<Object> keys = new HashSet<Object>();

    GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
       dataTypeInfo, "Key", TypeInfoFactory.stringTypeInfo);

    VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc);

    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo);
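The buildKeyGroupByDesc helper above is defined elsewhere in the test, so its body is not shown here. Below is a minimal, hypothetical sketch of what such a helper could assemble, using the bean setters that appear in later snippets on this page; the column names, UDAF name, and mode are illustrative assumptions, and the UDAF evaluator wiring a runnable operator would also need is omitted.

    // Hypothetical sketch, not the original test helper.
    ArrayList<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "Key", null, false));

    ArrayList<ExprNodeDesc> aggParams = new ArrayList<ExprNodeDesc>();
    aggParams.add(new ExprNodeColumnDesc(dataTypeInfo, "Value", null, false));

    AggregationDesc agg = new AggregationDesc();
    agg.setGenericUDAFName(aggregateName);            // e.g. "sum", "count", "min", "max"
    agg.setParameters(aggParams);
    agg.setDistinct(false);
    agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);  // map-side partial aggregation
    // agg.setGenericUDAFEvaluator(...) would also be needed to actually run the operator.

    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);

    ArrayList<String> outputNames = new ArrayList<String>();
    outputNames.add("_col0");   // the key
    outputNames.add("_col1");   // the aggregate

    GroupByDesc desc = new GroupByDesc();
    desc.setMode(GroupByDesc.Mode.HASH);
    desc.setKeys(keyExprs);
    desc.setAggregators(aggs);
    desc.setOutputColumnNames(outputNames);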


    private boolean checkMapSideAggregation(GroupByOperator gop,
        List<ColStatistics> colStats, HiveConf conf) {

      List<AggregationDesc> aggDesc = gop.getConf().getAggregators();
      GroupByDesc desc = gop.getConf();
      GroupByDesc.Mode mode = desc.getMode();

      if (mode.equals(GroupByDesc.Mode.HASH)) {
        float hashAggMem = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
        float hashAggMaxThreshold = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);

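The two fractions read above typically correspond to hive.map.aggr.hash.percentmemory and hive.map.aggr.hash.force.flush.memory.threshold. A rough, illustrative sketch (not the body of checkMapSideAggregation) of how a hash-table memory budget can be derived from them:

      // Illustrative only: estimate how much heap the map-side hash table may use
      // before it has to flush.
      long maxHeapBytes = Runtime.getRuntime().maxMemory();
      long hashTableBudgetBytes = (long) (maxHeapBytes * hashAggMem * hashAggMaxThreshold);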

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
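In other words, the pruner keeps exactly the columns referenced by the group-by keys and the aggregation parameters; everything else can be dropped from the operator's input. A tiny illustration of the merge used above, assuming Utilities.mergeUniqElems performs an order-preserving union:

      List<String> cols = new ArrayList<String>();
      cols = Utilities.mergeUniqElems(cols, Arrays.asList("key", "value"));  // from the keys
      cols = Utilities.mergeUniqElems(cols, Arrays.asList("value", "ds"));   // from an aggregation
      // cols is now [key, value, ds]; any other input column can be pruned.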

        int depth) throws SemanticException {
      HiveConf hiveConf = ctx.getConf();
      GroupByOptimizerSortMatch match = checkSortGroupBy(stack, groupByOp);
      boolean useMapperSort =
          HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT);
      GroupByDesc groupByOpDesc = groupByOp.getConf();

      boolean removeReduceSink = false;
      boolean optimizeDistincts = false;
      boolean setBucketGroup = false;

      // Don't remove the operator for distincts
      if (useMapperSort &&
          (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
        if (!groupByOpDesc.isDistinct()) {
          removeReduceSink = true;
        }
        else if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
          // Optimize the query: select count(distinct keys) from T, where
          // T is bucketized and sorted by keys
          // Partial aggregation can be done by the mappers in this scenario

          List<ExprNodeDesc> keys =
              ((GroupByOperator)
              (groupByOp.getChildOperators().get(0).getChildOperators().get(0)))
                  .getConf().getKeys();
          if ((keys == null) || (keys.isEmpty())) {
            optimizeDistincts = true;
          }
        }
      }

      if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
          (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
        setBucketGroup = true;
      }

      if (removeReduceSink) {
        convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
      }
      else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        // In test mode, don't change the query plan; just set a query property.
        pGraphContext.getQueryProperties().setHasMapGroupBy(true);
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
          return;
        }
        ReduceSinkOperator reduceSinkOp =
            (ReduceSinkOperator)groupByOp.getChildOperators().get(0);
        GroupByDesc childGroupByDesc =
            ((GroupByOperator)
            (reduceSinkOp.getChildOperators().get(0))).getConf();

        for (int pos = 0; pos < childGroupByDesc.getAggregators().size(); pos++) {
          AggregationDesc aggr = childGroupByDesc.getAggregators().get(pos);
          // Partial aggregation is not done for distincts on the mapper
          // However, if the data is bucketed/sorted on the distinct key, partial aggregation
          // can be performed on the mapper.
          if (aggr.getDistinct()) {
            ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
            ExprNodeDesc param = aggr.getParameters().get(0);
            assert param instanceof ExprNodeColumnDesc;
            ExprNodeColumnDesc paramC = (ExprNodeColumnDesc) param;
            paramC.setIsPartitionColOrVirtualCol(false);
            paramC.setColumn("VALUE._col" + pos);
            parameters.add(paramC);
            aggr.setParameters(parameters);
            aggr.setDistinct(false);
            aggr.setMode(Mode.FINAL);
          }
        }
        // Partial aggregation is performed on the mapper, no distinct processing at the reducer
        childGroupByDesc.setDistinct(false);
        groupByOpDesc.setDontResetAggrsDistinct(true);
        groupByOpDesc.setBucketGroup(true);
        groupByOp.setUseBucketizedHiveInputFormat(true);
        // no distinct processing at the reducer
        // A query like 'select count(distinct key) from T' is transformed into
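The net effect of the rewrite above: each distinct aggregator on the reduce side has its single parameter redirected to the mapper's partial output column (VALUE._col<pos>), its distinct flag cleared, and its mode switched to FINAL, so the reducer only merges the per-bucket partial aggregates. The map-side operator is then marked as a bucket group and the bucketized input format is enabled so every mapper reads whole buckets.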

    ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
    ExprNodeDesc groupByExpr =
        new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), null, false);
    groupByExprs.add(groupByExpr);

    GroupByDesc groupBy =
        new GroupByDesc(GroupByDesc.Mode.HASH, outputNames, groupByExprs,
            new ArrayList<AggregationDesc>(), false, groupByMemoryUsage, memoryThreshold,
            null, false, 0, true);

    GroupByOperator groupByOp =
        (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, selectOp);

      if (colToConstants.isEmpty()) {
        return null;
      }

      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (int i = 0; i < keys.size(); i++) {
        ExprNodeDesc key = keys.get(i);
        ExprNodeDesc newkey = foldExpr(key, colToConstants, cppCtx, op, 0, false);
        keys.set(i, newkey);
      }
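An illustration of what the folding above is intended to achieve (an assumed example; the exact behavior lives in foldExpr and the constant-propagation context):

      // If an upstream filter pins a column to a constant, e.g. WHERE ds = '2014-01-01',
      // colToConstants maps that column to the constant, and the group-by key
      //     ExprNodeColumnDesc(string, "ds", ...)
      // is replaced by
      //     ExprNodeConstantDesc('2014-01-01')
      // which later rules can simplify or prune.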

       //Also, we do not rewrite cases where the same query branch has multiple group-by constructs
       if(canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
           !canApplyCtx.isQueryHasGroupBy()){

         canApplyCtx.setQueryHasGroupBy(true);
         GroupByDesc conf = operator.getConf();
         List<AggregationDesc> aggrList = conf.getAggregators();
         if(aggrList != null && aggrList.size() > 0){
             for (AggregationDesc aggregationDesc : aggrList) {
               canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
               //In the current implementation, we do not support more than one aggregation function in a group-by
               if(canApplyCtx.getAggFuncCnt() > 1) {
                 return false;
               }
               String aggFunc = aggregationDesc.getGenericUDAFName();
               if(!("count".equals(aggFunc))){
                 canApplyCtx.setAggFuncIsNotCount(true);
                 return false;
               }
               List<ExprNodeDesc> para = aggregationDesc.getParameters();
                 //for a valid aggregation, it needs to have a non-null parameter list
               if (para == null) {
                 canApplyCtx.setAggFuncColsFetchException(true);
               } else if (para.size() == 0) {
                 //count(*) case
                 canApplyCtx.setCountOnAllCols(true);
                 canApplyCtx.setAggFunction("_count_of_all");
               } else if (para.size() == 1) {
                 ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
                 if (expr instanceof ExprNodeColumnDesc){
                   //Add the columns to RewriteCanApplyCtx's selectColumnsList list
                   //to check later if index keys contain all select clause columns
                   //and vice versa. We get the select columns' actual names only here,
                   //when we have an agg func along with a group-by;
                   //SelectOperator has internal names in its colList data structure
                   canApplyCtx.getSelectColumnsList().add(
                       ((ExprNodeColumnDesc) expr).getColumn());
                   //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later
                   //if columns contained in agg func are index key columns
                   canApplyCtx.getAggFuncColList().add(
                       ((ExprNodeColumnDesc) expr).getColumn());
                   canApplyCtx.setAggFunction("_count_of_" +
                       ((ExprNodeColumnDesc) expr).getColumn() + "");
                 } else if(expr instanceof ExprNodeConstantDesc) {
                   //count(1) case
                   canApplyCtx.setCountOfOne(true);
                   canApplyCtx.setAggFunction("_count_of_1");
                 }
               } else {
                 throw new SemanticException("Invalid number of arguments for count");
               }
             }
         }

         //we need to have non-null group-by keys for a valid group-by operator
         List<ExprNodeDesc> keyList = conf.getKeys();
         if(keyList == null || keyList.size() == 0){
           canApplyCtx.setGbyKeysFetchException(true);
         }
         for (ExprNodeDesc expr : keyList) {
           checkExpression(canApplyCtx, expr);
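These checks gate Hive's aggregate-index rewrite: when they all pass, a count over the base table can be answered from the index table by summing its precomputed count column instead (see the next snippet, which swaps the AggregationDesc accordingly). Conceptually, with hypothetical table names:

      // Original query:          SELECT key, COUNT(key) FROM t GROUP BY key
      // Rewritten (conceptual):  SELECT key, SUM(`_count_of_key`) FROM t_index GROUP BY key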

            selReplacementCommand);

        //we get our new GroupByOperator here
        Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
        GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
        GroupByDesc oldConf = operator.getConf();

        //we need this information to set the correct colList, outputColumnNames in SelectOperator
        ExprNodeColumnDesc aggrExprNode = null;

        //Construct the new AggregationDesc to get rid of the current
        //internal names and replace them with new internal names
        //as required by the operator tree
        GroupByDesc newConf = newGbyOperator.getConf();
        List<AggregationDesc> newAggrList = newConf.getAggregators();
        if(newAggrList != null && newAggrList.size() > 0){
          for (AggregationDesc aggregationDesc : newAggrList) {
            rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
            aggrExprNode = (ExprNodeColumnDesc)aggregationDesc.getParameters().get(0);
            rewriteQueryCtx.setAggrExprNode(aggrExprNode);
          }
        }

        //Now the GroupByOperator has the new AggregationList; sum(`_count_of_indexed_key`)
        //instead of count(indexed_key)
        OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
        RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
        gbyOPC.setRowResolver(gbyRR);
        rewriteQueryCtx.getOpc().put(operator, gbyOPC);

        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
        operator.setConf(oldConf);


      } else {
        //we just need to reset the GenericUDAFEvaluator and its name for this
        //GroupByOperator whose parent is the ReduceSinkOperator
        GroupByDesc childConf = (GroupByDesc) operator.getConf();
        List<AggregationDesc> childAggrList = childConf.getAggregators();
        if(childAggrList != null && childAggrList.size() > 0){
          for (AggregationDesc aggregationDesc : childAggrList) {
            List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
            List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
            for (ExprNodeDesc expr : paraList) {

    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf
        .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);

    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        input), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;

    // this is the final group by operator, and multiple rows corresponding to the
    // grouping sets have been generated upstream.
    // However, if an additional MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            distPartAgg, groupByMemoryUsage, memoryThreshold,
            groupingSets,
            groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
            groupingSetsPosition, containsDistinctAggr),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
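For context on the grouping-sets arguments above: when grouping sets are present, each input row is expanded into one row per grouping set, with the keys outside that set nulled out and a grouping-set identifier carried as an extra key column; the expansion happens either upstream or, when an additional MR job is needed, inside this final group-by. A conceptual illustration (not Hive code; the identifier encoding varies by version):

    // GROUP BY a, b GROUPING SETS ((a, b), (a), ())
    // input row (a = 1, b = 2) expands to:
    //   (a = 1,    b = 2,    id for (a, b))
    //   (a = 1,    b = null, id for (a))
    //   (a = null, b = null, id for ())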
