Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.Statistics


    long maxSize = context.conf.getLongVar(
        HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);

    int bigTablePosition = -1;

    Statistics bigInputStat = null;
    long totalSize = 0;
    int pos = 0;

    // bigTableFound means we've encountered a table that's bigger than the
    // max. This table is either the big table or we cannot convert.
    boolean bigTableFound = false;

    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {

      Statistics currInputStat = parentOp.getStatistics();
      if (currInputStat == null) {
        LOG.warn("Couldn't get statistics from: "+parentOp);
        return -1;
      }

      long inputSize = currInputStat.getDataSize();
      if ((bigInputStat == null) ||
          (inputSize > bigInputStat.getDataSize())) {

        if (bigTableFound) {
          // cannot convert to map join; we've already chosen a big table
          // on size and there's another one that's bigger.
          return -1;
        }

        if (inputSize/buckets > maxSize) {
          if (!bigTableCandidateSet.contains(pos)) {
            // the current table is too big for the map side and it cannot be
            // used as the big table either; cannot convert.
            return -1;
          }

          bigTableFound = true;
        }

        if (bigInputStat != null) {
          // we're replacing the current big table with a new one. Need
          // to count the current one as a map table then.
          totalSize += bigInputStat.getDataSize();
        }

        if (totalSize/buckets > maxSize) {
          // sum of small tables size in this join exceeds configured limit
          // hence cannot convert.
          return -1;
        }

        if (bigTableCandidateSet.contains(pos)) {
          bigTablePosition = pos;
          bigInputStat = currInputStat;
        }
      } else {
        totalSize += currInputStat.getDataSize();
        if (totalSize/buckets > maxSize) {
          // cannot hold all map tables in memory. Cannot convert.
          return -1;
        }
      }
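A condensed sketch of the selection rule above, under simplifying assumptions: the largest input becomes the map-join big table, every other input is charged against the small-table budget, and conversion is rejected once the per-bucket total exceeds the threshold. The names here (pickBigTable, inputSizes) are illustrative, not Hive API, and the bigTableCandidateSet handling is omitted.

import java.util.List;

// Hypothetical, simplified restatement of the big-table selection above.
public class BigTablePick {
  /** Returns the index of the big table, or -1 if map-join conversion fails. */
  static int pickBigTable(List<Long> inputSizes, long maxSize, int buckets) {
    int bigPos = -1;
    long bigSize = -1;
    long smallTotal = 0;
    for (int pos = 0; pos < inputSizes.size(); pos++) {
      long size = inputSizes.get(pos);
      if (size > bigSize) {
        if (bigSize >= 0) {
          smallTotal += bigSize; // demote the previous big table to a map table
        }
        bigSize = size;
        bigPos = pos;
      } else {
        smallTotal += size;
      }
      if (smallTotal / buckets > maxSize) {
        return -1; // small tables do not fit in memory
      }
    }
    return bigPos;
  }

  public static void main(String[] args) {
    // 10 MB threshold, 1 bucket: two 4 MB inputs fit beside a 100 MB big table
    System.out.println(pickBigTable(
        List.of(4_000_000L, 100_000_000L, 4_000_000L), 10_000_000L, 1)); // 1
  }
}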


   * @throws HiveException
   */
  public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
      Table table, TableScanOperator tableScanOperator) throws HiveException {

    Statistics stats = new Statistics();

    // column level statistics are required only for the columns that are needed
    List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
    List<String> neededColumns = tableScanOperator.getNeededColumns();
    boolean fetchColStats =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
    boolean fetchPartStats =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS);
    float deserFactor =
        HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR);

    if (!table.isPartitioned()) {
      long nr = getNumRows(table);
      long ds = getRawDataSize(table);
      if (ds <= 0) {
        ds = getTotalSize(table);

        // if data size is still 0 then get file size
        if (ds <= 0) {
          ds = getFileSizeForTable(conf, table);
        }

        ds = (long) (ds * deserFactor);
      }

      // a number of rows of -1 means that the statistics from the metastore are
      // not reliable, and 0 means statistics gathering is disabled
      if (nr <= 0) {
        int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns);
        if (avgRowSize > 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Estimated average row size: " + avgRowSize);
          }
          nr = ds / avgRowSize;
        }
      }
      stats.setNumRows(nr);
      stats.setDataSize(ds);

      List<ColStatistics> colStats = Lists.newArrayList();
      if (fetchColStats) {
        colStats = getTableColumnStats(table, schema, neededColumns);
      }

      stats.setColumnStatsState(deriveStatType(colStats, neededColumns));
      stats.addToColumnStats(colStats);
    } else if (partList != null) {
      // For partitioned tables, get the size of all the partitions after pruning
      // the partitions that are not required
      long nr = 0;
      long ds = 0;

      List<Long> rowCounts = Lists.newArrayList();
      List<Long> dataSizes = Lists.newArrayList();

      if (fetchPartStats) {
        rowCounts = getBasicStatForPartitions(
            table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
        dataSizes = getBasicStatForPartitions(
            table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE);

        nr = getSumIgnoreNegatives(rowCounts);
        ds = getSumIgnoreNegatives(dataSizes);
        if (ds <= 0) {
          dataSizes = getBasicStatForPartitions(
              table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE);
          ds = getSumIgnoreNegatives(dataSizes);
        }
      }

      // if the data size still could not be determined, fall back to the
      // filesystem to get file sizes
      if (ds <= 0) {
        dataSizes = getFileSizeForPartitions(conf, partList.getNotDeniedPartns());
      }
      ds = getSumIgnoreNegatives(dataSizes);
      ds = (long) (ds * deserFactor);

      int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns);
      if (avgRowSize > 0) {
        setUnknownRcDsToAverage(rowCounts, dataSizes, avgRowSize);
        nr = getSumIgnoreNegatives(rowCounts);
        ds = getSumIgnoreNegatives(dataSizes);

        // number of rows -1 means that statistics from metastore is not reliable
        if (nr <= 0) {
          nr = ds / avgRowSize;
        }
      }
      stats.addToNumRows(nr);
      stats.addToDataSize(ds);

      // if at least one partition does not contain a row count then mark the basic stats state as PARTIAL
      if (containsNonPositives(rowCounts)) {
        stats.setBasicStatsState(State.PARTIAL);
      }
      boolean haveFullStats = fetchColStats;
      if (fetchColStats) {
        List<String> partNames = new ArrayList<String>(partList.getNotDeniedPartns().size());
        for (Partition part : partList.getNotDeniedPartns()) {
          partNames.add(part.getName());
        }
        Map<String, List<ColStatistics>> partStats =
            getPartColumnStats(table, schema, partNames, neededColumns);
        if (partStats != null) {
          for (String partName : partNames) {
            List<ColStatistics> partStat = partStats.get(partName);
            haveFullStats &= (partStat != null);
            if (partStat != null) {
              stats.updateColumnStatsState(deriveStatType(partStat, neededColumns));
              stats.addToColumnStats(partStat);
            }
          }
        }
      }
      // There are some partitions with no state (or we didn't fetch any state).
      // Update the stats with empty list to reflect that in the state/initialize structures.
      if (!haveFullStats) {
        List<ColStatistics> emptyStats = Lists.<ColStatistics>newArrayList();
        stats.addToColumnStats(emptyStats);
        stats.updateColumnStatsState(deriveStatType(emptyStats, neededColumns));
      }
    }
    return stats;
  }
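The non-partitioned branch above walks a fallback chain: raw data size from the metastore, then total size, then actual file size, scaling on-disk sizes by a deserialization factor, and finally deriving the row count from data size over the estimated average row width. A minimal sketch of that chain; rawDataSize, totalSize, fileSize and avgRowSize stand in for the metastore and filesystem lookups in the real code.

// Hypothetical sketch of the size/row-count fallback chain in collectStatistics.
public class BasicStatsFallback {
  static long[] estimate(long rawDataSize, long totalSize, long fileSize,
      float deserFactor, long metastoreRows, int avgRowSize) {
    long ds = rawDataSize;
    if (ds <= 0) {
      ds = totalSize > 0 ? totalSize : fileSize;
      ds = (long) (ds * deserFactor); // on-disk size underestimates in-memory footprint
    }
    long nr = metastoreRows;
    if (nr <= 0 && avgRowSize > 0) {
      nr = ds / avgRowSize; // -1 from the metastore means "not reliable"
    }
    return new long[] { nr, ds };
  }

  public static void main(String[] args) {
    long[] s = estimate(-1, 1_000_000L, -1, 1.5f, -1, 100);
    System.out.println(s[0] + " rows, " + s[1] + " bytes"); // 15000 rows, 1500000 bytes
  }
}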

    }

    private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
        AnnotateStatsProcCtx aspCtx, List<String> neededCols) throws CloneNotSupportedException {
      long newNumRows = 0;
      Statistics andStats = null;
      if (pred instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred;
        GenericUDF udf = genFunc.getGenericUDF();

        // for AND conditions, update the stats in a cascading fashion
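The fragment above only hints at the cascading treatment of AND conditions: each conjunct is evaluated against the output of the previous one. A toy illustration of that cascade, with made-up selectivities (the real evaluateExpression derives them from column statistics):

// Toy sketch of cascading AND evaluation; the selectivities are illustrative.
public class AndCascade {
  static long cascade(long rows, double[] selectivities) {
    for (double s : selectivities) {
      rows = (long) (rows * s); // each conjunct filters the previous result
    }
    return rows;
  }

  public static void main(String[] args) {
    System.out.println(cascade(1_000_000L, new double[] { 0.1, 0.5 })); // 50000
  }
}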

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator gop = (GroupByOperator) nd;
      Operator<? extends OperatorDesc> parent = gop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
      HiveConf conf = aspCtx.getConf();
      int mapSideParallelism =
          HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM);
      List<AggregationDesc> aggDesc = gop.getConf().getAggregators();
      Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap();
      RowSchema rs = gop.getSchema();
      Statistics stats = null;

      try {
        if (satisfyPrecondition(parentStats)) {
          stats = parentStats.clone();

          List<ColStatistics> colStats =
              StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
          stats.setColumnStats(colStats);
          long dvProd = 1;
          long newNumRows = 0;

          // compute product of distinct values of grouping columns
          for (ColStatistics cs : colStats) {
            if (cs != null) {
              long dv = cs.getCountDistint();
              if (cs.getNumNulls() > 0) {
                dv += 1;
              }
              dvProd *= dv;
            } else {

              // partial column statistics on grouping attributes case.
              // if column statistics on grouping attribute is missing, then
              // assume worst case.
              // GBY rule will emit half the number of rows if dvProd is 0
              dvProd = 0;
              break;
            }
          }

          // map side
          if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) {

            // since we do not know whether hash-aggregation will be enabled or
            // disabled at runtime, we assume that map-side group by does not do
            // any reduction; hence, no group by rule will be applied

            // if a map-side grouping set is present, then multiply the number
            // of rows by the number of elements in the grouping set
            if (gop.getConf().isGroupingSetsPresent()) {
              int multiplier = gop.getConf().getListGroupingSets().size();

              // take into account the map-side parallelism as well, default is 1
              multiplier *= mapSideParallelism;
              newNumRows = multiplier * stats.getNumRows();
              long dataSize = multiplier * stats.getDataSize();
              stats.setNumRows(newNumRows);
              stats.setDataSize(dataSize);
              for (ColStatistics cs : colStats) {
                if (cs != null) {
                  long oldNumNulls = cs.getNumNulls();
                  long newNumNulls = multiplier * oldNumNulls;
                  cs.setNumNulls(newNumNulls);
                }
              }
            } else {

              // map side no grouping set
              newNumRows = stats.getNumRows() * mapSideParallelism;
              updateStats(stats, newNumRows, true);
            }
          } else {

            // reduce side
            newNumRows = applyGBYRule(stats.getNumRows(), dvProd);
            updateStats(stats, newNumRows, true);
          }
        } else {
          if (parentStats != null) {

            // worst case, in the absence of column statistics assume half the rows are emitted
            if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) {

              // map side
              stats = parentStats.clone();
            } else {

              // reduce side
              stats = parentStats.clone();
              long newNumRows = parentStats.getNumRows() / 2;
              updateStats(stats, newNumRows, false);
            }
          }
        }

        // if UDAFs are present, new columns need to be added
        if (!aggDesc.isEmpty() && stats != null) {
          List<ColStatistics> aggColStats = Lists.newArrayList();
          for (ColumnInfo ci : rs.getSignature()) {

            // if a column in the row schema is not contained in the column
            // expression map, then it is an aggregate column added by the
            // GBY operator. we will estimate the column statistics for
            // those newly added columns
            if (!colExprMap.containsKey(ci.getInternalName())) {
              String colName = ci.getInternalName();
              colName = StatsUtils.stripPrefixFromColumnName(colName);
              String tabAlias = ci.getTabAlias();
              String colType = ci.getTypeName();
              ColStatistics cs = new ColStatistics(tabAlias, colName, colType);
              cs.setCountDistint(stats.getNumRows());
              cs.setNumNulls(0);
              cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType));
              aggColStats.add(cs);
            }
          }
          stats.addToColumnStats(aggColStats);

          // if a UDAF is present and the column expression map is empty, then it
          // must be a full aggregation query like count(*), in which case the
          // number of rows will be 1
          if (colExprMap.isEmpty()) {
            stats.setNumRows(1);
            updateStats(stats, 1, true);
          }
        }

        gop.setStatistics(stats);

        if (LOG.isDebugEnabled() && stats != null) {
          LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString());
        }
      } catch (CloneNotSupportedException e) {
        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
      }
      return null;
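The reduce-side path above caps the output row count using dvProd, the product of distinct values of the grouping columns, and the comments state that the rule emits half the rows when dvProd is 0. applyGBYRule itself is not shown; the following is a plausible sketch consistent with those comments, not the actual Hive implementation.

public class GbyRuleSketch {
  static long applyGbyRule(long numRows, long dvProd) {
    if (dvProd == 0) {
      return numRows / 2; // missing stats on a grouping column: assume half the rows
    }
    return Math.min(numRows, dvProd); // at most one row per distinct key combination
  }

  public static void main(String[] args) {
    // 1M input rows, grouping columns with 10 and 20 distinct values: at most 200 groups
    System.out.println(applyGbyRule(1_000_000L, 10L * 20L)); // 200
    System.out.println(applyGbyRule(1_000_000L, 0L)); // 500000
  }
}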

        if (allSatisfyPreCondition) {

          // statistics object that is a combination of statistics from all
          // relations involved in the JOIN
          Statistics stats = new Statistics();
          long prodRows = 1;
          List<Long> distinctVals = Lists.newArrayList();
          boolean multiAttr = false;

          Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
          Map<Integer, List<String>> joinKeys = Maps.newHashMap();

          // get the join keys from parent ReduceSink operators
          for (int pos = 0; pos < parents.size(); pos++) {
            ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);

            Statistics parentStats = parent.getStatistics();
            prodRows *= parentStats.getNumRows();
            List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();

            // multi-attribute join key
            if (keyExprs.size() > 1) {
              multiAttr = true;
            }

            // compute fully qualified join key column names. this name will be
            // used to quickly look up the column statistics for a join key.
            // TODO: expressions in the join condition are currently ignored. assign
            // internal names to expressions and estimate their column statistics.
            List<String> fqCols =
                StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
            joinKeys.put(pos, fqCols);

            Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
            RowSchema rs = parent.getSchema();

            // get column statistics for all output columns
            List<ColStatistics> cs =
                StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
            for (ColStatistics c : cs) {
              if (c != null) {
                joinedColStats.put(c.getFullyQualifiedColName(), c);
              }
            }

            // since the new statistics are derived from all relations involved
            // in the JOIN, we need to update the state information accordingly
            stats.updateColumnStatsState(parentStats.getColumnStatsState());
          }

          // compute the denominator, i.e., max(V(R,Y), V(S,Y)) in case of a
          // single-attribute join, else max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2))
          // in case of a multi-attribute join
          long denom = 1;
          if (multiAttr) {
            List<Long> perAttrDVs = Lists.newArrayList();
            int numAttr = joinKeys.get(0).size();
            for (int idx = 0; idx < numAttr; idx++) {
              for (Integer i : joinKeys.keySet()) {
                String col = joinKeys.get(i).get(idx);
                ColStatistics cs = joinedColStats.get(col);
                if (cs != null) {
                  perAttrDVs.add(cs.getCountDistint());
                }
              }
              distinctVals.add(getDenominator(perAttrDVs));
              perAttrDVs.clear();
            }

            for (Long l : distinctVals) {
              denom *= l;
            }
          } else {
            for (List<String> jkeys : joinKeys.values()) {
              for (String jk : jkeys) {
                ColStatistics cs = joinedColStats.get(jk);
                if (cs != null) {
                  distinctVals.add(cs.getCountDistint());
                }
              }
            }
            denom = getDenominator(distinctVals);
          }

          // column statistics from different sources are put together, and the
          // fully qualified column names are renamed based on the output schema
          // of the join operator
          Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
          RowSchema rs = jop.getSchema();
          List<ColStatistics> outColStats = Lists.newArrayList();
          for (ColumnInfo ci : rs.getSignature()) {
            String key = ci.getInternalName();
            ExprNodeDesc end = colExprMap.get(key);
            if (end instanceof ExprNodeColumnDesc) {
              String colName = ((ExprNodeColumnDesc) end).getColumn();
              colName = StatsUtils.stripPrefixFromColumnName(colName);
              String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias();
              String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
              ColStatistics cs = joinedColStats.get(fqColName);
              String outColName = key;
              String outTabAlias = ci.getTabAlias();
              outColName = StatsUtils.stripPrefixFromColumnName(outColName);
              if (cs != null) {
                cs.setColumnName(outColName);
                cs.setTableAlias(outTabAlias);
              }
              outColStats.add(cs);
            }
          }

          // update join statistics
          stats.setColumnStats(outColStats);
          long newRowCount = prodRows / denom;
          stats.setNumRows(newRowCount);
          stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
          jop.setStatistics(stats);

          if (LOG.isDebugEnabled()) {
            LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
          }
        } else {

          // worst case when there are no column statistics
          float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR);
          int numParents = parents.size();
          List<Long> parentRows = Lists.newArrayList();
          List<Long> parentSizes = Lists.newArrayList();
          int maxRowIdx = 0;
          long maxRowCount = 0;
          int idx = 0;

          for (Operator<? extends OperatorDesc> op : parents) {
            Statistics ps = op.getStatistics();
            long rowCount = ps.getNumRows();
            if (rowCount > maxRowCount) {
              maxRowCount = rowCount;
              maxRowIdx = idx;
            }
            parentRows.add(rowCount);
            parentSizes.add(ps.getDataSize());
            idx++;
          }

          long maxDataSize = parentSizes.get(maxRowIdx);
          long newNumRows = (long) (joinFactor * maxRowCount * (numParents - 1));
          long newDataSize = (long) (joinFactor * maxDataSize * (numParents - 1));

          Statistics wcStats = new Statistics();
          wcStats.setNumRows(newNumRows);
          wcStats.setDataSize(newDataSize);
          jop.setStatistics(wcStats);

          if (LOG.isDebugEnabled()) {
            LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
          }
        }
      }
      return null;
    }
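The estimate above is the classical formula |R JOIN S| = (|R| * |S|) / max(V(R,y), V(S,y)), extended to multi-attribute keys by multiplying the per-attribute maxima. A small worked example under those assumptions:

import java.util.List;

// Worked example of the join cardinality formula used above.
public class JoinEstimate {
  static long estimateJoinRows(long rRows, long sRows, List<long[]> perAttrDVs) {
    long prodRows = rRows * sRows;
    long denom = 1;
    for (long[] dvs : perAttrDVs) {
      denom *= Math.max(dvs[0], dvs[1]); // max of V(R,yi) and V(S,yi)
    }
    return prodRows / denom;
  }

  public static void main(String[] args) {
    // R: 10k rows, S: 50k rows, one key with 1000 distinct values in R, 5000 in S
    System.out.println(estimateJoinRows(10_000L, 50_000L,
        List.of(new long[] { 1_000L, 5_000L }))); // 100000
  }
}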

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      LimitOperator lop = (LimitOperator) nd;
      Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
      HiveConf conf = aspCtx.getConf();

      try {
        long limit = lop.getConf().getLimit();

        if (satisfyPrecondition(parentStats)) {
          Statistics stats = parentStats.clone();

          // if limit is greater than available rows then do not update
          // statistics
          if (limit <= parentStats.getNumRows()) {
            updateStats(stats, limit, true);
          }
          lop.setStatistics(stats);

          if (LOG.isDebugEnabled()) {
            LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
          }
        } else {
          if (parentStats != null) {

            // in the absence of column statistics, compute the data size
            // based on the average row size
            Statistics wcStats = parentStats.clone();
            if (limit <= parentStats.getNumRows()) {
              long numRows = limit;
              long avgRowSize = parentStats.getAvgRowSize();
              long dataSize = avgRowSize * limit;
              wcStats.setNumRows(numRows);
              wcStats.setDataSize(dataSize);
            }
            lop.setStatistics(wcStats);

            if (LOG.isDebugEnabled()) {
              LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
            }
          }
        }
      } catch (CloneNotSupportedException e) {
        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
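Both branches above cap the row count at the limit; the no-column-stats branch rebuilds the data size from the parent's average row width. A one-method restatement of that arithmetic:

// Sketch of the LimitOperator estimate above.
public class LimitEstimate {
  static long[] applyLimit(long limit, long parentRows, long avgRowSize) {
    long rows = Math.min(limit, parentRows);
    return new long[] { rows, rows * avgRowSize };
  }

  public static void main(String[] args) {
    long[] est = applyLimit(100, 1_000_000L, 250L);
    System.out.println(est[0] + " rows, " + est[1] + " bytes"); // 100 rows, 25000 bytes
  }
}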

        Object... nodeOutputs) throws SemanticException {
      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
      OperatorDesc conf = op.getConf();

      if (conf != null) {
        Statistics stats = conf.getStatistics();
        if (stats == null) {
          if (op.getParentOperators() != null) {

            // if a parent's statistics are null then that branch of the tree has
            // not been walked yet. don't update the stats until all branches are walked
            if (isAllParentsContainStatistics(op)) {
              stats = new Statistics();
              for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
                if (parent.getStatistics() != null) {
                  Statistics parentStats = parent.getStatistics();
                  stats.addToNumRows(parentStats.getNumRows());
                  stats.addToDataSize(parentStats.getDataSize());
                  stats.updateColumnStatsState(parentStats.getColumnStatsState());
                  stats.addToColumnStats(parentStats.getColumnStats());
                  op.getConf().setStatistics(stats);

                  if (LOG.isDebugEnabled()) {
                    LOG.debug("[0] STATS-" + op.toString() + ": " + stats.extendedToString());
                  }
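For operators without a dedicated rule, the default processor above merges statistics from all parents by summing row counts and data sizes. A minimal standalone sketch of that merge; ParentStats is a stand-in record, not the Hive Statistics class.

import java.util.List;

// Minimal sketch of the default merge rule above.
public class MergeParentStats {
  record ParentStats(long numRows, long dataSize) {}

  static ParentStats merge(List<ParentStats> parents) {
    long rows = 0, bytes = 0;
    for (ParentStats p : parents) {
      rows += p.numRows();
      bytes += p.dataSize();
    }
    return new ParentStats(rows, bytes);
  }

  public static void main(String[] args) {
    System.out.println(merge(List.of(
        new ParentStats(1_000, 64_000),
        new ParentStats(2_000, 128_000)))); // ParentStats[numRows=3000, dataSize=192000]
  }
}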

        throw new SemanticException(e1);
      }
      Table table = aspCtx.getParseContext().getTopToTable().get(tsop);

      // gather statistics for the first time and attach them to the table scan operator
      Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
      try {
        tsop.setStatistics(stats.clone());

        if (LOG.isDebugEnabled()) {
          LOG.debug("[0] STATS-" + tsop.toString() + ": " + stats.extendedToString());
        }
      } catch (CloneNotSupportedException e) {
        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
      }
      return null;

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      SelectOperator sop = (SelectOperator) nd;
      Operator<? extends OperatorDesc> parent = sop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
      HiveConf conf = aspCtx.getConf();

      // SELECT (*) does not change the statistics. Just pass on the parent statistics
      if (sop.getConf().isSelectStar()) {
        try {
          if (parentStats != null) {
            sop.setStatistics(parentStats.clone());
          }
        } catch (CloneNotSupportedException e) {
          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
        }
        return null;
      }

      try {
        if (satisfyPrecondition(parentStats)) {
          Statistics stats = parentStats.clone();
          List<ColStatistics> colStats =
              StatsUtils.getColStatisticsFromExprMap(conf, parentStats, sop.getColumnExprMap(),
                  sop.getSchema());
          long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
          stats.setColumnStats(colStats);
          stats.setDataSize(dataSize);
          sop.setStatistics(stats);

          if (LOG.isDebugEnabled()) {
            LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString());
          }
        } else {
          if (parentStats != null) {
            sop.setStatistics(parentStats.clone());

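In contrast to the SELECT * shortcut, the projecting branch above keeps the row count but recomputes the data size from column statistics of only the projected columns. A sketch of that recomputation, assuming per-column average widths are known:

import java.util.List;

// Sketch of the SelectOperator data-size recomputation above.
public class SelectEstimate {
  static long dataSizeFromColumns(long numRows, List<Double> avgColWidths) {
    double rowWidth = avgColWidths.stream().mapToDouble(Double::doubleValue).sum();
    return (long) (numRows * rowWidth);
  }

  public static void main(String[] args) {
    // 10k rows, projecting an int (~4 bytes) and a short string (~20 bytes)
    System.out.println(dataSizeFromColumns(10_000L, List.of(4.0, 20.0))); // 240000
  }
}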

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
      FilterOperator fop = (FilterOperator) nd;
      Operator<? extends OperatorDesc> parent = fop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
      List<String> neededCols = null;
      if (parent instanceof TableScanOperator) {
        TableScanOperator tsop = (TableScanOperator) parent;
        neededCols = tsop.getNeededColumns();
      }

      try {
        if (parentStats != null) {
          ExprNodeDesc pred = fop.getConf().getPredicate();

          // evaluate filter expression and update statistics
          long newNumRows = evaluateExpression(parentStats, pred, aspCtx, neededCols);
          Statistics st = parentStats.clone();

          if (satisfyPrecondition(parentStats)) {

            // update statistics based on column statistics.
            // OR conditions keep adding stats independently, which may push the
            // estimated number of rows above the input row count, in which
            // case the stats need not be updated
            if (newNumRows <= parentStats.getNumRows()) {
              updateStats(st, newNumRows, true);
            }

            if (LOG.isDebugEnabled()) {
              LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString());
            }
          } else {

            // update only the basic statistics in the absence of column statistics
            if (newNumRows <= parentStats.getNumRows()) {
              updateStats(st, newNumRows, false);
            }

            if (LOG.isDebugEnabled()) {
              LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString());
            }
          }
          fop.setStatistics(st);
          aspCtx.setAndExprStats(null);
        }
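The filter rule above accepts evaluateExpression's estimate only when it does not exceed the input row count, since independently estimated OR branches can over-count. A sketch of that guard, paired with a textbook equality selectivity (rows divided by distinct values) as one plausible ingredient; the real evaluateExpression handles many operator types.

// Sketch of the clamping guard in the filter rule above.
public class FilterEstimate {
  static long equalsSelectivity(long parentRows, long countDistinct) {
    // textbook estimate for "col = constant"; illustrative only
    return countDistinct > 0 ? parentRows / countDistinct : parentRows / 2;
  }

  static long clampToParent(long estimated, long parentRows) {
    // OR branches are estimated independently and may exceed the input size
    return Math.min(estimated, parentRows);
  }

  public static void main(String[] args) {
    long est = equalsSelectivity(100_000L, 50L);
    System.out.println(clampToParent(est, 100_000L)); // 2000
  }
}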
