Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator


        return null;
      }

      // Walk the operator tree to the TableScan and build the mapping
      // along the way for the columns that the group by uses as keys
      TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
          op.getParentOperators().get(0), keyColNames);

      if (tso == null) {
        // Could not find an allowed path to a table scan operator,
        // hence we are done
View Full Code Here


            return null;
          }

          // Walk the operator tree to the TableScan and build the mapping
          // along the way for the columns that the group by uses as keys
          TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
              reduceSinkOp.getParentOperators().get(0), keyColNames);

          if (tso == null) {
            // Could not find an allowed path to a table scan operator,
            // hence we are done
View Full Code Here

      // 3. Connect to metastore and get the stats
      // 4. Compose rows and add it in FetchWork
      // 5. Delete GBY - RS - GBY - SEL from the pipeline.

      try {
        TableScanOperator tsOp = (TableScanOperator) stack.get(0);
        if(tsOp.getParentOperators() != null && tsOp.getParentOperators().size() > 0) {
          // looks like a subq plan.
          return null;
        }
        SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0);
        for(ExprNodeDesc desc : selOp.getConf().getColList()) {
          if (!(desc instanceof ExprNodeColumnDesc)) {
            // Probably an expression, cant handle that
            return null;
          }
        }
        // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
        // we need not to do any instanceof checks for following.
        GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
        ReduceSinkOperator rsOp = (ReduceSinkOperator)gbyOp.getChildren().get(0);
        if (rsOp.getConf().getDistinctColumnIndices().size() > 0) {
          // we can't handle distinct
          return null;
        }

        selOp = (SelectOperator)rsOp.getChildOperators().get(0).getChildOperators().get(0);
        List<AggregationDesc> aggrs = gbyOp.getConf().getAggregators();

        if (!(selOp.getConf().getColList().size() == aggrs.size())) {
          // all select columns must be aggregations
          return null;

        }
        FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0));
        if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
          // looks like a subq plan.
          return null;
        }

        Table tbl = pctx.getTopToTable().get(tsOp);
        List<Object> oneRow = new ArrayList<Object>();
        List<ObjectInspector> ois = new ArrayList<ObjectInspector>();

        Hive hive = Hive.get(pctx.getConf());

        for (AggregationDesc aggr : aggrs) {
          if (aggr.getDistinct()) {
            // our stats for NDV is approx, not accurate.
            return null;
          }
          if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation(
              Description.class).name())) {
            if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
              return null;
            }
            Long rowCnt = getRowCnt(pctx, tsOp, tbl);
            if(rowCnt == null) {
              return null;
            }
            oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
                .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
            ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveCategory.DECIMAL));
          }
          else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
              Description.class).name())) {
            Long rowCnt = 0L;
            if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
                ExprNodeConstantDesc)) {
              // Its either count (*) or count(1) case
              rowCnt = getRowCnt(pctx, tsOp, tbl);
              if(rowCnt == null) {
                return null;
              }
            } else {
              // Its count(col) case
              if (!(aggr.getParameters().get(0) instanceof ExprNodeColumnDesc)) {
                // this is weird, we got expr or something in there, bail out
                Log.debug("Unexpected expression : " + aggr.getParameters().get(0));
                return null;
              }
              ExprNodeColumnDesc desc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
              String colName = desc.getColumn();
              StatType type = getType(desc.getTypeString());
              if(!tbl.isPartitioned()) {
                if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                  Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                  return null;
                }
                rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
                if (rowCnt < 1) {
                  Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
                  return null;
                }
                List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                    tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
                if (stats.isEmpty()) {
                  Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                  return null;
                }
                Long nullCnt = getNullcountFor(type, stats.get(0).getStatsData());
                if (null == nullCnt) {
                  Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
                } else {
                  rowCnt -= nullCnt;
                }
              } else {
                Set<Partition> parts = pctx.getPrunedPartitions(
                    tsOp.getConf().getAlias(), tsOp).getPartitions();
                for (Partition part : parts) {
                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
                    return null;
                  }
                  Long partRowCnt = Long.parseLong(part.getParameters()
                      .get(StatsSetupConst.ROW_COUNT));
                  if (partRowCnt < 1) {
                    Log.debug("Partition doesn't have upto date stats " + part.getSpec());
                    return null;
                  }
                  rowCnt += partRowCnt;
                }
                Collection<List<ColumnStatisticsObj>> result =
                    verifyAndGetPartStats(hive, tbl, colName, parts);
                if (result == null) {
                  return null; // logging inside
                }
                for (List<ColumnStatisticsObj> statObj : result) {
                  ColumnStatisticsData statData = validateSingleColStat(statObj);
                  if (statData == null) return null;
                  Long nullCnt = getNullcountFor(type, statData);
                  if (nullCnt == null) {
                    Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                        "metadata optimizer for column : " + colName);
                    return null;
                  } else {
                    rowCnt -= nullCnt;
                  }
                }
              }
            }
            oneRow.add(rowCnt);
            ois.add(PrimitiveObjectInspectorFactory.
                getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
          } else if (aggr.getGenericUDAFName().equals(GenericUDAFMax.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if(!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
              if (stats.isEmpty()) {
                Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                return null;
              }
              ColumnStatisticsData statData = stats.get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default:
                  // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(
                  tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch (type) {
                case Integeral: {
                  long maxVal = Long.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    long curVal = statData.getLongStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  double maxVal = Double.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    double curVal = statData.getDoubleStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                }
                default:
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            }
          }  else if (aggr.getGenericUDAFName().equals(GenericUDAFMin.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if (!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
                  .get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default: // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch(type) {
                case Integeral: {
                  long minVal = Long.MAX_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
View Full Code Here

      List<String> keys = toColumns(keysMap.get((byte) index));
      if (keys == null || keys.isEmpty()) {
        return false;
      }
      int oldKeySize = keys.size();
      TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, keys);
      if (tso == null) {
        // We cannot get to root TableScan operator, likely because there is a join or group-by
        // between topOp and root TableScan operator. We don't handle that case, and simply return
        return false;
      }
View Full Code Here

          op = op.getParentOperators().get(posBigTable);
        } else {
          // nothing to be done for filters - the output schema does not change.
          if (op instanceof TableScanOperator) {
            assert !useBucketSortPositions;
            TableScanOperator ts = (TableScanOperator) op;
            Table srcTable = pGraphContext.getTopToTable().get(ts);

            // Find the positions of the bucketed columns in the table corresponding
            // to the select list.
            // Consider the following scenario:
View Full Code Here

      }

      assert (stack.size() == 3 && stack.get(1) instanceof FilterOperator) ||
          stack.size() == 2;

      TableScanOperator tsOp = (TableScanOperator) stack.get(0);
      ((SamplePrunerCtx) procCtx).getOpToSamplePruner().put(tsOp, sampleDescr);
      return null;
    }
View Full Code Here

      // symbol. So we just pop out the two elements from the top and if the
      // second one of them is not a table scan then the operator on the top of
      // the stack is the Table scan operator.
      Node tmp = stack.pop();
      Node tmp2 = stack.pop();
      TableScanOperator top = null;
      Operator<? extends OperatorDesc> pop = null;
      if (tmp2 instanceof TableScanOperator) {
        top = (TableScanOperator) tmp2;
        pop = top;
      } else {
        top = (TableScanOperator) stack.peek();
        fop2 = (FilterOperator) tmp2;
        pop = fop2;
      }
      stack.push(tmp2);
      stack.push(tmp);

      // If fop2 exists (i.e this is not the top level filter and fop2 is not
      // a sampling filter then we ignore the current filter
      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
        return null;
      }

      // ignore the predicate in case it is not a sampling predicate
      if (fop.getConf().getIsSamplingPred()) {
        return null;
      }

      if (fop.getParentOperators().size() > 1) {
        // It's not likely if there is no bug. But in case it happens, we must
        // have found a wrong filter operator. We skip the optimization then.
        return null;
      }


      ParseContext pctx = owc.getParseContext();
      PrunedPartitionList prunedPartList;
      try {
        String alias = (String) owc.getParseContext().getTopOps().keySet().toArray()[0];
        prunedPartList = pctx.getPrunedPartitions(alias, top);
      } catch (HiveException e) {
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        throw new SemanticException(e.getMessage(), e);
      }

      // Otherwise this is not a sampling predicate. We need to process it.
      ExprNodeDesc predicate = fop.getConf().getPredicate();
      String alias = top.getConf().getAlias();

      ArrayList<Partition> partitions = new ArrayList<Partition>();
      if (prunedPartList == null) {
        return null;
      }

      for (Partition p : prunedPartList.getPartitions()) {
        if (!p.getTable().isPartitioned()) {
          return null;
        }
      }

      partitions.addAll(prunedPartList.getPartitions());

      PcrExprProcFactory.NodeInfoWrapper wrapper = PcrExprProcFactory.walkExprTree(
          alias, partitions, top.getConf().getVirtualCols(), predicate);

      if (wrapper.state == PcrExprProcFactory.WalkState.TRUE) {
        owc.getOpToRemove().add(new PcrOpWalkerCtx.OpToDeleteInfo(pop, fop));
      } else if (wrapper.state != PcrExprProcFactory.WalkState.FALSE) {
        fop.getConf().setPredicate(wrapper.outExpr);
View Full Code Here

   * store needed columns in tableScanDesc.
   */
  public static class ColumnPrunerTableScanProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator scanOp = (TableScanOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx
          .genColLists((Operator<? extends OperatorDesc>) nd);
      if (cols == null && !scanOp.getConf().isGatherStats() ) {
        scanOp.setNeededColumnIDs(null);
        return null;
      }
      cols = cols == null ? new ArrayList<String>() : cols;
     
      cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
          cols);
      List<Integer> neededColumnIds = new ArrayList<Integer>();
      List<String> neededColumnNames = new ArrayList<String>();
      RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
      TableScanDesc desc = scanOp.getConf();
      List<VirtualColumn> virtualCols = desc.getVirtualCols();
      List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

      // add virtual columns for ANALYZE TABLE
      if(scanOp.getConf().isGatherStats()) {
        cols.add(VirtualColumn.RAWDATASIZE.getName());
      }

      for (int i = 0; i < cols.size(); i++) {
        String[] tabCol = inputRR.reverseLookup(cols.get(i));
        if(tabCol == null) {
          continue;
        }
        ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
        if (colInfo.getIsVirtualCol()) {
          // part is also a virtual column, but part col should not in this
          // list.
          for (int j = 0; j < virtualCols.size(); j++) {
            VirtualColumn vc = virtualCols.get(j);
            if (vc.getName().equals(colInfo.getInternalName())) {
              newVirtualCols.add(vc);
            }
          }
          //no need to pass virtual columns to reader.
          continue;
        }
        int position = inputRR.getPosition(cols.get(i));
        if (position >= 0) {
          // get the needed columns by id and name
          neededColumnIds.add(position);
          neededColumnNames.add(cols.get(i));
        }
      }

      desc.setVirtualCols(newVirtualCols);
      scanOp.setNeededColumnIDs(neededColumnIds);
      scanOp.setNeededColumns(neededColumnNames);
      return null;
    }
View Full Code Here

    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();

    // Create the temporary file, its corresponding FileSinkOperaotr, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp =
        GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);

    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
    uCtxTask.addTTDesc(tt_desc);
View Full Code Here

        for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
            .getParseContext().getTopOps().entrySet()) {
          Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
          if (topOp instanceof TableScanOperator
              && tsoTopMap.containsKey(topOp)) {
            TableScanOperator tableScanOp = (TableScanOperator) topOp;
            Table tbl = tsoTopMap.get(tableScanOp);
            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
            List<FieldSchema> columns = tbl.getCols();
            List<String> cols = new ArrayList<String>();
            for (int i = 0; i < neededColumnIds.size(); i++) {
              cols.add(columns.get(neededColumnIds.get(i)).getName());
            }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.TableScanOperator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.