Package org.apache.hadoop.hive.metastore.api

Examples of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData


                    verifyAndGetPartStats(hive, tbl, colName, parts);
                if (result == null) {
                  return null; // logging inside
                }
                for (List<ColumnStatisticsObj> statObj : result) {
                  ColumnStatisticsData statData = validateSingleColStat(statObj);
                  if (statData == null) return null;
                  Long nullCnt = getNullcountFor(type, statData);
                  if (nullCnt == null) {
                    Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                        "metadata optimizer for column : " + colName);
                    return null;
                  } else {
                    rowCnt -= nullCnt;
                  }
                }
              }
            }
            oneRow.add(rowCnt);
            ois.add(PrimitiveObjectInspectorFactory.
                getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
          } else if (udaf instanceof GenericUDAFMax) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if(!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
              if (stats.isEmpty()) {
                Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                return null;
              }
              ColumnStatisticsData statData = stats.get(0).getStatsData();
              switch (type) {
                case Integeral:
                  LongColumnStatsData lstats = statData.getLongStats();
                  oneRow.add(lstats.isSetHighValue() ? lstats.getHighValue() : null);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  DoubleColumnStatsData dstats = statData.getDoubleStats();
                  oneRow.add(dstats.isSetHighValue() ? dstats.getHighValue() : null);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default:
                  // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(
                  tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch (type) {
                case Integeral: {
                  Long maxVal = null;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    LongColumnStatsData lstats = statData.getLongStats();
                    if (!lstats.isSetHighValue()) {
                      continue;
                    }
                    long curVal = lstats.getHighValue();
                    maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  Double maxVal = null;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    DoubleColumnStatsData dstats = statData.getDoubleStats();
                    if (!dstats.isSetHighValue()) {
                      continue;
                    }
                    double curVal = statData.getDoubleStats().getHighValue();
                    maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                }
                default:
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            }
          }  else if (udaf instanceof GenericUDAFMin) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if (!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
                  .get(0).getStatsData();
              switch (type) {
                case Integeral:
                  LongColumnStatsData lstats = statData.getLongStats();
                  oneRow.add(lstats.isSetLowValue() ? lstats.getLowValue() : null);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  DoubleColumnStatsData dstats = statData.getDoubleStats();
                  oneRow.add(dstats.isSetLowValue() ? dstats.getLowValue() : null);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default: // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch(type) {
                case Integeral: {
                  Long minVal = null;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    LongColumnStatsData lstats = statData.getLongStats();
                    if (!lstats.isSetLowValue()) {
                      continue;
                    }
                    long curVal = lstats.getLowValue();
                    minVal = minVal == null ? curVal : Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  Double minVal = null;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    DoubleColumnStatsData dstats = statData.getDoubleStats();
                    if (!dstats.isSetLowValue()) {
                      continue;
                    }
                    double curVal = statData.getDoubleStats().getLowValue();
                    minVal = minVal == null ? curVal : Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
View Full Code Here


    colBuffer.append(type);
    colBuffer.append(FIELD_DELIM);
    if (colStats != null) {
      ColumnStatisticsObj cso = getColumnStatisticsObject(name, type, colStats);
      if (cso != null) {
        ColumnStatisticsData csd = cso.getStatsData();
        if (csd.isSetBinaryStats()) {
          BinaryColumnStatsData bcsd = csd.getBinaryStats();
          appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "",
              bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", "");
        } else if (csd.isSetStringStats()) {
          StringColumnStatsData scsd = csd.getStringStats();
          appendColumnStatsNoFormatting(colBuffer, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
              scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
        } else if (csd.isSetBooleanStats()) {
          BooleanColumnStatsData bcsd = csd.getBooleanStats();
          appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "", "", "",
              bcsd.getNumTrues(), bcsd.getNumFalses());
        } else if (csd.isSetDecimalStats()) {
          DecimalColumnStatsData dcsd = csd.getDecimalStats();
          appendColumnStatsNoFormatting(colBuffer, convertToString(dcsd.getLowValue()),
              convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
              "", "", "", "");
        } else if (csd.isSetDoubleStats()) {
          DoubleColumnStatsData dcsd = csd.getDoubleStats();
          appendColumnStatsNoFormatting(colBuffer, dcsd.getLowValue(), dcsd.getHighValue(),
              dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
        } else if (csd.isSetLongStats()) {
          LongColumnStatsData lcsd = csd.getLongStats();
          appendColumnStatsNoFormatting(colBuffer, lcsd.getLowValue(), lcsd.getHighValue(),
              lcsd.getNumNulls(), lcsd.getNumDVs(), "", "", "", "");
        }
      } else {
        appendColumnStatsNoFormatting(colBuffer, "", "", "", "", "", "", "", "");
View Full Code Here

    tableInfo.append(String.format("%-" + ALIGNMENT + "s", colType)).append(FIELD_DELIM);

    if (colStats != null) {
      ColumnStatisticsObj cso = getColumnStatisticsObject(colName, colType, colStats);
      if (cso != null) {
        ColumnStatisticsData csd = cso.getStatsData();
        if (csd.isSetBinaryStats()) {
          BinaryColumnStatsData bcsd = csd.getBinaryStats();
          appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(),
              bcsd.getMaxColLen(), "", "");
        } else if (csd.isSetStringStats()) {
          StringColumnStatsData scsd = csd.getStringStats();
          appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
              scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
        } else if (csd.isSetBooleanStats()) {
          BooleanColumnStatsData bcsd = csd.getBooleanStats();
          appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "",
              bcsd.getNumTrues(), bcsd.getNumFalses());
        } else if (csd.isSetDecimalStats()) {
          DecimalColumnStatsData dcsd = csd.getDecimalStats();
          appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()),
              convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
              "", "", "", "");
        } else if (csd.isSetDoubleStats()) {
          DoubleColumnStatsData dcsd = csd.getDoubleStats();
          appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
              dcsd.getNumDVs(), "", "", "", "");
        } else if (csd.isSetLongStats()) {
          LongColumnStatsData lcsd = csd.getLongStats();
          appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
              lcsd.getNumDVs(), "", "", "", "");
        }
      } else {
        appendColumnStats(tableInfo, "", "", "", "", "", "", "", "");
View Full Code Here

      return colStats;
    }
  }

  private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaException {
    ColumnStatisticsData data = new ColumnStatisticsData();
    ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
    Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
        declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
        avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
    StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
View Full Code Here

    statsObj.setColName(colName.get(0));

    statsObj.setColType(colType.get(0));

    ColumnStatisticsData statsData = new ColumnStatisticsData();

    String columnType = colType.get(0);

    if (columnType.equalsIgnoreCase("long")) {
      LongColumnStatsData longStats = new LongColumnStatsData();
      longStats.setNumNullsIsSet(false);
      longStats.setNumDVsIsSet(false);
      longStats.setLowValueIsSet(false);
      longStats.setHighValueIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          longStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("numDVs")) {
          longStats.setNumDVs(Long.parseLong(value));
        } else if (fName.equals("lowValue")) {
          longStats.setLowValue(Long.parseLong(value));
        } else if (fName.equals("highValue")) {
          longStats.setHighValue(Long.parseLong(value));
        } else {
          throw new SemanticException("Unknown stat");
        }

      }
      statsData.setLongStats(longStats);
      statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("double")) {
      DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
      doubleStats.setNumNullsIsSet(false);
      doubleStats.setNumDVsIsSet(false);
      doubleStats.setLowValueIsSet(false);
      doubleStats.setHighValueIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          doubleStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("numDVs")) {
          doubleStats.setNumDVs(Long.parseLong(value));
        } else if (fName.equals("lowValue")) {
          doubleStats.setLowValue(Double.parseDouble(value));
        } else if (fName.equals("highValue")) {
          doubleStats.setHighValue(Double.parseDouble(value));
        } else {
          throw new SemanticException("Unknown stat");
        }
      }
      statsData.setDoubleStats(doubleStats);
      statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("string")) {
      StringColumnStatsData stringStats = new StringColumnStatsData();
      stringStats.setMaxColLenIsSet(false);
      stringStats.setAvgColLenIsSet(false);
      stringStats.setNumNullsIsSet(false);
      stringStats.setNumDVsIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          stringStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("numDVs")) {
          stringStats.setNumDVs(Long.parseLong(value));
        } else if (fName.equals("avgColLen")) {
          stringStats.setAvgColLen(Double.parseDouble(value));
        } else if (fName.equals("maxColLen")) {
          stringStats.setMaxColLen(Long.parseLong(value));
        } else {
          throw new SemanticException("Unknown stat");
        }
      }
      statsData.setStringStats(stringStats);
      statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("boolean")) {
      BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
      booleanStats.setNumNullsIsSet(false);
      booleanStats.setNumTruesIsSet(false);
      booleanStats.setNumFalsesIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          booleanStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("numTrues")) {
          booleanStats.setNumTrues(Long.parseLong(value));
        } else if (fName.equals("numFalses")) {
          booleanStats.setNumFalses(Long.parseLong(value));
        } else {
          throw new SemanticException("Unknown stat");
        }
      }
      statsData.setBooleanStats(booleanStats);
      statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("binary")) {
      BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
      binaryStats.setNumNullsIsSet(false);
      binaryStats.setAvgColLenIsSet(false);
      binaryStats.setMaxColLenIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          binaryStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("avgColLen")) {
          binaryStats.setAvgColLen(Double.parseDouble(value));
        } else if (fName.equals("maxColLen")) {
          binaryStats.setMaxColLen(Long.parseLong(value));
        } else {
          throw new SemanticException("Unknown stat");
        }
      }
      statsData.setBinaryStats(binaryStats);
      statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("decimal")) {
      DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
      decimalStats.setNumNullsIsSet(false);
      decimalStats.setNumDVsIsSet(false);
      decimalStats.setLowValueIsSet(false);
      decimalStats.setHighValueIsSet(false);
      Map<String, String> mapProp = work.getMapProp();
      for (Entry<String, String> entry : mapProp.entrySet()) {
        String fName = entry.getKey();
        String value = entry.getValue();
        if (fName.equals("numNulls")) {
          decimalStats.setNumNulls(Long.parseLong(value));
        } else if (fName.equals("numDVs")) {
          decimalStats.setNumDVs(Long.parseLong(value));
        } else if (fName.equals("lowValue")) {
          BigDecimal d = new BigDecimal(value);
          decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d
              .unscaledValue().toByteArray()), (short) d.scale()));
        } else if (fName.equals("highValue")) {
          BigDecimal d = new BigDecimal(value);
          decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d
              .unscaledValue().toByteArray()), (short) d.scale()));
        } else {
          throw new SemanticException("Unknown stat");
        }
      }
      statsData.setDecimalStats(decimalStats);
      statsObj.setStatsData(statsData);
    } else {
      throw new SemanticException("Unsupported type");
    }
View Full Code Here

      ColumnStatisticsObj statsObj) {
    // First infer the type of object
    if (fieldName.equals("columntype")) {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
      ColumnStatisticsData statsData = new ColumnStatisticsData();

      // Pick the stats holder that matches the declared column type (case-insensitive),
      // install an empty instance of it in statsData, and attach statsData to statsObj.
      // NOTE(review): a type string matching none of the branches below (e.g. "decimal")
      // leaves statsObj without stats data, while the else branch of the enclosing
      // fieldName check dereferences statsObj.getStatsData() -- confirm that callers never
      // pass such a type, or that statsObj already carries stats data in that case.
      if (s.equalsIgnoreCase("long")) {
        LongColumnStatsData longStats = new LongColumnStatsData();
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("double")) {
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("string")) {
        StringColumnStatsData stringStats = new StringColumnStatsData();
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
      }
    } else {
      // invoke the right unpack method depending on data type of the column
      if (statsObj.getStatsData().isSetBooleanStats()) {
View Full Code Here

   */
  public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
      String colName) {
    ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType());
    String colType = cso.getColType();
    ColumnStatisticsData csd = cso.getStatsData();
    if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
        || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
        || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
      cs.setCountDistint(csd.getLongStats().getNumDVs());
      cs.setNumNulls(csd.getLongStats().getNumNulls());
      cs.setAvgColLen(JavaDataModel.get().primitive1());
    } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
      cs.setCountDistint(csd.getLongStats().getNumDVs());
      cs.setNumNulls(csd.getLongStats().getNumNulls());
      cs.setAvgColLen(JavaDataModel.get().primitive2());
    } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
      cs.setCountDistint(csd.getDoubleStats().getNumDVs());
      cs.setNumNulls(csd.getDoubleStats().getNumNulls());
      cs.setAvgColLen(JavaDataModel.get().primitive1());
    } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
      cs.setCountDistint(csd.getDoubleStats().getNumDVs());
      cs.setNumNulls(csd.getDoubleStats().getNumNulls());
      cs.setAvgColLen(JavaDataModel.get().primitive2());
    } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
        || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
        || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
      cs.setCountDistint(csd.getStringStats().getNumDVs());
      cs.setNumNulls(csd.getStringStats().getNumNulls());
      cs.setAvgColLen(csd.getStringStats().getAvgColLen());
    } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
      if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
        cs.setCountDistint(2);
      } else {
        cs.setCountDistint(1);
      }
      cs.setNumTrues(csd.getBooleanStats().getNumTrues());
      cs.setNumFalses(csd.getBooleanStats().getNumFalses());
      cs.setNumNulls(csd.getBooleanStats().getNumNulls());
      cs.setAvgColLen(JavaDataModel.get().primitive1());
    } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
      cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
      cs.setNumNulls(csd.getBinaryStats().getNumNulls());
    } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
      cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
    } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
      cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
    } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
View Full Code Here

                    verifyAndGetPartStats(hive, tbl, colName, parts);
                if (result == null) {
                  return null; // logging inside
                }
                for (List<ColumnStatisticsObj> statObj : result) {
                  ColumnStatisticsData statData = validateSingleColStat(statObj);
                  if (statData == null) return null;
                  Long nullCnt = getNullcountFor(type, statData);
                  if (nullCnt == null) {
                    Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                        "metadata optimizer for column : " + colName);
                    return null;
                  } else {
                    rowCnt -= nullCnt;
                  }
                }
              }
            }
            oneRow.add(rowCnt);
            ois.add(PrimitiveObjectInspectorFactory.
                getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
          } else if (aggr.getGenericUDAFName().equals(GenericUDAFMax.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if(!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
              if (stats.isEmpty()) {
                Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                return null;
              }
              ColumnStatisticsData statData = stats.get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default:
                  // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(
                  tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch (type) {
                case Integeral: {
                  long maxVal = Long.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    long curVal = statData.getLongStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  double maxVal = Double.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    double curVal = statData.getDoubleStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                }
                default:
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            }
          }  else if (aggr.getGenericUDAFName().equals(GenericUDAFMin.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if (!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
                  .get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default: // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch(type) {
                case Integeral: {
                  long minVal = Long.MAX_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    long curVal = statData.getLongStats().getLowValue();
                    minVal = Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  double minVal = Double.MAX_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    double curVal = statData.getDoubleStats().getLowValue();
                    minVal = Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
View Full Code Here

      ColumnStatisticsObj statsObj) {
    // First infer the type of object
    if (fieldName.equals("columntype")) {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
      ColumnStatisticsData statsData = new ColumnStatisticsData();

      if (s.equalsIgnoreCase("long")) {
        LongColumnStatsData longStats = new LongColumnStatsData();
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("double")) {
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("string")) {
        StringColumnStatsData stringStats = new StringColumnStatsData();
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
      } else if (s.equalsIgnoreCase("decimal")) {
        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
        statsData.setDecimalStats(decimalStats);
        statsObj.setStatsData(statsData);
      }
    } else {
      // invoke the right unpack method depending on data type of the column
      if (statsObj.getStatsData().isSetBooleanStats()) {
View Full Code Here

      // get the lowest for now as nobody actually uses this field.
      Object laObj = row[offset + 14];
      if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > (Long)laObj)) {
        csd.setLastAnalyzed((Long)laObj);
      }
      ColumnStatisticsData data = new ColumnStatisticsData();
      // see STATS_COLLIST
      int i = offset;
      ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
      Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
          declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.