Examples of PrunedPartitionList


Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

    if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
      ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
      bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner);
    }
    if (aggressive || bypassFilter) {
      PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
      if (aggressive || pruned.getUnknownPartns().isEmpty()) {
        bypassFilter &= pruned.getUnknownPartns().isEmpty();
        return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter);
      }
    }
    return null;
  }
View Full Code Here
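The fragment above is part of the FETCH-task conversion check: a table scan qualifies only when the pruner expression touches partition columns alone (bypassFilter) and, unless aggressive conversion is requested, pruning leaves no unknown partitions. A minimal sketch of that decision, using only the PrunedPartitionList getter shown above; the class and method names are hypothetical:

    import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

    public final class FetchDecisionSketch {
      // True when the scan can be served by a simple FETCH task: either the
      // conversion is aggressive, or the filter uses only partition columns
      // and pruning left no "unknown" (not fully resolved) partitions.
      static boolean canConvertToFetch(PrunedPartitionList pruned,
          boolean aggressive, boolean bypassFilter) {
        return aggressive || (bypassFilter && pruned.getUnknownPartns().isEmpty());
      }
    }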

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

            // dynamic partition
            confirmedPartns.addAll(tblSpec.partitions);
          }
          if (confirmedPartns.size() > 0) {
            Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
            PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns,
                new HashSet<Partition>(), null);
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx, partList);
          } else { // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx);
          }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

    // Retrieve all partitions generated from partition pruner and partition column pruner
    PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx, LBPartitionProcFactory.getFilterProc(),
        LBPartitionProcFactory.getDefaultProc());

    PrunedPartitionList partsList = ((LBOpPartitionWalkerCtx) opPartWalkerCtx).getPartitions();
    if (partsList != null) {
      Set<Partition> parts = partsList.getConfirmedPartns();
      parts.addAll(partsList.getUnknownPartns());
      if (parts.size() > 0) {
        for (Partition part : parts) {
          // only process partition which is skewed and list bucketed
          if (ListBucketingPrunerUtils.isListBucketingPart(part)) {
            // create the context for walking operators
View Full Code Here
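A pattern that recurs throughout these examples is treating both the confirmed and the unknown partitions as query input (it appears again in the next snippet). A small sketch of that idiom as a helper, using only the two getters shown above; the class and method names are hypothetical:

    import java.util.LinkedHashSet;
    import java.util.Set;

    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

    public final class PartitionInputSketch {
      // Collects every partition that has to be scanned: the confirmed ones
      // plus the "unknown" ones whose predicate could not be fully evaluated.
      static Set<Partition> allInputPartitions(PrunedPartitionList partsList) {
        Set<Partition> parts =
            new LinkedHashSet<Partition>(partsList.getConfirmedPartns());
        parts.addAll(partsList.getUnknownPartns());
        return parts;
      }
    }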

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

    Path tblDir = null;
    TableDesc tblDesc = null;

    PrunedPartitionList partsList = pList;

    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());

    if (partsList == null) {
      try {
        partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp);
        if (partsList == null) {
          partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
            parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
            alias_id, parseCtx.getPrunedPartitions());
          parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList);
        }
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }

    // Generate the map work for this alias_id
    // pass both confirmed and unknown partitions through the map-reduce
    // framework
    Set<Partition> parts = partsList.getConfirmedPartns();
    parts.addAll(partsList.getUnknownPartns());
    PartitionDesc aliasPartnDesc = null;
    try {
      if (!parts.isEmpty()) {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // The table does not have any partitions
    if (aliasPartnDesc == null) {
      aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(parseCtx
          .getTopToTable().get(topOp)), null);

    }

    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

    long sizeNeeded = Integer.MAX_VALUE;
    int fileLimit = -1;
    if (parseCtx.getGlobalLimitCtx().isEnable()) {
      long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
      sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
      // For the optimization that reduces the number of input files, we limit
      // the number of files allowed. If more than a certain number of files
      // would have to be selected, we skip the optimization, since having too
      // many files as inputs can cause unpredictable latency and is not
      // necessarily cheaper.
      fileLimit =
        HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);

      if (sizePerRow <= 0 || fileLimit <= 0) {
        LOG.info("Skip optimization to reduce input size of 'limit'");
        parseCtx.getGlobalLimitCtx().disableOpt();
      } else if (parts.isEmpty()) {
        LOG.info("Empty input: skip limit optimiztion");
      } else {
        LOG.info("Try to reduce input size for 'limit' " +
            "sizeNeeded: " + sizeNeeded +
            "  file limit : " + fileLimit);
      }
    }
    boolean isFirstPart = true;
    boolean emptyInput = true;
    boolean singlePartition = (parts.size() == 1);
    for (Partition part : parts) {
      if (part.getTable().isPartitioned()) {
        inputs.add(new ReadEntity(part));
      } else {
        inputs.add(new ReadEntity(part.getTable()));
      }

      // Later the properties have to come from the partition rather than
      // from the table in order to support versioning.
      Path[] paths = null;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

      // Lookup list bucketing pruner
      Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(topOp);
      ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName())
          : null;

      if (sampleDescr != null) {
        assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexist.";
        paths = SamplePruner.prune(part, sampleDescr);
        parseCtx.getGlobalLimitCtx().disableOpt();
      } else if (listBucketingPruner != null) {
        assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
        /* Use the list bucketing pruner's paths. */
        paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
      } else {
        // We only try the first partition; if it doesn't contain enough data,
        // we fall back to the normal mode.
        if (parseCtx.getGlobalLimitCtx().isEnable()) {
          if (isFirstPart) {
            long sizeLeft = sizeNeeded;
            ArrayList<Path> retPathList = new ArrayList<Path>();
            SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft,
                fileLimit, retPathList);
            if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
              continue;
            } else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
              LOG.info("Use full input -- first " + fileLimit + " files are more than "
                  + sizeNeeded
                  + " bytes");

              parseCtx.getGlobalLimitCtx().disableOpt();

            } else {
              emptyInput = false;
              paths = new Path[retPathList.size()];
              int index = 0;
              for (Path path : retPathList) {
                paths[index++] = path;
              }
              if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
                // if all files are needed to meet the size limit, we disable
                // the optimization. This usually happens for an empty
                // table/partition or one with only a single file. Disabling the
                // optimization avoids retrying the query when there are not
                // enough rows.
                parseCtx.getGlobalLimitCtx().disableOpt();
              }
            }
            isFirstPart = false;
          } else {
            paths = new Path[0];
          }
        }
        if (!parseCtx.getGlobalLimitCtx().isEnable()) {
          paths = part.getPath();
        }
      }

      // is it a partitioned table ?
      if (!part.getTable().isPartitioned()) {
        assert ((tblDir == null) && (tblDesc == null));

        tblDir = paths[0];
        tblDesc = Utilities.getTableDesc(part.getTable());
      } else if (tblDesc == null) {
        tblDesc = Utilities.getTableDesc(part.getTable());
      }

      for (Path p : paths) {
        if (p == null) {
          continue;
        }
        String path = p.toString();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding " + path + " of table" + alias_id);
        }

        partDir.add(p);
        try {
          partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part));
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }
    }
    if (emptyInput) {
      parseCtx.getGlobalLimitCtx().disableOpt();
    }

    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

    if (!local) {
      while (iterPath.hasNext()) {
        assert iterPartnDesc.hasNext();
        String path = iterPath.next().toString();

        PartitionDesc prtDesc = iterPartnDesc.next();

        // Add the path to alias mapping
        if (plan.getPathToAliases().get(path) == null) {
          plan.getPathToAliases().put(path, new ArrayList<String>());
        }
        plan.getPathToAliases().get(path).add(alias_id);
        plan.getPathToPartitionInfo().put(path, prtDesc);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Information added for path " + path);
        }
      }

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan = new MapredLocalWork(
            new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
            new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
        localPlan.getAliasToFetchWork().put(
            alias_id,
            new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc, tblDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id,
View Full Code Here
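The long example above also implements the global 'limit' optimization: the planner estimates the bytes needed as the global limit times HIVELIMITMAXROWSIZE and asks SamplePruner.limitPrune whether that much data can be gathered from at most HIVELIMITOPTLIMITFILE files of the first partition; if not, the optimization is disabled. A condensed sketch of that per-partition decision, mirroring the branch structure above; the helper itself is hypothetical and the imports assume the usual Hive package layout:

    import java.util.ArrayList;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.optimizer.SamplePruner;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public final class LimitPruneSketch {
      // Returns the files to read from one partition under the 'limit'
      // optimization, or null when the full input is needed. Not a drop-in
      // replacement for the loop in the example above.
      static Path[] selectLimitPaths(ParseContext parseCtx, Partition part,
          long sizeNeeded, int fileLimit) throws SemanticException {
        ArrayList<Path> retPathList = new ArrayList<Path>();
        SamplePruner.LimitPruneRetStatus status =
            SamplePruner.limitPrune(part, sizeNeeded, fileLimit, retPathList);
        if (status == SamplePruner.LimitPruneRetStatus.NoFile) {
          return new Path[0];   // nothing usable in this partition
        }
        if (status == SamplePruner.LimitPruneRetStatus.NotQualify) {
          // the first 'fileLimit' files already exceed 'sizeNeeded' bytes:
          // disable the optimization and read the full input
          parseCtx.getGlobalLimitCtx().disableOpt();
          return null;
        }
        return retPathList.toArray(new Path[retPathList.size()]);
      }
    }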

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

        }
      }

      Table tbl = topToTable.get(tso);
      if (tbl.isPartitioned()) {
        PrunedPartitionList prunedParts = null;
        try {
          prunedParts = pGraphContext.getOpToPartList().get(tso);
          if (prunedParts == null) {
            prunedParts = PartitionPruner.prune(tbl, pGraphContext
                .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
                pGraphContext.getPrunedPartitions());
            pGraphContext.getOpToPartList().put(tso, prunedParts);
          }
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
        List<Partition> partitions = prunedParts.getNotDeniedPartns();
        // Populate the names and order of columns for the first partition of the
        // first table
        if ((pos == 0) && (partitions != null) && (!partitions.isEmpty())) {
          Partition firstPartition = partitions.get(0);
          sortColumnsFirstTable.addAll(firstPartition.getSortCols());
        }

        for (Partition partition : prunedParts.getNotDeniedPartns()) {
          if (!checkSortColsAndJoinCols(partition.getSortCols(),
                                        joinCols,
                                        sortColumnsFirstTable)) {
            return false;
          }
View Full Code Here
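This example, like several later ones on this page, uses a "look up or prune" idiom: reuse the PrunedPartitionList already cached for the TableScanOperator, and only run PartitionPruner.prune (caching the result) on a miss. A sketch of that idiom as a helper built from calls that appear in the examples; the helper name is hypothetical and ParseContext is assumed to be the type of pGraphContext:

    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.metadata.Table;
    import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public final class PrunedListLookupSketch {
      // Returns the cached PrunedPartitionList for a table scan, pruning and
      // caching it on the first request.
      static PrunedPartitionList getOrPrune(ParseContext pctx,
          TableScanOperator tso, Table tbl, String alias) throws SemanticException {
        PrunedPartitionList pruned = pctx.getOpToPartList().get(tso);
        if (pruned == null) {
          try {
            pruned = PartitionPruner.prune(tbl, pctx.getOpToPartPruner().get(tso),
                pctx.getConf(), alias, pctx.getPrunedPartitions());
            pctx.getOpToPartList().put(tso, pruned);
          } catch (HiveException e) {
            throw new SemanticException(e.getMessage(), e);
          }
        }
        return pruned;
      }
    }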

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

    } catch (Exception e) {
      throw new HiveException(e);
    }

    // Now return the set of partitions
    return new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
  }
View Full Code Here
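The fragment above builds its result from three buckets: partitions known to satisfy the pruner expression (true_parts), partitions that might satisfy it (unkn_parts), and partitions known not to (denied_parts); the next example shows the full method. A usage sketch of that constructor follows. The mapping of arguments onto the getters used elsewhere on this page (true to confirmed, unknown to unknown, denied to denied) is inferred from the argument order, and the class and method names are hypothetical:

    import java.util.LinkedHashSet;
    import java.util.Set;

    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

    public final class PrunedListBuildSketch {
      // Wraps the three partition buckets in a PrunedPartitionList using the
      // same constructor shape as the example above.
      static PrunedPartitionList build(Set<Partition> truePartitions,
          Set<Partition> unknownPartitions, Set<Partition> deniedPartitions) {
        return new PrunedPartitionList(
            new LinkedHashSet<Partition>(truePartitions),
            new LinkedHashSet<Partition>(unknownPartitions),
            new LinkedHashSet<Partition>(deniedPartitions));
      }
    }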

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

    String key = tab.getDbName() + "." + tab.getTableName() + ";";

    if (prunerExpr != null) {
      key = key + prunerExpr.getExprString();
    }
    PrunedPartitionList ret = prunedPartitionsMap.get(key);
    if (ret != null) {
      return ret;
    }

    LinkedHashSet<Partition> true_parts = new LinkedHashSet<Partition>();
    LinkedHashSet<Partition> unkn_parts = new LinkedHashSet<Partition>();
    LinkedHashSet<Partition> denied_parts = new LinkedHashSet<Partition>();

    try {
      StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
          .getDeserializer().getObjectInspector();
      Object[] rowWithPart = new Object[2];

      if (tab.isPartitioned()) {
        // If the "strict" mode is on, we have to provide partition pruner for
        // each table.
        if ("strict".equalsIgnoreCase(HiveConf.getVar(conf,
            HiveConf.ConfVars.HIVEMAPREDMODE))) {
          if (!hasColumnExpr(prunerExpr)) {
            throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE
                .getMsg("for Alias \"" + alias + "\" Table \""
                    + tab.getTableName() + "\""));
          }
        }

        if (prunerExpr == null) {
          // This can happen when hive.mapred.mode=nonstrict and there are no predicates at all.
          // Add all partitions so that a MR job is generated.
          true_parts.addAll(Hive.get().getPartitions(tab));
        } else {
          // remove non-partition columns
          ExprNodeDesc compactExpr = prunerExpr.clone();
          compactExpr = compactExpr(compactExpr);
          LOG.debug("Filter w/ compacting: " +
              ((compactExpr != null) ? compactExpr.getExprString(): "null") +
              "; filter w/o compacting: " +
              ((prunerExpr != null) ? prunerExpr.getExprString(): "null"));
          if (compactExpr == null) {
            // This could happen when hive.mapred.mode=nonstrict and all the predicates
            // are on non-partition columns.
            unkn_parts.addAll(Hive.get().getPartitions(tab));
          } else if (Utilities.checkJDOPushDown(tab, compactExpr)) {
            String filter = compactExpr.getExprString();
            String oldFilter = prunerExpr.getExprString();

            if (filter.equals(oldFilter)) {
              // prunerExpr contains only partition columns
              pruneByPushDown(tab, true_parts, filter);
            } else {
              // prunerExpr contains non-partition columns
              pruneByPushDown(tab, unkn_parts, filter);
            }
          } else {
            pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts, prunerExpr, rowObjectInspector);
          }
        }
        LOG.debug("tabname = " + tab.getTableName() + " is partitioned");
      } else {
        true_parts.addAll(Hive.get().getPartitions(tab));
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    }

    // Now return the set of partitions
    ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
    prunedPartitionsMap.put(key, ret);
    return ret;
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

        if (tso == null) {
          return null;
        }
        Table tbl = topToTable.get(tso);
        if (tbl.isPartitioned()) {
          PrunedPartitionList prunedParts = null;
          try {
            prunedParts = pGraphContext.getOpToPartList().get(tso);
            if (prunedParts == null) {
              prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
                pGraphContext.getPrunedPartitions());
              pGraphContext.getOpToPartList().put(tso, prunedParts);
            }
          } catch (HiveException e) {
            // Has to use full name to make sure it does not conflict with
            // org.apache.commons.lang.StringUtils
            LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
            throw new SemanticException(e.getMessage(), e);
          }
          int partNumber = prunedParts.getConfirmedPartns().size()
              + prunedParts.getUnknownPartns().size();

          if (partNumber > 1) {
            // only allow one partition for small tables
            if (!alias.equals(baseBigAlias)) {
              return null;
            }
            // This is the big table, and we got more than one partition.
            // Construct a mapping of (Partition -> bucket file names) and
            // (Partition -> bucket number).
            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
                .iterator();
            while (iter.hasNext()) {
              Partition p = iter.next();
              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
                return null;
              }
              List<String> fileNames = getOnePartitionBucketFileNames(p);
              bigTblPartsToBucketFileNames.put(p, fileNames);
              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
            }
            iter = prunedParts.getUnknownPartns().iterator();
            while (iter.hasNext()) {
              Partition p = iter.next();
              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
                return null;
              }
              List<String> fileNames = getOnePartitionBucketFileNames(p);
              bigTblPartsToBucketFileNames.put(p, fileNames);
              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
            }
            // If there is more than one partition for the big table,
            // aliasToBucketFileNamesMapping and partsToBucketNumber will not
            // contain mappings for the big table. Instead, the mappings are
            // kept in bigTblPartsToBucketFileNames and
            // bigTblPartsToBucketNumber.

          } else {
            Partition part = null;
            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
                .iterator();
            if (iter.hasNext()) {
              part = iter.next();
            }
            if (part == null) {
              iter = prunedParts.getUnknownPartns().iterator();
              if (iter.hasNext()) {
                part = iter.next();
              }
            }
            assert part != null;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

      Map<Table, List<Index>> indexes)
    throws HiveException {
    Hive hive = Hive.get(pctx.getConf());
    Set<Partition> queryPartitions = null;
    // make sure each partition exists on the index table
    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
    if (queryPartitionList.getConfirmedPartns() != null
        && !queryPartitionList.getConfirmedPartns().isEmpty()) {
      queryPartitions = queryPartitionList.getConfirmedPartns();
    } else if (queryPartitionList.getUnknownPartns() != null
        && !queryPartitionList.getUnknownPartns().isEmpty()) {
      queryPartitions = queryPartitionList.getUnknownPartns();
    }

    for (Partition part : queryPartitions) {
      List<Table> sourceIndexTables = getIndexTables(hive, part, indexes);
      if (!containsPartition(hive, part, indexes)) {
View Full Code Here
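The selection above initializes queryPartitions to null and falls through to the loop, so a table with neither confirmed nor unknown partitions would dereference null. A null-safe sketch of the same selection, using only the getters shown above; the class and method names are hypothetical:

    import java.util.LinkedHashSet;
    import java.util.Set;

    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

    public final class QueryPartitionSketch {
      // Prefer confirmed partitions, fall back to unknown ones, and default
      // to an empty set so callers never iterate over null.
      static Set<Partition> selectQueryPartitions(PrunedPartitionList list) {
        if (list == null) {
          return new LinkedHashSet<Partition>();
        }
        Set<Partition> confirmed = list.getConfirmedPartns();
        if (confirmed != null && !confirmed.isEmpty()) {
          return confirmed;
        }
        Set<Partition> unknown = list.getUnknownPartns();
        return (unknown != null) ? unknown : new LinkedHashSet<Partition>();
      }
    }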

Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

        // have found a wrong filter operator. We skip the optimization then.
        return null;
      }


      PrunedPartitionList prunedPartList = owc.getParseContext().getOpToPartList().get(top);
      if (prunedPartList == null) {
        // We never pruned the partition. Try to prune it.
        ExprNodeDesc ppr_pred = owc.getParseContext().getOpToPartPruner().get(top);
        if (ppr_pred == null) {
          // no partition predicate found, skip.
          return null;
        }
        try {
          prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
              ppr_pred, owc.getParseContext().getConf(),
              (String) owc.getParseContext().getTopOps().keySet()
              .toArray()[0], owc.getParseContext().getPrunedPartitions());
          if (prunedPartList != null) {
            owc.getParseContext().getOpToPartList().put(top, prunedPartList);
          }
        } catch (HiveException e) {
          // Has to use full name to make sure it does not conflict with
          // org.apache.commons.lang.StringUtils
          throw new SemanticException(e.getMessage(), e);
        }
      }

      // Otherwise this is not a sampling predicate. We need to process it.
      ExprNodeDesc predicate = fop.getConf().getPredicate();
      String alias = top.getConf().getAlias();

      ArrayList<Partition> partitions = new ArrayList<Partition>();
      if (prunedPartList == null) {
        return null;
      }

      for (Partition p : prunedPartList.getConfirmedPartns()) {
        if (!p.getTable().isPartitioned()) {
          return null;
        }
      }
      for (Partition p : prunedPartList.getUnknownPartns()) {
        if (!p.getTable().isPartitioned()) {
          return null;
        }
      }

      partitions.addAll(prunedPartList.getConfirmedPartns());
      partitions.addAll(prunedPartList.getUnknownPartns());

      PcrExprProcFactory.NodeInfoWrapper wrapper = PcrExprProcFactory.walkExprTree(
          alias, partitions, predicate);

      if (wrapper.state == PcrExprProcFactory.WalkState.TRUE) {
View Full Code Here