Examples of MapredWork


Examples of org.apache.hadoop.hive.ql.plan.mapredWork


  private Map<String, partitionDesc> pathToPartitionInfo;

  protected void init(JobConf job) {
    mapredWork mrwork = Utilities.getMapRedWork(job);
    pathToPartitionInfo = mrwork.getPathToPartitionInfo();
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
   
    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)
      plan.setNeedsTagging(true);

    assert currTopOp != null;
    List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
    String currAliasId = opProcCtx.getCurrAliasId();
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
   
    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    //    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)
      plan.setNeedsTagging(true);

    initUnionPlan(opProcCtx, currTask);
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  /*
   * It is a idempotent function to add various intermediate files as the source for the
   * union. The plan has already been created.
   */
  public static void initUnionPlan(GenMRProcContext opProcCtx, Task<? extends Serializable> currTask) {
    mapredWork plan = (mapredWork) currTask.getWork();
    UnionOperator currUnionOp = opProcCtx.getCurrUnionOp();
    assert currUnionOp != null;
    GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp);
    assert uCtx != null;

    List<String>    taskTmpDirLst = uCtx.getTaskTmpDir();
    List<tableDesc> tt_descLst    = uCtx.getTTDesc();
    assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
    assert taskTmpDirLst.size() == tt_descLst.size();
    int size = taskTmpDirLst.size();

    for (int pos = 0; pos < size; pos++) {
      String taskTmpDir = taskTmpDirLst.get(pos);
      tableDesc tt_desc = tt_descLst.get(pos);
      if (plan.getPathToAliases().get(taskTmpDir) == null) {
        plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
        plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
        plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
        plan.getAliasToWork().put(taskTmpDir, currUnionOp);
      }
    }
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void joinPlan(ReduceSinkOperator op,
                              Task<? extends Serializable> oldTask,
                              Task<? extends Serializable> task,
                              GenMRProcContext opProcCtx) throws SemanticException {
    Task<? extends Serializable> currTask = task;
    mapredWork plan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    // terminate the old task and make current task dependent on it
    if (oldTask != null) {
      splitTasks(op, oldTask, currTask, opProcCtx);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

   * @param opProcCtx processing context
   */
  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
    // Generate a new task             
    mapredWork cplan = getMapRedWork();
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

    // Add the reducer
    cplan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
   
    cplan.setNumReduceTasks(new Integer(desc.getNumReducers()));

    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, redTask);
    Task<? extends Serializable> currTask    = opProcCtx.getCurrTask();

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  /**
   * create a new plan and return
   * @return the new plan
   */
  public static mapredWork getMapRedWork() {
    mapredWork work = new mapredWork();
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, partitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
    work.setTagToValueDesc(new ArrayList<tableDesc>());
    work.setReducer(null);
    return work;
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

   **/
  private static void splitTasks(ReduceSinkOperator op,
                                 Task<? extends Serializable> parentTask,
                                 Task<? extends Serializable> childTask,
                                 GenMRProcContext opProcCtx) throws SemanticException {
    mapredWork plan = (mapredWork) childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
   
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);

    // Root Task cannot depend on any other task, therefore childTask cannot be a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask))
      rootTasks.remove(childTask);

    // generate the temporary file
    String scratchDir = opProcCtx.getScratchDir();
    int randomid = opProcCtx.getRandomId();
    int pathid   = opProcCtx.getPathId();
     
    String taskTmpDir = (new Path(scratchDir + File.separator + randomid + '.' + pathid)).toString();
    pathid++;
    opProcCtx.setPathId(pathid);
   
    Operator<? extends Serializable> parent = op.getParentOperators().get(0);
    tableDesc tt_desc =
      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
   
    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op =
      putOpInsertMap(OperatorFactory.get
                     (new fileSinkDesc(taskTmpDir, tt_desc,
                                       parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
                      parent.getSchema()), null, parseCtx);
   
    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
    for (int pos = 0; pos < childOpList.size(); pos++) {
      if (childOpList.get(pos) == op) {
        childOpList.set(pos, fs_op);
        break;
      }
    }
   
    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);
   
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
   
    String streamDesc;
    mapredWork cplan = (mapredWork) childTask.getWork();
   
    if (reducer.getClass() == JoinOperator.class) {
      String origStreamDesc;
      streamDesc = "$INTNAME";
      origStreamDesc = streamDesc;
      int pos = 0;
      while (cplan.getAliasToWork().get(streamDesc) != null)
        streamDesc = origStreamDesc.concat(String.valueOf(++pos));
    }
    else
      streamDesc = taskTmpDir;
   
    // Add the path to alias mapping
    if (cplan.getPathToAliases().get(taskTmpDir) == null) {
      cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
    }
   
    cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
    cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
    cplan.getAliasToWork().put(streamDesc, op);

    // TODO: Allocate work to remove the temporary files and make that
    // dependent on the redTask
    if (reducer.getClass() == JoinOperator.class)
      cplan.setNeedsTagging(true);

    currTopOp = null;
    String currAliasId = null;
   
    opProcCtx.setCurrTopOp(currTopOp);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork currPlan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp   = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

    ctx.setCurrTopOp(currTopOp);
    ctx.setCurrAliasId(currAliasId);
    ctx.setCurrTask(currTask);

    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      if (currPlan.getReducer() == null)
        GenMapRedUtils.initPlan(op, ctx);
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // This will happen in case of joins. The current plan can be thrown away after being merged with the
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

    // union consisted on a bunch of map-reduce jobs, and it has been split at the union
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
   
    ctx.setCurrTask(currTask);

    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      // When the reducer is encountered for the first time
      if (plan.getReducer() == null)
        GenMapRedUtils.initUnionPlan(op, ctx);
      // When union is followed by a multi-table insert
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // The union is already initialized. However, the union is walked from another input
    // initUnionPlan is idempotent
    else if (plan.getReducer() == reducer)
      GenMapRedUtils.initUnionPlan(op, ctx);
    // There is a join after union. One of the branches of union has already been initialized.
    // Initialize the current branch, and join with the original plan.
    else {
      GenMapRedUtils.initUnionPlan(ctx, currTask);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.