Examples of MapredWork

org.apache.hadoop.hive.ql.plan.MapredWork
MapredWork.
org.apache.hadoop.hive.ql.plan.mapredWork
MapredWork.

Examples of org.apache.hadoop.hive.ql.plan.mapredWork




  private Map<String, partitionDesc> pathToPartitionInfo;


  protected void init(JobConf job) {
    mapredWork mrwork = Utilities.getMapRedWork(job);
    pathToPartitionInfo = mrwork.getPathToPartitionInfo();
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();


    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
    
    plan.setNumReduceTasks(desc.getNumReducers());


    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();


    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)
      plan.setNeedsTagging(true);


    assert currTopOp != null;
    List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
    String currAliasId = opProcCtx.getCurrAliasId();

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();


    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
    
    plan.setNumReduceTasks(desc.getNumReducers());


    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();


    //    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)
      plan.setNeedsTagging(true);


    initUnionPlan(opProcCtx, currTask); 
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  /*
   * It is a idempotent function to add various intermediate files as the source for the
   * union. The plan has already been created.
   */
  public static void initUnionPlan(GenMRProcContext opProcCtx, Task<? extends Serializable> currTask) {
    mapredWork plan = (mapredWork) currTask.getWork();
    UnionOperator currUnionOp = opProcCtx.getCurrUnionOp();
    assert currUnionOp != null;
    GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp);
    assert uCtx != null;


    List<String>    taskTmpDirLst = uCtx.getTaskTmpDir();
    List<tableDesc> tt_descLst    = uCtx.getTTDesc(); 
    assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
    assert taskTmpDirLst.size() == tt_descLst.size();
    int size = taskTmpDirLst.size();


    for (int pos = 0; pos < size; pos++) {
      String taskTmpDir = taskTmpDirLst.get(pos);
      tableDesc tt_desc = tt_descLst.get(pos); 
      if (plan.getPathToAliases().get(taskTmpDir) == null) {
        plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
        plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
        plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
        plan.getAliasToWork().put(taskTmpDir, currUnionOp);
      }
    }
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  public static void joinPlan(ReduceSinkOperator op,
                              Task<? extends Serializable> oldTask, 
                              Task<? extends Serializable> task, 
                              GenMRProcContext opProcCtx) throws SemanticException {
    Task<? extends Serializable> currTask = task;
    mapredWork plan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();


    // terminate the old task and make current task dependent on it
    if (oldTask != null) {
      splitTasks(op, oldTask, currTask, opProcCtx);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

   * @param opProcCtx processing context
   */
  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) 
    throws SemanticException {
    // Generate a new task              
    mapredWork cplan = getMapRedWork();
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);


    // Add the reducer
    cplan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
    
    cplan.setNumReduceTasks(new Integer(desc.getNumReducers()));


    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, redTask);
    Task<? extends Serializable> currTask    = opProcCtx.getCurrTask();

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

  /**
   * create a new plan and return
   * @return the new plan
   */
  public static mapredWork getMapRedWork() {
    mapredWork work = new mapredWork();
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, partitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
    work.setTagToValueDesc(new ArrayList<tableDesc>());
    work.setReducer(null);
    return work;
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

   **/
  private static void splitTasks(ReduceSinkOperator op,
                                 Task<? extends Serializable> parentTask, 
                                 Task<? extends Serializable> childTask, 
                                 GenMRProcContext opProcCtx) throws SemanticException {
    mapredWork plan = (mapredWork) childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
    
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);


    // Root Task cannot depend on any other task, therefore childTask cannot be a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask))
      rootTasks.remove(childTask);


    // generate the temporary file
    String scratchDir = opProcCtx.getScratchDir();
    int randomid = opProcCtx.getRandomId();
    int pathid   = opProcCtx.getPathId();
      
    String taskTmpDir = (new Path(scratchDir + File.separator + randomid + '.' + pathid)).toString();
    pathid++;
    opProcCtx.setPathId(pathid);
    
    Operator<? extends Serializable> parent = op.getParentOperators().get(0);
    tableDesc tt_desc = 
      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); 
    
    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op =
      putOpInsertMap(OperatorFactory.get
                     (new fileSinkDesc(taskTmpDir, tt_desc,
                                       parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
                      parent.getSchema()), null, parseCtx);
    
    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
    for (int pos = 0; pos < childOpList.size(); pos++) {
      if (childOpList.get(pos) == op) {
        childOpList.set(pos, fs_op);
        break;
      }
    }
    
    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);
    
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    
    String streamDesc;
    mapredWork cplan = (mapredWork) childTask.getWork();
    
    if (reducer.getClass() == JoinOperator.class) {
      String origStreamDesc;
      streamDesc = "$INTNAME";
      origStreamDesc = streamDesc;
      int pos = 0;
      while (cplan.getAliasToWork().get(streamDesc) != null)
        streamDesc = origStreamDesc.concat(String.valueOf(++pos));
    }
    else
      streamDesc = taskTmpDir;
    
    // Add the path to alias mapping
    if (cplan.getPathToAliases().get(taskTmpDir) == null) {
      cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
    }
    
    cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
    cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
    cplan.getAliasToWork().put(streamDesc, op);


    // TODO: Allocate work to remove the temporary files and make that
    // dependent on the redTask
    if (reducer.getClass() == JoinOperator.class)
      cplan.setNeedsTagging(true);


    currTopOp = null;
    String currAliasId = null;
    
    opProcCtx.setCurrTopOp(currTopOp);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;


    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork currPlan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp   = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);


    ctx.setCurrTopOp(currTopOp);
    ctx.setCurrAliasId(currAliasId);
    ctx.setCurrTask(currTask);


    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      if (currPlan.getReducer() == null) 
        GenMapRedUtils.initPlan(op, ctx);
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // This will happen in case of joins. The current plan can be thrown away after being merged with the

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.mapredWork

    // union consisted on a bunch of map-reduce jobs, and it has been split at the union
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
    
    ctx.setCurrTask(currTask);


    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      // When the reducer is encountered for the first time
      if (plan.getReducer() == null)
        GenMapRedUtils.initUnionPlan(op, ctx);
      // When union is followed by a multi-table insert
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // The union is already initialized. However, the union is walked from another input
    // initUnionPlan is idempotent
    else if (plan.getReducer() == reducer)
      GenMapRedUtils.initUnionPlan(op, ctx);
    // There is a join after union. One of the branches of union has already been initialized.
    // Initialize the current branch, and join with the original plan.
    else {
      GenMapRedUtils.initUnionPlan(ctx, currTask);

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.