Examples of MapWork


Examples of org.apache.hadoop.hive.ql.plan.MapWork

      // Mark every metadata-only table scan and rewrite the corresponding MapWork.
      Iterator<TableScanOperator> iterator =
          walkerCtx.getMetadataOnlyTableScans().iterator();

      while (iterator.hasNext()) {
        TableScanOperator tso = iterator.next();
        ((TableScanDesc)tso.getConf()).setIsMetadataOnly(true);
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        String alias = getAliasForTableScanOperator(work, tso);
        LOG.info("Metadata only table scan for " + alias);
        processAlias(work, alias);
      }
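The snippet relies on a private helper, getAliasForTableScanOperator(work, tso). A minimal sketch of what such a helper could look like, assuming it simply searches MapWork's alias-to-operator map for the given scan (java.util.Map imported; the real Hive implementation may differ):

      // Hypothetical helper: return the alias whose operator tree is rooted
      // at the given table scan, or null if none matches.
      private String getAliasForTableScanOperator(MapWork work, TableScanOperator tso) {
        for (Map.Entry<String, Operator<? extends OperatorDesc>> entry
            : work.getAliasToWork().entrySet()) {
          if (entry.getValue() == tso) {
            return entry.getKey();
          }
        }
        return null;
      }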

Examples of org.apache.hadoop.hive.ql.plan.MapWork

    Driver driver = new Driver(queryConf);
    driver.compile(qlCommand.toString(), false);

    if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) {
      // For now, only works if the predicate is a single condition
      MapWork work = null;
      String originalInputFormat = null;
      for (Task task : driver.getPlan().getRootTasks()) {
        // The index query should have one and only one map reduce task in the root tasks
        // Otherwise something is wrong, log the problem and continue using the default format
        if (task.getWork() instanceof MapredWork) {
          if (work != null) {
            LOG.error("Tried to use a binary search on a compact index but there were an " +
                      "unexpected number (>1) of root level map reduce tasks in the " +
                      "reentrant query plan.");
            work.setInputformat(null);
            work.setInputFormatSorted(false);
            break;
          }
          // The instanceof check above already guarantees a non-null work object.
          work = ((MapredWork) task.getWork()).getMapWork();
          String inputFormat = work.getInputformat();
          originalInputFormat = inputFormat;
          if (inputFormat == null) {
            inputFormat = HiveConf.getVar(pctx.getConf(), HiveConf.ConfVars.HIVEINPUTFORMAT);
          }

          // A binary search can only be performed with HiveInputFormat or one of its
          // subclasses, such as CombineHiveInputFormat and BucketizedHiveInputFormat,
          // hence the isAssignableFrom check below.
          try {
            if (!HiveInputFormat.class.isAssignableFrom(Class.forName(inputFormat))) {
              work = null;
              break;
            }
          } catch (ClassNotFoundException e) {
            LOG.error("Map reduce work's input format class: " + inputFormat + " was not found. " +
                       "Cannot use the fact the compact index is sorted.");
            work = null;
            break;
          }

          work.setInputFormatSorted(true);
        }
      }

      if (work != null) {
        // Find the filter operator and expr node which act on the index column and mark them
        if (!findIndexColumnFilter(work.getAliasToWork().values())) {
          LOG.error("Could not locate the index column's filter operator and expr node. Cannot " +
                    "use the fact the compact index is sorted.");
          work.setInputformat(originalInputFormat);
          work.setInputFormatSorted(false);
        }
      }
    }

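The compatibility test above hinges on Class.isAssignableFrom, which returns true when the argument class is the same as, or a subtype of, the receiver. A tiny self-contained illustration of that semantics:

      import java.util.ArrayList;
      import java.util.List;

      public class AssignableFromDemo {
        public static void main(String[] args) throws ClassNotFoundException {
          Class<?> candidate = Class.forName("java.util.ArrayList");
          // true: ArrayList is a subtype of List, mirroring the
          // HiveInputFormat.class.isAssignableFrom(Class.forName(inputFormat)) check
          System.out.println(List.class.isAssignableFrom(candidate));
          // false: List is not a subtype of ArrayList
          System.out.println(ArrayList.class.isAssignableFrom(List.class));
        }
      }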

Examples of org.apache.hadoop.hive.ql.plan.MapWork

      if (confirmedPartns.size() > 0) {
        Table source = queryBlock.getMetaData().getTableForAlias(alias);
        partitions = new PrunedPartitionList(source, confirmedPartns, false);
      }

      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
      w.setGatheringStats(true);

      return true;
    }
  }

Examples of org.apache.hadoop.hive.ql.plan.MapWork

    if (task instanceof TezTask) {
      TezWork work = ((TezTask)task).getWork();
      List<BaseWork> all = work.getAllWork();
      for (BaseWork w: all) {
        if (w instanceof MapWork) {
          MapWork mapWork = (MapWork) w;
          HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
          if (!opMap.isEmpty()) {
            for (Operator<? extends OperatorDesc> op : opMap.values()) {
              setInputFormat(mapWork, op);
            }
          }
        }
      }
    } // snippet truncated; the full method continues in the original source

Examples of org.apache.hadoop.hive.ql.plan.MapWork

  // loop over all the tasks recursively
  @Override
  protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof ExecDriver) {
      MapWork work = ((MapredWork) task.getWork()).getMapWork();
      HashMap<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
      if (!opMap.isEmpty()) {
        for (Operator<? extends OperatorDesc> op : opMap.values()) {
          setInputFormat(work, op);
        }
      }
    }
  } // snippet truncated; per the comment above, the full method also recurses into child tasks
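Both this and the preceding Tez variant delegate to a second setInputFormat(work, op) overload that visits a single operator tree. A minimal sketch of such a walk, assuming the intent is simply to recurse over every child operator (the real Hive overload also reacts to specific operator types, for example to enable BucketizedHiveInputFormat):

      // Sketch only: depth-first recursion through the operator DAG rooted at op.
      protected void setInputFormat(MapWork work, Operator<? extends OperatorDesc> op) {
        if (op.getChildOperators() != null) {
          for (Operator<? extends OperatorDesc> child : op.getChildOperators()) {
            setInputFormat(work, child);
          }
        }
      }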

Examples of org.apache.hadoop.hive.ql.plan.MapWork

        // Register each task's temporary directory in the MapWork as a synthetic
        // alias, with matching partition info and top-level operator.
        MapredWork plan = (MapredWork) currTask.getWork();
        for (int pos = 0; pos < size; pos++) {
          String taskTmpDir = taskTmpDirLst.get(pos);
          TableDesc tt_desc = tt_descLst.get(pos);
          MapWork mWork = plan.getMapWork();
          if (mWork.getPathToAliases().get(taskTmpDir) == null) {
            ArrayList<String> aliases = new ArrayList<String>();
            aliases.add(taskTmpDir);
            mWork.getPathToAliases().put(taskTmpDir, aliases);
            mWork.getPathToPartitionInfo().put(taskTmpDir,
                new PartitionDesc(tt_desc, null));
            mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
          }
        }
      }
    }
  }

Examples of org.apache.hadoop.hive.ql.plan.MapWork

  /**
   * Create a skeleton MapredWork whose MapWork is initialized from the given
   * configuration, with empty path, alias, and partition maps.
   *
   * @return the new plan
   */
  public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
    MapredWork mrWork = new MapredWork();
    MapWork work = mrWork.getMapWork();

    boolean mapperCannotSpanPartns =
        conf.getBoolVar(
            HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
    work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
    work.setHadoopSupportsSplittable(
        conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
    return mrWork;
  }
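A short usage sketch (variable names borrowed from the merge-file example further below) showing how a caller might obtain the skeleton plan and then register its inputs:

      MapredWork mrWork = GenMapRedUtils.getMapRedWorkFromConf(conf);
      MapWork work = mrWork.getMapWork();
      // The maps start empty; callers then wire in inputs, e.g.:
      //   work.getPathToAliases().put(inputDir, aliases);
      //   work.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
      //   work.getAliasToWork().put(inputDir, topOp);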

Examples of org.apache.hadoop.hive.ql.plan.MapWork

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MoveWork dummyMv = new MoveWork(null, null, null,
         new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
    MapWork cplan;
    Serializable work;

    if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) &&
        fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {

      // Validate that the configured block-level merge input format class exists
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
      try {
        Class.forName(inputFormatClass); // loaded only to verify availability

        LOG.info("RCFile format- Using block level merge");
        cplan = GenMapRedUtils.createRCFileMergeTask(fsInputDesc, finalName,
            dpCtx != null && dpCtx.getNumDPCols() > 0);
        work = cplan;
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
        cplan.setName("Merge");
        ((TezWork)work).add(cplan);
      } else {
        work = new MapredWork();
        ((MapredWork)work).setMapWork(cplan);
      }
    }
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: stats should be gathered in the first job (MR1) rather than in the merge
    // job (MR2), since at compile time we don't know whether the merge MR2 will
    // actually be triggered at execution time
    ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work,
        fsInputDesc.getFinalDirName().toString());

Examples of org.apache.hadoop.hive.ql.plan.MapWork

    TableDesc tblDesc = fsDesc.getTableInfo();
    aliases.add(inputDir); // dummy alias: just use the input path

    // constructing the default MapredWork
    MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
    MapWork cplan = cMrPlan.getMapWork();
    cplan.getPathToAliases().put(inputDir, aliases);
    cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
    cplan.getAliasToWork().put(inputDir, topOp);
    cplan.setMapperCannotSpanPartns(true);

    return cplan;
  }
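For illustration, if the input path were /tmp/merge-input (a hypothetical value), the three MapWork maps populated above would all end up keyed by that same path:

      // cplan.getPathToAliases()       : { "/tmp/merge-input" -> [ "/tmp/merge-input" ] }
      // cplan.getPathToPartitionInfo() : { "/tmp/merge-input" -> PartitionDesc(tblDesc, null) }
      // cplan.getAliasToWork()         : { "/tmp/merge-input" -> topOp }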

Examples of org.apache.hadoop.hive.ql.plan.MapWork

    assertEquals(1, work.getAllWork().size());

    BaseWork w = work.getAllWork().get(0);
    assertTrue(w instanceof MapWork);

    MapWork mw = (MapWork)w;

    // need to make sure names are set for tez to connect things right
    assertNotNull(w.getName());

    // map work should start with our ts op
    assertSame(mw.getAliasToWork().entrySet().iterator().next().getValue(),ts);

    // preceding work must be set to the newly generated map work
    assertSame(ctx.preceedingWork, mw);

    // should have a new root now