Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.ProcessorDescriptor


      numTasks = inputSplitInfo.getNumTasks();
    }

    byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
    map = new Vertex(mapWork.getName(),
        new ProcessorDescriptor(MapTezProcessor.class.getName()).
        setUserPayload(serializedConf), numTasks, getContainerResource(conf));
    Map<String, String> environment = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
    map.setTaskEnvironment(environment);
    map.setJavaOpts(getContainerJavaOpts(conf));
View Full Code Here


    // Call once here, will be updated when we find edges
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // create the vertex
    Vertex reducer = new Vertex(reduceWork.getName(),
        new ProcessorDescriptor(ReduceTezProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
        reduceWork.getNumReduceTasks(), getContainerResource(conf));

    Map<String, String> environment = new HashMap<String, String>();
View Full Code Here

  public PreWarmContext createPreWarmContext(TezSessionConfiguration sessionConfig, int numContainers,
      Map<String, LocalResource> localResources) throws IOException, TezException {

    Configuration conf = sessionConfig.getTezConfiguration();

    ProcessorDescriptor prewarmProcDescriptor = new ProcessorDescriptor(HivePreWarmProcessor.class.getName());
    prewarmProcDescriptor.setUserPayload(MRHelpers.createUserPayloadFromConf(conf));

    PreWarmContext context = new PreWarmContext(prewarmProcDescriptor, getContainerResource(conf),
        numContainers, new VertexLocationHint(null));

    Map<String, LocalResource> combinedResources = new HashMap<String, LocalResource>();
View Full Code Here

                .serialize(new byte[] { combRearrange.getKeyType() }));
    }

    private Vertex newVertex(TezOperator tezOp) throws IOException,
            ClassNotFoundException, InterruptedException {
        ProcessorDescriptor procDesc = ProcessorDescriptor.create(
                tezOp.getProcessorName());

        // Pass physical plans to vertex as user payload.
        JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

        // We do this so that dag.getCredentials(), job.getCredentials(),
        // job.getConfiguration().getCredentials() all reference the same Credentials object
        // Unfortunately there is no setCredentials() on Job
        payloadConf.setCredentials(dag.getCredentials());
        // We won't actually use this job, but we need it to talk with the Load Store funcs
        @SuppressWarnings("deprecation")
        Job job = new Job(payloadConf);
        payloadConf = (JobConf) job.getConfiguration();

        if (tezOp.getSampleOperator() != null) {
            payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.getSampleOperator().getOperatorKey().toString());
        }

        if (tezOp.getSortOperator() != null) {
            // Required by Sample Aggregation job for estimating quantiles
            payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.getSortOperator().getOperatorKey().toString());
            // PIG-4162: Order by/Skew Join in intermediate stage.
            // Increasing order by parallelism may not be required as it is
            // usually followed by limit other than store. But would benefit
            // cases like skewed join followed by group by.
            if (tezOp.getSortOperator().getEstimatedParallelism() != -1
                    && TezCompilerUtil.isIntermediateReducer(tezOp.getSortOperator())) {
                payloadConf.setLong(
                        InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        intermediateTaskInputSize);
            }

        }

        payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
        payloadConf.set("pig.inpSignatures", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
        payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));
        // Process stores
        LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

        payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
        payloadConf.set("udf.import.list",
                ObjectSerializer.serialize(PigContext.getPackageImportList()));
        payloadConf.set("exectype", "TEZ");
        payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
        payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS,
                PigInputFormat.class, InputFormat.class);

        // Set parent plan for all operators in the Tez plan.
        new PhyPlanSetter(tezOp.plan).visit();

        // Set the endOfAllInput flag on the physical plan if certain operators that
        // use this property (such as STREAM) are present in the plan.
        EndOfAllInputSetter.EndOfAllInputChecker checker =
                new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
        checker.visit();
        if (checker.isEndOfAllInputPresent()) {
            payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
        }

        // Configure the classes for incoming shuffles to this TezOp
        // TODO: Refactor out resetting input keys, PIG-3957
        List<PhysicalOperator> roots = tezOp.plan.getRoots();
        if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
            POPackage pack = (POPackage) roots.get(0);

            List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
            if (succsList != null) {
                succsList = new ArrayList<PhysicalOperator>(succsList);
            }
            byte keyType = pack.getPkgr().getKeyType();
            tezOp.plan.remove(pack);
            payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
            setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
            POShuffleTezLoad newPack = new POShuffleTezLoad(pack);
            if (tezOp.isSkewedJoin()) {
                newPack.setSkewedJoins(true);
            }
            tezOp.plan.add(newPack);

            // Set input keys for POShuffleTezLoad. This is used to identify
            // the inputs that are attached to the POShuffleTezLoad in the
            // backend.
            Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (tezOp.getSampleOperator() != null && tezOp.getSampleOperator() == pred) {
                    // skip sample vertex input
                } else {
                    String inputKey = pred.getOperatorKey().toString();
                    if (pred.isVertexGroup()) {
                        pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                    }
                    LinkedList<POLocalRearrangeTez> lrs =
                            PlanHelper.getPhysicalOperators(pred.plan, POLocalRearrangeTez.class);
                    for (POLocalRearrangeTez lr : lrs) {
                        if (lr.isConnectedToPackage()
                                && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                            localRearrangeMap.put((int) lr.getIndex(), inputKey);
                        }
                    }
                }
            }
            for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
                newPack.addInputKey(entry.getValue());
            }

            if (succsList != null) {
                for (PhysicalOperator succs : succsList) {
                    tezOp.plan.connect(newPack, succs);
                }
            }

            setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
        } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
            POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
            // TODO Need to fix multiple input key mapping
            TezOperator identityInOutPred = null;
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (!pred.isSampleAggregation()) {
                    identityInOutPred = pred;
                    break;
                }
            }
            identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
        } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
            POValueInputTez valueInput = (POValueInputTez) roots.get(0);

            LinkedList<String> scalarInputs = new LinkedList<String>();
            for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class) ) {
                if (userFunc.getFunc() instanceof ReadScalarsTez) {
                    scalarInputs.add(((ReadScalarsTez)userFunc.getFunc()).getTezInputs()[0]);
                }
            }
            // Make sure we don't find the scalar
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                    valueInput.setInputKey(pred.getOperatorKey().toString());
                    break;
                }
            }
        }
        setOutputFormat(job);

        // set parent plan in all operators. currently the parent plan is really
        // used only when POStream, POSplit are present in the plan
        new PhyPlanSetter(tezOp.plan).visit();

        // Serialize the execution plan
        payloadConf.set(PigProcessor.PLAN,
                ObjectSerializer.serialize(tezOp.plan));

        UDFContext.getUDFContext().serialize(payloadConf);

        MRToTezHelper.processMRSettings(payloadConf, globalConf);

        if (!pc.inIllustrator) {
            for (POStore store : stores) {
                // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
                store.setInputs(null);
                store.setParentPlan(null);
            }
            // We put them in the reduce because PigOutputCommitter checks the
            // ID of the task to see if it's a map, and if not, calls the reduce
            // committers.
            payloadConf.set(JobControlCompiler.PIG_MAP_STORES,
                    ObjectSerializer.serialize(new ArrayList<POStore>()));
            payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES,
                    ObjectSerializer.serialize(stores));
        }

        if (tezOp.isNeedEstimateParallelism()) {
            payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
            log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
        }

        // set various parallelism into the job conf for later analysis, PIG-2779
        payloadConf.setInt(PigImplConstants.REDUCER_DEFAULT_PARALLELISM, pc.defaultParallel);
        payloadConf.setInt(PigImplConstants.REDUCER_REQUESTED_PARALLELISM, tezOp.getRequestedParallelism());
        payloadConf.setInt(PigImplConstants.REDUCER_ESTIMATED_PARALLELISM, tezOp.getEstimatedParallelism());

        TezScriptState ss = TezScriptState.get();
        ss.addVertexSettingsToConf(dag.getName(), tezOp, payloadConf);

        // Take our assembled configuration and create a vertex
        UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
        procDesc.setUserPayload(userPayload);

        Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
                tezOp.isUseMRMapSettings() ? MRHelpers.getResourceForMRMapper(globalConf) : MRHelpers.getResourceForMRReducer(globalConf));

        Map<String, String> taskEnv = new HashMap<String, String>();
View Full Code Here

      TezUmbilical umbilical, String dagName,
      String vertexName, List<InputSpec> inputSpecs,
      List<OutputSpec> outputSpecs) throws Exception {
    jobConf.setInputFormat(SequenceFileInputFormat.class);

    ProcessorDescriptor mapProcessorDesc = ProcessorDescriptor.create(
        MapProcessor.class.getName()).setUserPayload(
        TezUtils.createUserPayloadFromConf(jobConf));
   
    Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>();
View Full Code Here

    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
        "localized-resources").toUri().toString());
    FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
    ProcessorDescriptor reduceProcessorDesc = ProcessorDescriptor.create(
        ReduceProcessor.class.getName()).setUserPayload(
        TezUtils.createUserPayloadFromConf(jobConf));
   
    InputSpec reduceInputSpec = new InputSpec(mapVertexName,
        InputDescriptor.create(LocalMergedInput.class.getName())
View Full Code Here

    TezDAGID dagId = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
    TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
    TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 1);
    ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(processorClass)
        .setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
    TaskSpec taskSpec = new TaskSpec(taskAttemptId, "dagName", "vertexName", -1, processorDescriptor,
        new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null);

    TezTaskRunner taskRunner = new TezTaskRunner(tezConf, ugi, localDirs, taskSpec, umbilical, 1,
View Full Code Here

    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc1);
   
    // Fourth request - processor
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    ProcessorContext e4ProcessorContext1 = createTestProcessortContext();
    ProcessorDescriptor e4ProcessorDesc1 = createTestProcessorDescriptor();
    dist.requestMemory(5000, e4Callback, e4ProcessorContext1, e4ProcessorDesc1);
   
   
    dist.makeInitialAllocations();
   
View Full Code Here

    doReturn("OutputClass").when(desc).getClassName();
    return desc;
  }

  protected ProcessorDescriptor createTestProcessorDescriptor() {
    ProcessorDescriptor desc = mock(ProcessorDescriptor.class);
    doReturn("ProcessorClass").when(desc).getClassName();
    return desc;
  }
View Full Code Here

  // TODO remove hacky name lookup
  @Override
  public boolean needsWaitAfterOutputConsumable() {
    Vertex vertex = getVertex();
    ProcessorDescriptor processorDescriptor = vertex.getProcessorDescriptor();
    if (processorDescriptor != null &&
        processorDescriptor.getClassName().contains("InitialTaskWithInMemSort")) {
      return true;
    } else {
      return false;
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.ProcessorDescriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.