Examples of org.apache.tez.dag.api.Vertex.addDataSource()

org.apache.tez.dag.api.Vertex.addDataSource()
Specifies an external data source for a Vertex. This is meant to be used when a Vertex reads Input directly from an external source.
For vertices which read data generated by another vertex - use the {@link DAG addEdge} method. If a vertex needs to use data generated by another vertex in the DAG and also from an external source, a combination of this API and the DAG.addEdge API can be used.
Note: If more than one RootInput exists on a vertex, which generates events which need to be routed, or generates information to set parallelism, a custom vertex manager should be set up to handle this. Not using a custom vertex manager for such a scenario will lead to a runtime failure. @param inputName the name of the input. This will be used when accessing the input in the {@link LogicalIOProcessor} @param dataSourceDescriptor the {@link DataSourceDescriptor} for this input. @return this Vertex

        for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {


            // TODO: These should get the globalConf, or a merged version that
            // keeps settings like pig.maxCombinedSplitSize
            vertex.setLocationHint(VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
            vertex.addDataSource(ld.getOperatorKey().toString(),
                    DataSourceDescriptor.create(InputDescriptor.create(MRInput.class.getName())
                          .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                          .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                          .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build().toByteString().asReadOnlyByteBuffer())),
                    InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()), dag.getCredentials()));

View Full Code Here

    UserPayload vertexUserPayload = TezUtils.createUserPayloadFromConf(stageConf);
    Vertex vertex = Vertex.create(vertexName,
        ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload),
        numTasks, taskResource);
    if (isMap) {
      vertex.addDataSource("MRInput",
          configureMRInputWithLegacySplitsGenerated(stageConf, true));
    }
    // Map only jobs.
    if (stageNum == totalStages -1) {
      OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())

View Full Code Here

    }


    Vertex stage1Vertex = Vertex.create("map", ProcessorDescriptor.create(
            MapProcessor.class.getName()).setUserPayload(stage1Payload),
        dsd.getNumberOfShards(), Resource.newInstance(256, 1));
    stage1Vertex.addDataSource("MRInput", dsd);
    Vertex stage2Vertex = Vertex.create("ireduce", ProcessorDescriptor.create(
            ReduceProcessor.class.getName()).setUserPayload(stage2Payload),
        1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = Vertex.create("reduce", ProcessorDescriptor.create(
            ReduceProcessor.class.getName()).setUserPayload(stage3Payload),

View Full Code Here

    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
        TextOutputFormat.class, outputPath).build();


    Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
        TokenProcessor.class.getName()));
    tokenizerVertex.addDataSource(INPUT, dataSource);


    // Use Text key and IntWritable value to bring counts for each word in the same partition
    OrderedPartitionedKVEdgeConfig summationEdgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName()).build();

View Full Code Here


    Vertex mapVertex = Vertex.create("initialmap", ProcessorDescriptor.create(
        MapProcessor.class.getName()).setUserPayload(
        TezUtils.createUserPayloadFromConf(mapStageConf))
        .setHistoryText(mapStageHistoryText)).addTaskLocalFiles(commonLocalResources);
    mapVertex.addDataSource("MRInput", dsd);
    vertices.add(mapVertex);


    ByteArrayOutputStream iROutputStream = new ByteArrayOutputStream(4096);
    iReduceStageConf.writeXml(iROutputStream);
    String iReduceStageHistoryText = new String(iROutputStream.toByteArray(), "UTF-8");

View Full Code Here

    int numTasks = generateSplitsInAM ? -1 : numMapper;


    Vertex mapVertex = Vertex.create("map", ProcessorDescriptor.create(
        MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks)
        .addTaskLocalFiles(commonLocalResources);
    mapVertex.addDataSource("MRInput", dataSource);
    vertices.add(mapVertex);


    if (iReduceStagesCount > 0
        && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {

View Full Code Here

      dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
      dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
          .groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);


    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils
        .createUserPayloadFromConf(stage2Conf)), dsd.getNumberOfShards());

View Full Code Here

      dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
      dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
          .groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);


    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(
        TezUtils.createUserPayloadFromConf(stage2Conf)), 1);

View Full Code Here


        mapWork = (MapWork) (mapWorkList.get(i));
        conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
        conf.set(Utilities.INPUT_NAME, mapWork.getName());
        LOG.info("Going through each work and adding MultiMRInput");
        mergeVx.addDataSource(mapWork.getName(),
            MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
      }


      VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());

View Full Code Here


    assert mapWork.getAliasToWork().keySet().size() == 1;


    // Add the actual source input
    String alias = mapWork.getAliasToWork().keySet().iterator().next();
    map.addDataSource(alias, dataSource);


    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr: additionalLr) {
      localResources.put(getBaseName(lr), lr);

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.