Package org.apache.flink.api.java.record.operators

Examples of org.apache.flink.api.java.record.operators.FileDataSource


 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations, boolean extraMap) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here


    final String edgeInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int maxIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

      this.degreeOfParallelism = Integer.parseInt(args[0]);
      this.lineItemInputPath = args[1];
      this.outputPath = args[2];
    }
   
    FileDataSource lineItems =
      new FileDataSource(new IntTupleDataInFormat(), this.lineItemInputPath, "LineItems");
    lineItems.setDegreeOfParallelism(this.degreeOfParallelism);
   
    FileDataSink result =
      new FileDataSink(new StringTupleDataOutFormat(), this.outputPath, "Output");
    result.setDegreeOfParallelism(this.degreeOfParallelism);
   
View Full Code Here

  @Override
  protected Plan getTestJob() {
    String input1Path = config.getString("UnionTest#Input1Path", "").equals("empty") ? emptyInPath : inPath;
    String input2Path = config.getString("UnionTest#Input2Path", "").equals("empty") ? emptyInPath : inPath;

    FileDataSource input1 = new FileDataSource(
      new ContractITCaseInputFormat(), input1Path);
    DelimitedInputFormat.configureDelimitedFormat(input1)
      .recordDelimiter('\n');
    input1.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));
   
    FileDataSource input2 = new FileDataSource(
        new ContractITCaseInputFormat(), input2Path);
    DelimitedInputFormat.configureDelimitedFormat(input2)
      .recordDelimiter('\n');
    input2.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));
   
    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(
View Full Code Here

    return getTestPlanPlan(DOP, dataPath, resultPath);
  }
 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
    Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1);
View Full Code Here

  public Plan getPlan(String... args) {
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    MapOperator mapper = MapOperator.builder(new TokenizeLine()).input(source).name("Tokenize Lines").build();
   
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0).input(mapper)
        .name("Count Words").build();
View Full Code Here

   
    int dop = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);
   
    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(',', IntValue.class, IntValue.class);
    FileDataSource source = new FileDataSource(format, this.textPath, "Source");
   
    ReduceOperator reducer = ReduceOperator.builder(CheckingReducer.class)
      .keyField(IntValue.class, 0)
      .input(source)
      .name("Ordered Reducer")
View Full Code Here

    }
  }

  @Override
  protected Plan getTestJob() {
    FileDataSource input = new FileDataSource(
        new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(input)
      .recordDelimiter('\n');
    input.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(
View Full Code Here

  }

 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
   
View Full Code Here

    final String clusterInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // create DataSourceContract for cluster center input
    FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
    initialClusterPoints.setDegreeOfParallelism(1);
   
    BulkIteration iteration = new BulkIteration("K-Means Loop");
    iteration.setInput(initialClusterPoints);
    iteration.setMaximumNumberOfIterations(numIterations);
   
    // create DataSourceContract for data point input
    FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");

    // create CrossOperator for distance computation
    CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints)
        .input2(iteration.getPartialSolution())
        .name("Compute Distances")
        .build();

    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
        .input(computeDistance)
        .name("Find Nearest Centers")
        .build();

    // create ReduceOperator for computing new cluster positions
    ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
        .input(findNearestClusterCenters)
        .name("Recompute Center Positions")
        .build();
    iteration.setNextPartialSolution(recomputeClusterCenter);
   
    // create DataSourceContract for data point input
    FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
   
    // compute distance of points to final clusters
    CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints2)
        .input2(iteration)
View Full Code Here

TOP

Related Classes of org.apache.flink.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.