Examples of PointInFormat

eu.stratosphere.test.recordJobs.kmeans.udfs.PointInFormat
Generates records with an id and a CoordVector. The input format is line-based, i.e. one record is read from one line, which is terminated by '\n'. Within a line, the first '|' character separates the id from the CoordVector. The vector consists of decimals, which are separated by '|' as well. The id is the id of a data point or cluster center, and the CoordVector is the corresponding position (coordinate vector) of that data point or cluster center. Example line: "42|23.23|52.57|74.43|" is read as Id: 42, Coordinate vector: (23.23, 52.57, 74.43).
org.apache.flink.test.recordJobs.kmeans.udfs.PointInFormat
Generates records with an id and a CoordVector. The input format is line-based, i.e. one record is read from one line, which is terminated by '\n'. Within a line, the first '|' character separates the id from the CoordVector. The vector consists of decimals, which are separated by '|' as well. The id is the id of a data point or cluster center, and the CoordVector is the corresponding position (coordinate vector) of that data point or cluster center. Example line: "42|23.23|52.57|74.43|" is read as Id: 42, Coordinate vector: (23.23, 52.57, 74.43).

Examples of eu.stratosphere.test.recordJobs.kmeans.udfs.PointInFormat

    // 1. VERTICES
    // --------------------------------------------------------------------------------------------------------------


    // - input -----------------------------------------------------------------------------------------------------
    JobInputVertex input = JobGraphUtils.createInput(
      new PointInFormat(), inputPath, "Input", jobGraph, numSubTasks, numSubTasks);
    TaskConfig inputConfig = new TaskConfig(input.getConfiguration());
    {
      inputConfig.setOutputSerializer(serializer);
      inputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    }

View Full Code Here

Examples of eu.stratosphere.test.recordJobs.kmeans.udfs.PointInFormat


    return toParameterList(config1);
  }
  
  private static Plan getPlan(int numSubTasks, String input, String output) {
    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);
    
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);

View Full Code Here

Examples of eu.stratosphere.test.recordJobs.kmeans.udfs.PointInFormat

    final String clusterInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);


    // create DataSourceContract for cluster center input
    FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
    initialClusterPoints.setDegreeOfParallelism(1);
    
    BulkIteration iteration = new BulkIteration("K-Means Loop");
    iteration.setInput(initialClusterPoints);
    iteration.setMaximumNumberOfIterations(numIterations);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");


    // create CrossOperator for distance computation
    CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints)
        .input2(iteration.getPartialSolution())
        .name("Compute Distances")
        .build();


    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
        .input(computeDistance)
        .name("Find Nearest Centers")
        .build();


    // create ReduceOperator for computing new cluster positions
    ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
        .input(findNearestClusterCenters)
        .name("Recompute Center Positions")
        .build();
    iteration.setNextPartialSolution(recomputeClusterCenter);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
    
    // compute distance of points to final clusters 
    CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints2)
        .input2(iteration)

View Full Code Here

Examples of eu.stratosphere.test.recordJobs.kmeans.udfs.PointInFormat

        }
    }


    static Plan getTestPlan(int numSubTasks, String input, String output) {


        FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
        initialInput.setDegreeOfParallelism(1);


        BulkIteration iteration = new BulkIteration("Loop");
        iteration.setInput(initialInput);
        iteration.setMaximumNumberOfIterations(2);

View Full Code Here

Examples of org.apache.flink.test.recordJobs.kmeans.udfs.PointInFormat


  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    DataSet<Record> initialInput = env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);
    
    IterativeDataSet<Record> iteration = initialInput.iterate(2);
    
    DataSet<Record> result = iteration.union(iteration).map(new IdentityMapper());

View Full Code Here

Examples of org.apache.flink.test.recordJobs.kmeans.udfs.PointInFormat

    // 1. VERTICES
    // --------------------------------------------------------------------------------------------------------------


    // - input -----------------------------------------------------------------------------------------------------
    InputFormatVertex input = JobGraphUtils.createInput(
      new PointInFormat(), inputPath, "Input", jobGraph, numSubTasks);
    TaskConfig inputConfig = new TaskConfig(input.getConfiguration());
    {
      inputConfig.setOutputSerializer(serializer);
      inputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    }

View Full Code Here

Examples of org.apache.flink.test.recordJobs.kmeans.udfs.PointInFormat

    }
  }


  static Plan getTestPlan(int numSubTasks, String input, String output) {


    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);


    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);

View Full Code Here

Examples of org.apache.flink.test.recordJobs.kmeans.udfs.PointInFormat

    final String clusterInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);


    // create DataSourceContract for cluster center input
    FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
    initialClusterPoints.setDegreeOfParallelism(1);
    
    BulkIteration iteration = new BulkIteration("K-Means Loop");
    iteration.setInput(initialClusterPoints);
    iteration.setMaximumNumberOfIterations(numIterations);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");


    // create CrossOperator for distance computation
    CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints)
        .input2(iteration.getPartialSolution())
        .name("Compute Distances")
        .build();


    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
        .input(computeDistance)
        .name("Find Nearest Centers")
        .build();


    // create ReduceOperator for computing new cluster positions
    ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
        .input(findNearestClusterCenters)
        .name("Recompute Center Positions")
        .build();
    iteration.setNextPartialSolution(recomputeClusterCenter);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
    
    // compute distance of points to final clusters 
    CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints2)
        .input2(iteration)

View Full Code Here

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.