Package com.datasalt.pangool.tuplemr

Examples of com.datasalt.pangool.tuplemr.IdentityTupleMapper


    // Then we need to sort by topic and count (DESC) -> This way we will receive the most relevant words first.
    builder.setOrderBy(new OrderBy().add("topic", Order.ASC).add("count", Order.DESC));
    // Note that we are changing the grouping logic in the job configuration,
    // However, as we work with tuples, we don't need to write specific code for grouping the same data differently,
    // Therefore an IdentityTupleMapper is sufficient for this Job.
    builder.addTupleInput(new Path(args[0]), new IdentityTupleMapper()); // Note the use of "addTupleInput"
    /*
     * TODO Add Combiner as same Reducer when possible
     */
    builder.setTupleOutput(new Path(args[1]), TopicalWordCount.getSchema());
    builder.addNamedTupleOutput(OUTPUT_TOTALCOUNT, getOutputCountSchema());
View Full Code Here


    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Url Resolution");
    mr.addIntermediateSchema(getURLMapSchema());
    mr.addIntermediateSchema(getURLRegisterSchema());
    mr.addInput(new Path(input1), new TupleTextInputFormat(getURLMapSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.addInput(new Path(input2), new TupleTextInputFormat(getURLRegisterSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
    mr.setTupleReducer(new Handler());
View Full Code Here

    // Then we need to sort by topic and count (DESC) -> This way we will receive the most relevant words first.
    builder.setOrderBy(new OrderBy().add("topic", Order.ASC).add("count", Order.DESC));
    // Note that we are changing the grouping logic in the job configuration,
    // However, as we work with tuples, we don't need to write specific code for grouping the same data differently,
    // Therefore an IdentityTupleMapper is sufficient for this Job.
    builder.addTupleInput(new Path(args[0]), new IdentityTupleMapper()); // Note the use of "addTupleInput"
    /*
     * TODO Add Combiner as same Reducer when possible
     */
    builder.setTupleOutput(new Path(args[1]), TopicalWordCount.getSchema());
    builder.addNamedTupleOutput(OUTPUT_TOTALCOUNT, getOutputCountSchema());
View Full Code Here

    // Then we need to sort by topic and count (DESC) -> This way we will receive the most relevant words first.
    builder.setOrderBy(new OrderBy().add("topic", Order.ASC).add("count", Order.DESC));
    // Note that we are changing the grouping logic in the job configuration,
    // However, as we work with tuples, we don't need to write specific code for grouping the same data differently,
    // Therefore an IdentityTupleMapper is sufficient for this Job.
    builder.addTupleInput(new Path(args[0]), new IdentityTupleMapper()); // Note the use of "addTupleInput"

    builder.setTupleOutput(new Path(args[1]), TopicalWordCount.getSchema());
    builder.addNamedTupleOutput(OUTPUT_TOTALCOUNT, getOutputCountSchema());
    builder.setTupleCombiner(new TopNWords(n));
    builder.setTupleReducer(new TopNWords(n));
View Full Code Here

        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, '\t',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);

    Job job = builder.createJob();
    try {
View Full Code Here

     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ',', '"', '\\');

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
    try {
      Job job = builder.createJob();
      assertRun(job);
View Full Code Here

        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, true, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
    Job job = builder.createJob();
    try {
      assertRun(job);
View Full Code Here

        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER, selector,
        TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
    Job job = builder.createJob();
    try {
      assertRun(job);
View Full Code Here

    /*
     * Define the Input Format and the Output Format!
     */

    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ',', '"', '\\', null, null);
    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setTupleOutput(outPath, schema);
    Job job = builder.createJob();
    try {
      assertRun(job);
View Full Code Here

    InputFormat inputFormat = new TupleTextInputFormat(schema, fieldsPos, false, null);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
    Job job = builder.createJob();
    try {
      assertRun(job);
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.IdentityTupleMapper

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.