Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.JobConf.addInputPath()
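
The snippets on this page are truncated excerpts from Apache Nutch, so none of them compiles on its own. A minimal, self-contained sketch of the shared pattern, written against the old org.apache.hadoop.mapred API in which JobConf.addInputPath() exists, might look like the following; the input and output paths are placeholders, and the identity mapper/reducer merely stand in for real job classes.

    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    import org.apache.hadoop.mapred.lib.IdentityMapper;
    import org.apache.hadoop.mapred.lib.IdentityReducer;

    public class AddInputPathSketch {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(AddInputPathSketch.class);
        job.setJobName("addInputPath sketch");

        // Each call appends one more path to the job's input list.
        job.addInputPath(new Path("/data/input-a"));
        job.addInputPath(new Path("/data/input-b"));

        job.setInputFormat(TextInputFormat.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        // TextInputFormat produces (LongWritable offset, Text line) pairs,
        // which the identity mapper/reducer pass through unchanged.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputPath(new Path("/data/output"));
        job.setOutputFormat(TextOutputFormat.class);

        JobClient.runJob(job);
      }
    }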


    Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

    JobConf job = new NutchJob(config);
    job.setJobName("stats " + crawlDb);

    job.addInputPath(new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(CrawlDbStatMapper.class);
    job.setCombinerClass(CrawlDbStatCombiner.class);
    job.setReducerClass(CrawlDbStatReducer.class);
View Full Code Here


    Path outFolder = new Path(output);

    JobConf job = new NutchJob(config);
    job.setJobName("dump " + crawlDb);

    job.addInputPath(new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setOutputPath(outFolder);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
View Full Code Here

               "/readdb-topN-temp-"+
               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf job = new NutchJob(config);
    job.setJobName("topN prepare " + crawlDb);
    job.addInputPath(new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CrawlDbTopNMapper.class);
    job.setReducerClass(IdentityReducer.class);

    job.setOutputPath(tempDir);
View Full Code Here

    }
    job = new NutchJob(config);
    job.setJobName("topN collect " + crawlDb);
    job.setLong("CrawlDbReader.topN", topN);

    job.addInputPath(tempDir);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(CrawlDbTopNReducer.class);

    job.setOutputPath(outFolder);
View Full Code Here
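
The two topN snippets above are the prepare and collect halves of a single operation: the first job writes intermediate results into a randomly named temporary directory, and the second job picks that directory up with addInputPath(tempDir). Below is a generic sketch of that chaining pattern, assuming the old mapred API; the paths, job names, and final cleanup are illustrative, not the actual Nutch code.

    import java.io.IOException;
    import java.util.Random;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.SequenceFileInputFormat;
    import org.apache.hadoop.mapred.SequenceFileOutputFormat;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class ChainedJobsSketch {
      public static void main(String[] args) throws IOException {
        Configuration config = new Configuration();
        Path tempDir = new Path("/tmp/chain-temp-"
            + new Random().nextInt(Integer.MAX_VALUE));

        // First job: read the real input and stage (offset, line) records in tempDir.
        // The default identity mapper/reducer are used, so records pass through unchanged.
        JobConf prepare = new JobConf(config);
        prepare.setJobName("chain prepare");
        prepare.addInputPath(new Path("/data/input"));
        prepare.setInputFormat(TextInputFormat.class);
        prepare.setOutputPath(tempDir);
        prepare.setOutputFormat(SequenceFileOutputFormat.class);
        prepare.setOutputKeyClass(LongWritable.class);
        prepare.setOutputValueClass(Text.class);
        JobClient.runJob(prepare);

        // Second job: the temporary directory becomes the input of the next stage.
        JobConf collect = new JobConf(config);
        collect.setJobName("chain collect");
        collect.addInputPath(tempDir);
        collect.setInputFormat(SequenceFileInputFormat.class);
        collect.setOutputPath(new Path("/data/output"));
        collect.setOutputFormat(TextOutputFormat.class);
        collect.setOutputKeyClass(LongWritable.class);
        collect.setOutputValueClass(Text.class);
        JobClient.runJob(collect);

        // Remove the intermediate data once the second job has finished.
        FileSystem.get(config).delete(tempDir, true);
      }
    }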

    }

    JobConf job = createJobConf();
    job.setJobName("read " + segment);

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));
View Full Code Here

    JobConf job = createJobConf();
    job.setJobName("read " + segment);

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));
View Full Code Here

    JobConf job = createJobConf();
    job.setJobName("read " + segment);

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));

    job.setInputFormat(SequenceFileInputFormat.class);
View Full Code Here

    job.setJobName("read " + segment);

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(InputCompatMapper.class);
View Full Code Here

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(InputCompatMapper.class);
    job.setReducerClass(SegmentReader.class);
View Full Code Here

    if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
    if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
    if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
    if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
    if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(InputCompatMapper.class);
    job.setReducerClass(SegmentReader.class);
View Full Code Here
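
In the SegmentReader-style snippets above, each flag that is set adds one more segment subdirectory, so the mapper reads the union of every path added. JobConf.addInputPath() was deprecated in later releases of the old mapred API in favor of the static helpers on org.apache.hadoop.mapred.FileInputFormat, which maintain the same input-path list. A brief sketch follows; the segment path is a placeholder, and the literal directory names stand in for the CrawlDatum/Content constants used above.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class MultiInputSketch {
      public static void main(String[] args) {
        JobConf job = new JobConf();
        Path segment = new Path("/crawl/segments/20180101000000"); // placeholder

        // Equivalent to job.addInputPath(...): each call appends to the same list.
        FileInputFormat.addInputPath(job, new Path(segment, "crawl_generate"));
        FileInputFormat.addInputPath(job, new Path(segment, "crawl_fetch"));

        // The job will read the union of everything added so far.
        Path[] inputs = FileInputFormat.getInputPaths(job);
        System.out.println(inputs.length + " input paths configured");
      }
    }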
