new Path("dedup-hash-"+
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
// Configure the job named "dedup 2: content by hash", starting from a fresh
// NutchJob built on this object's configuration.
job = new NutchJob(getConf());
job.setJobName("dedup 2: content by hash");
// NOTE(review): outDir1 is presumably the output directory of an earlier
// dedup job defined outside this excerpt -- confirm against the code above.
job.addInputPath(outDir1);
// The input is read through SequenceFileInputFormat.
job.setInputFormat(SequenceFileInputFormat.class);
// Map output records are keyed by MD5Hash and carry IndexDoc values.
job.setMapOutputKeyClass(MD5Hash.class);
job.setMapOutputValueClass(IndexDoc.class);
// Map output is distributed across reducers by HashPartitioner.
job.setPartitionerClass(HashPartitioner.class);
// Speculative execution is turned off for this job.
// NOTE(review): the reason is not visible in this excerpt; presumably duplicate
// task attempts would be unsafe for whatever the tasks write -- confirm.
job.setSpeculativeExecution(false);