Package org.apache.hadoop.zebra.mapred.ArticleGenerator

Examples of org.apache.hadoop.zebra.mapred.ArticleGenerator.Summary


    LOG.info("Cleaning directory: " + batchName);
    fileSys.delete(batchDir, true);
    LOG.info("Generating input files: " + batchName);
    articalGen.batchArticalCreation(fileSys, new Path(srcPath, batchName),
        "doc-", options.srcFiles, options.srcFileLen);
    Summary s = articalGen.getSummary();
    // dumpSummary(s);
    long tmp = 0;
    for (Iterator<Long> it = s.wordCntDist.values().iterator(); it.hasNext(); tmp += it
        .next())
      ;
View Full Code Here


   * the ArticleGenerator.
   *
   * @throws IOException
   */
  void verifyWordCount() throws IOException, ParseException {
    Summary expected = new Summary();
    for (Iterator<Summary> it = summary.values().iterator(); it.hasNext();) {
      Summary e = it.next();
      // dumpSummary(e);
      reduce(expected, e);
    }
    // LOG.info("Dumping aggregated Summary");
    // dumpSummary(expected);

    Summary actual = new Summary();
    BasicTable.Reader reader = new BasicTable.Reader(invIndexTablePath, conf);
    reader.setProjection("count");
    TableScanner scanner = reader.getScanner(null, true);
    Tuple tuple = TypesUtils.createTuple(Projection.toSchema(scanner
        .getProjection()));
View Full Code Here

    LOG.info("Cleaning directory: " + batchName);
    fileSys.delete(batchDir, true);
    LOG.info("Generating input files: " + batchName);
    articalGen.batchArticalCreation(fileSys, new Path(srcPath, batchName),
        "doc-", options.srcFiles, options.srcFileLen);
    Summary s = articalGen.getSummary();
    // dumpSummary(s);
    long tmp = 0;
    for (Iterator<Long> it = s.wordCntDist.values().iterator(); it.hasNext(); tmp += it
        .next())
      ;
View Full Code Here

   * the ArticleGenerator.
   *
   * @throws IOException
   */
  void verifyWordCount() throws IOException, ParseException {
    Summary expected = new Summary();
    for (Iterator<Summary> it = summary.values().iterator(); it.hasNext();) {
      Summary e = it.next();
      // dumpSummary(e);
      reduce(expected, e);
    }
    // LOG.info("Dumping aggregated Summary");
    // dumpSummary(expected);

    Summary actual = new Summary();
    BasicTable.Reader reader = new BasicTable.Reader(invIndexTablePath, conf);
    reader.setProjection("count");
    TableScanner scanner = reader.getScanner(null, true);
    Tuple tuple = TypesUtils.createTuple(Projection.toSchema(scanner
        .getProjection()));
View Full Code Here

    LOG.info("Cleaning directory: " + batchName);
    fileSys.delete(batchDir, true);
    LOG.info("Generating input files: " + batchName);
    articalGen.batchArticalCreation(fileSys, new Path(srcPath, batchName),
        "doc-", options.srcFiles, options.srcFileLen);
    Summary s = articalGen.getSummary();
    // dumpSummary(s);
    long tmp = 0;
    for (Iterator<Long> it = s.wordCntDist.values().iterator(); it.hasNext(); tmp += it
        .next())
      ;
View Full Code Here

   * the ArticleGenerator.
   *
   * @throws IOException
   */
  void verifyWordCount() throws IOException, ParseException {
    Summary expected = new Summary();
    for (Iterator<Summary> it = summary.values().iterator(); it.hasNext();) {
      Summary e = it.next();
      // dumpSummary(e);
      reduce(expected, e);
    }
    // LOG.info("Dumping aggregated Summary");
    // dumpSummary(expected);

    Summary actual = new Summary();
    BasicTable.Reader reader = new BasicTable.Reader(invIndexTablePath, conf);
    reader.setProjection("count");
    TableScanner scanner = reader.getScanner(null, true);
    Tuple tuple = TypesUtils.createTuple(Projection.toSchema(scanner
        .getProjection()));
View Full Code Here

TOP

Related Classes of org.apache.hadoop.zebra.mapred.ArticleGenerator.Summary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.