Examples of ParseImpl


Examples of org.apache.nutch.parse.ParseImpl

              // Score at this stage is 1.0f.
              metaData.set(Nutch.SCORE_KEY, Float.toString(datum.getScore())); // TODO MC
                               
              // WritableComparable outkey = new UTF8(d.urlString);
              WritableComparable outkey = new Text(url);
              Writable outvalue = new FetcherOutput(datum, null, new ParseImpl(parse));                
                   
              // output.collect(outkey, outvalue);
              Text key=Nutchwax.generateWaxKey(outkey, collectionName);
              output.collect(key, outvalue);                       
            }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

  private Configuration conf;
   
  public Parse getParse(Content content)
  {
//    return new ParseImpl(content.getUrl(),  TODO MC BUG - don't index url as content
    return new ParseImpl("",
      new ParseData(ParseStatus.STATUS_SUCCESS,
      "", new Outlink[0], content.getMetadata()));
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
                                        outlinks, content.getMetadata(),
                                        metadata);
    parseData.setConf(this.conf);
    return new ParseImpl(text, parseData);
    // any filter?
    //return HtmlParseFilters.filter(content, parse, root);
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
      outlinks, content.getMetadata());
    parseData.setConf(this.conf);
   
    return new ParseImpl(text, parseData);
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
                                        outlinks, content.getMetadata(),
                                        metadata);
    parseData.setConf(this.conf);
    return new ParseImpl(text, parseData);
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

          kbPerSecond));
      }
    }

    Writable v = new FetcherOutput(datum, null,
      parse != null ? new ParseImpl(parse) : null);      
    if (collectionType.equals(Global.COLLECTION_TYPE_MULTIPLE)) {
      LOG.info("multiple: "+SqlSearcher.getCollectionNameWithTimestamp(this.collectionName,arcData.getDate())+" "+url);
      output.collect(Nutchwax.generateWaxKey(url,SqlSearcher.getCollectionNameWithTimestamp(this.collectionName,arcData.getDate())), v);
    }
    else {
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    String class1 = "NonExistingFilter";
    String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
    conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);

    IndexingFilters filters = new IndexingFilters(conf);
    filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
      new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
      "http://www.example.com/"), new CrawlDatum(), new Inlinks());
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

          Integer.toString(metaTags.getRefreshTime()) });
    }
    ParseData parseData = new ParseData(status, title, outlinks, content
        .getMetadata(), nutchMetadata);
    ParseResult parseResult = ParseResult.createParseResult(content
        .getUrl(), new ParseImpl(text, parseData));

    // run filters on parse
    ParseResult filteredParse = this.htmlParseFilters.filter(content,
        parseResult, metaTags, root);
    if (metaTags.getNoCache()) { // not okay to cache
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    conf.setBoolean("anchorIndexingFilter.deduplicate", true);
    AnchorIndexingFilter filter = new AnchorIndexingFilter();
    filter.setConf(conf);
    Assert.assertNotNull(filter);
    NutchDocument doc = new NutchDocument();
    ParseImpl parse = new ParseImpl("foo bar", new ParseData());
    Inlinks inlinks = new Inlinks();
    inlinks.add(new Inlink("http://test1.com/", "text1"));
    inlinks.add(new Inlink("http://test2.com/", "text2"));
    inlinks.add(new Inlink("http://test3.com/", "text2"));
    try {
View Full Code Here

Examples of org.apache.nutch.parse.ParseImpl

    final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                                              resultTitle, outlinks,
                                              content.getMetadata());

    if (LOG.isTraceEnabled()) { LOG.trace("Zip file parsed sucessfully !!"); }
    return ParseResult.createParseResult(content.getUrl(), new ParseImpl(resultText, parseData));
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.