Package spiderman.plugin.util

Examples of spiderman.plugin.util.PageFetcherImpl$GzipDecompressingEntity


  /**
   * No-op: the body is empty, so calling this method does nothing.
   */
  public void destroy() {
  }
 
  public static void main(String[] args){
    PageFetcherImpl fetcher = new PageFetcherImpl();
    SpiderConfig config = new SpiderConfig();
    config.setCharset("utf-8");
    config.setPolitenessDelay(200);
    fetcher.setConfig(config);
    fetcher.init(null);
    try {
      String url = "http://www.livingsocial.com/cities/1964-klang-valley-kuala-lumpur/deals/638602-patong-bay-resotel-return-flight?append_ref_code=source_cities_show";
      FetchRequest req = new FetchRequest();
      req.setUrl(url);
      FetchResult rs = fetcher.fetch(req);
      System.out.println(rs);
      System.out.println(rs.getPage().getContent());
    } catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here


  }
 
  public FetchResult fetch(Task task, FetchResult result) throws Exception {
    synchronized (site) {
      if (site.fetcher == null){
        PageFetcherImpl fetcher = new PageFetcherImpl();
        SpiderConfig config = new SpiderConfig();
        if (task.site.getCharset() != null && task.site.getCharset().trim().length() > 0)
          config.setCharset(task.site.getCharset());
        if (task.site.getUserAgent() != null && task.site.getUserAgent().trim().length() > 0)
          config.setUserAgentString(task.site.getUserAgent());
        if (task.site.getIncludeHttps() != null && task.site.getIncludeHttps().trim().length() > 0)
          config.setIncludeHttpsPages("1".equals(task.site.getIncludeHttps()) || "true".equals(task.site.getIncludeHttps()));
        String sdelay = task.site.getReqDelay();
        if (sdelay == null || sdelay.trim().length() == 0)
          sdelay = "200";
       
        int delay = CommonUtil.toSeconds(sdelay).intValue()*1000;
        if (delay < 0)
          delay = 200;
       
        config.setPolitenessDelay(delay);
        fetcher.setConfig(config);
       
        fetcher.init(site);
        site.fetcher = fetcher;
      }
     
      String url = task.url.replace(" ", "%20");
     
View Full Code Here

TOP

Related Classes of spiderman.plugin.util.PageFetcherImpl$GzipDecompressingEntity

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.