Package org.eweb4j.spiderman.fetcher

Examples of org.eweb4j.spiderman.fetcher.FetchRequest


    config.setPolitenessDelay(200);
    fetcher.setConfig(config);
    fetcher.init(null);
    try {
      String url = "http://www.livingsocial.com/cities/1964-klang-valley-kuala-lumpur/deals/638602-patong-bay-resotel-return-flight?append_ref_code=source_cities_show";
      FetchRequest req = new FetchRequest();
      req.setUrl(url);
      FetchResult rs = fetcher.fetch(req);
      System.out.println(rs);
      System.out.println(rs.getPage().getContent());
    } catch (Exception e) {
      e.printStackTrace();
View Full Code Here


        site.fetcher = fetcher;
      }
     
      String url = task.url.replace(" ", "%20");
     
      FetchRequest req = new FetchRequest();
      req.setUrl(url);
     
      FetchResult fr = site.fetcher.fetch(req);
      return fr;
    }
//    return fetch();
View Full Code Here

   
    if (visitedUrls.contains(nextUrl)){
      return ;
    }
   
    FetchRequest req = new FetchRequest();
    req.setUrl(nextUrl);
    FetchResult fr = task.site.fetcher.fetch(req);
    if (fr == null || fr.getPage() == null)
      return ;
   
    //记录已经访问过该url,下次不要重复访问它
View Full Code Here

    }
   
    //解析nextPage,找出里面的目标URL
    Task nextTask = new Task(nextUrl, task.url, task.site, 0);
   
    FetchRequest req = new FetchRequest();
    req.setUrl(nextUrl);
    FetchResult fr = task.site.fetcher.fetch(req);
    if (fr == null || fr.getPage() == null)
      return ;
   
    //记录已经访问过该url,下次不要重复访问它
View Full Code Here

TOP

Related Classes of org.eweb4j.spiderman.fetcher.FetchRequest

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.