Package edu.uci.ics.crawler4j.crawler

Examples of edu.uci.ics.crawler4j.crawler.CrawlController.start()


    // controller.setProxy("proxyserver.example.com", 8080, username,
    // password);

    MyImageCrawler.configure(crawlDomains, storageFolder);

    controller.start(MyImageCrawler.class, numberOfCrawlers);
  }

}
View Full Code Here


      /*
       * Start the crawl. This is a blocking operation, meaning
       * that your code will reach the line after this only when
       * crawling is finished.
       */
      controller.start(MyCrawler.class, numberOfCrawlers);
    }

}

View Full Code Here

      String rootFolder = args[0];
      int numberOfCrawlers = Integer.parseInt(args[1]);
     
      CrawlController controller = new CrawlController(rootFolder);   
      controller.addSeed("http://www.ics.uci.edu/");
      controller.start(MyCrawler.class, numberOfCrawlers)
     
      List<Object> crawlersLocalData = controller.getCrawlersLocalData();
      long totalLinks = 0;
      long totalTextSize = 0;
      int totalProcessedPages = 0;
View Full Code Here

    /*
     * Start the crawl. This is a blocking operation, meaning that your code
     * will reach the line after this only when crawling is finished.
     */
    controller.start(StatusHandlerCrawler.class, numberOfCrawlers);
  }
}
View Full Code Here

      controller.addSeed(domain);
    }

    ImageCrawler.configure(crawlDomains, storageFolder);

    controller.start(ImageCrawler.class, numberOfCrawlers);
  }

}
View Full Code Here

    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    controller.addSeed("http://www.ics.uci.edu/");
    controller.start(LocalDataCollectorCrawler.class, numberOfCrawlers);

    List<Object> crawlersLocalData = controller.getCrawlersLocalData();
    long totalLinks = 0;
    long totalTextSize = 0;
    int totalProcessedPages = 0;
View Full Code Here

    /*
     * Start the crawl. This is a blocking operation, meaning that your code
     * will reach the line after this only when crawling is finished.
     */
    controller.start(BasicCrawler.class, numberOfCrawlers);
  }
}
View Full Code Here

      controller.addSeed("http://zookeeper.apache.org/");
      controller.setPolitenessDelay(1000);
      controller.setMaximumCrawlDepth(2);
      controller.setMaximumPagesToFetch(1);

      controller.start(MyCrawler.class, numberOfCrawlers);
    }


  public static class MyCrawler extends WebCrawler {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.