Package edu.uci.ics.crawler4j.example.imagecrawler

Source Code of edu.uci.ics.crawler4j.example.imagecrawler.Controller

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package edu.uci.ics.crawler4j.example.imagecrawler;

import edu.uci.ics.crawler4j.crawler.CrawlController;

/**
* @author Yasser Ganjisaffar <yganjisa at uci dot edu>
*/

/*
* IMPORTANT: Make sure that you update crawler4j.properties file and
*            set crawler.include_images to true          
*/

public class Controller {

  public static void main(String[] args) throws Exception {
    if (args.length < 3) {
      System.out.println("Needed parameters: ");
      System.out.println("\t rootFolder (it will contain intermediate crawl data)");
      System.out.println("\t numberOfCralwers (number of concurrent threads)");
      System.out.println("\t storageFolder (a folder for storing downloaded images)");
      return;
    }
    String rootFolder = args[0];
    int numberOfCrawlers = Integer.parseInt(args[1]);
    String storageFolder = args[2];

    String[] crawlDomains = new String[] { "http://uci.edu/" };

    CrawlController controller = new CrawlController(rootFolder);
    for (String domain : crawlDomains) {
      controller.addSeed(domain);
    }

    // Be polite:
    // Make sure that we don't send more than 5 requests per second (200
    // milliseconds between requests).0
    controller.setPolitenessDelay(200);

    // Do you need to set a proxy?
    // If so, you can uncomment the following line
    // controller.setProxy("proxyserver.example.com", 8080);
    // OR
    // controller.setProxy("proxyserver.example.com", 8080, username,
    // password);

    MyImageCrawler.configure(crawlDomains, storageFolder);

    controller.start(MyImageCrawler.class, numberOfCrawlers);
  }

}
TOP

Related Classes of edu.uci.ics.crawler4j.example.imagecrawler.Controller

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.