Package com.crawl.control

Examples of com.crawl.control.Crawler


     * This is the main function it is generating the output collection.
     */
    public String getDoAnalyze() {
        logger.info(this.toString());
        // Create the crawler object
        Crawler aCrawl = Crawler.getInstance();
        String craigslistURL = aCrawl.createUrl(
                this.getCategoryCode(),
                this.getLocationURL(),
                this.getKeyword());
        // Get all items
        crawlResult = aCrawl.crawlWebPages(craigslistURL, 2500);
        logger.debug("aResultColl Size=" + crawlResult.size());
        // Create analyzer object
        analyzer = new AnaCrack();
        // Do the analyzing and return the offers
        analyzerOffers = analyzer.analyse(
View Full Code Here


     */
    public static void main(String[] args) {
        StartMain.initLogger();

        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.SAN_FRANCISCO,
                "Apple");
       
        // 2. Step get all offers
        Collection<CrawlResultPackage> aResultColl =aCrawl.crawlWebPages(aUrl, 5000);

        logger.debug("aResultColl Size=" + aResultColl.size());

        // 3. Create analyzer object
        AnaCrack aAnaCrack = new AnaCrack();
View Full Code Here

  // Logger used by the test methods below; obtained via Logger.getLogger(CrawlTest.class).
  static Logger logger = Logger.getLogger(CrawlTest.class);

  @Test
  public void test_createCrawler() {
    logger.debug("Create Crawler class...");
    Crawler c = Crawler.getInstance();
    assertTrue(c != null);
  }
View Full Code Here

    @Test
    public void test_callGetWebPagesMethod() {
        logger.debug("get 100 apple items");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);
       
        logger.debug("aCrawlResultColl.size()="+aCrawlResultColl.size());
       
        assertTrue(aCrawlResultColl.size()==100);
   
View Full Code Here

    @Test
    public void test_MatchPattern() {
        logger.debug("Test MatchPattern...");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);

        String aString=aCrawl.getMatchPattern();
       
        logger.debug("MatchPatter="+aString);
       
        assertTrue(aString!=null);
    }
View Full Code Here

    @Test
    public void test_MatchPatternCompare() {
        logger.debug("Test MatchPattern... compare");

        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);

        String aString=aCrawl.getMatchPattern();
       
        logger.debug("MatchPatter="+aString);
       
        assertTrue(aString.compareTo(".*<a href=\"http://.*.craigslist.*/.*html.*>")==0);
                                     
View Full Code Here

    @Test
    public void test_MatchPatternCompareLength() {
        logger.debug("Test MatchPattern... compare length");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);

        String aString=aCrawl.getMatchPattern();
       
        logger.info("MatchPatter="+aString+" length="+aString.length());
       
        assertTrue(aString.length()==43);
    }
View Full Code Here

    @Test
    public void test_url1() {
        logger.debug("Test url 1");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);
       
        logger.info("URL="+aUrl+" length="+aUrl.length());
       
        assertTrue(aUrl != null);
    }
View Full Code Here

    @Test
    public void test_url2() {
        logger.debug("Test url 2...");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);
       
        logger.info("URL="+aUrl+" length="+aUrl.length());
       
        assertTrue(aUrl.compareTo("http://sfbay.craigslist.org/search/sya/pen?query=Apple&maxAsk=100000&sort=pricedsc&srchType=A&s=") ==0);   
    }
View Full Code Here

    @Test
    public void test_url3() {
        logger.debug("Test url 3...");
       
        // 1. Create the crawler object
        Crawler aCrawl = Crawler.getInstance();

        String aUrl=aCrawl.createUrl(
                CraigslistCategoryEnum.FOR_SALE__COMPUTER,
                CraigslistAreasEnum.PENINSULA,
                "Apple");
              
        Collection<CrawlResultPackage> aCrawlResultColl=aCrawl.crawlWebPages(aUrl, 100);
               
        logger.debug("URL="+aUrl+" length="+aUrl.length());
       
        assertTrue(aUrl.length()==96);   
    }
View Full Code Here

TOP

Related Classes of com.crawl.control.Crawler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.