Examples of CrawlURIDispositionEvent


Examples of org.archive.crawler.event.CrawlURIDispositionEvent

            long delay_ms = retryDelayFor(curi) * 1000;
            curi.processingCleanup(); // lose state that shouldn't burden retry
            wq.unpeek(curi);
            wq.update(this, curi); // rewrite any changes
            handleQueue(wq,curi.includesRetireDirective(),now,delay_ms);
            appCtx.publishEvent(new CrawlURIDispositionEvent(this,curi,DEFERRED_FOR_RETRY));
            doJournalReenqueued(curi);
            wq.makeDirty();
            return; // no further dequeueing, logging, rescheduling to occur
        }

        // Curi will definitely be disposed of without retry, so remove from queue
        wq.dequeue(this,curi);
        decrementQueuedCount(1);
        largestQueues.update(wq.getClassKey(), wq.getCount());
        log(curi);

       
        if (curi.isSuccess()) {
            // codes deemed 'success'
            incrementSucceededFetchCount();
            totalProcessedBytes.addAndGet(curi.getRecordedSize());
            appCtx.publishEvent(new CrawlURIDispositionEvent(this,curi,SUCCEEDED));
            doJournalFinishedSuccess(curi);
          
        } else if (isDisregarded(curi)) {
            // codes meaning 'undo' (even though URI was enqueued,
            // we now want to disregard it from normal success/failure tallies)
            // (eg robots-excluded, operator-changed-scope, etc)
            incrementDisregardedUriCount();
            appCtx.publishEvent(new CrawlURIDispositionEvent(this,curi,DISREGARDED));
            holderCost = 0; // no charge for disregarded URIs
            // TODO: consider reinstating forget-URI capability, so URI could be
            // re-enqueued if discovered again
            doJournalDisregarded(curi);
           
        } else {
            // codes meaning 'failure'
            incrementFailedFetchCount();
            appCtx.publishEvent(new CrawlURIDispositionEvent(this,curi,FAILED));
            // if exception, also send to crawlErrors
            if (curi.getFetchStatus() == S_RUNTIME_EXCEPTION) {
                Object[] array = { curi };
                loggerModule.getRuntimeErrors().log(Level.WARNING, curi.getUURI()
                        .toString(), array);
View Full Code Here

Examples of org.archive.crawler.event.CrawlURIDispositionEvent

     * @see org.archive.crawler.framework.Frontier#deleted(org.archive.modules.CrawlURI)
     */
    public void deleted(CrawlURI curi) {
        //treat as disregarded
        appCtx.publishEvent(
            new CrawlURIDispositionEvent(this,curi,DISREGARDED));
        log(curi);
        incrementDisregardedUriCount();
        curi.stripToMinimal();
        curi.processingCleanup();
    }
View Full Code Here

Examples of org.archive.crawler.event.CrawlURIDispositionEvent

                    throw new RuntimeException("Unknown state: " + event1.getState());
            }
        }

        if(event instanceof CrawlURIDispositionEvent) {
            CrawlURIDispositionEvent dvent = (CrawlURIDispositionEvent)event;
            switch(dvent.getDisposition()) {
                case SUCCEEDED:
                    this.crawledURISuccessful(dvent.getCrawlURI());
                    break;
                case FAILED:
                    this.crawledURIFailure(dvent.getCrawlURI());
                    break;
                case DISREGARDED:
                    this.crawledURIDisregard(dvent.getCrawlURI());
                    break;
                case DEFERRED_FOR_RETRY:
                    this.crawledURINeedRetry(dvent.getCrawlURI());
                    break;
                default:
                    throw new RuntimeException("Unknown disposition: " + dvent.getDisposition());
            }
        }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.