Package org.archive.util

Examples of org.archive.util.Recorder


    }


    protected Recorder getRecorder() throws IOException {
        if (Recorder.getHttpRecorder() == null) {
            Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(),
                    getClass().getName(), 16 * 1024, 512 * 1024);
            Recorder.setHttpRecorder(httpRecorder);
        }
   
        return Recorder.getHttpRecorder();
View Full Code Here


    throws Exception {
        List<TestData> result = new ArrayList<TestData>();
        UURI src = UURIFactory.getInstance("http://www.archive.org/start/");
        CrawlURI euri = new CrawlURI(src, null, null,
            LinkContext.SPECULATIVE_MISC);
        Recorder recorder = createRecorder(content, "UTF-8");
        euri.setContentType("text/xml");
        euri.setRecorder(recorder);
        euri.setContentSize(content.length());
               
        UURI dest = UURIFactory.getInstance(destURI);
View Full Code Here

    protected void innerProcess(final CrawlURI curi) throws InterruptedException {
        // Note begin time
        curi.setFetchBeginTime(System.currentTimeMillis());

        // Get a reference to the HttpRecorder that is set into this ToeThread.
        final Recorder rec = curi.getRecorder();

        // Shall we get a digest on the content downloaded?
        boolean digestContent = getDigestContent();
        String algorithm = null;
        if (digestContent) {
            algorithm = getDigestAlgorithm();
            rec.getRecordedInput().setDigest(algorithm);
        } else {
            // clear
            rec.getRecordedInput().setDigest((MessageDigest)null);
        }

        FetchHTTPRequest req;
        try {
            req = new FetchHTTPRequest(this, curi);
        } catch (URIException e) {
            cleanup(curi, e, e.getMessage(), S_UNFETCHABLE_URI);
            return;
        }
       
        rec.getRecordedInput().setLimits(getMaxLengthBytes(),
                1000l * (long) getTimeoutSeconds(), (long) getMaxFetchKBSec());

        HttpResponse response = null;
        try {
            response = req.execute();
            addResponseContent(response, curi);
        } catch (ClientProtocolException e) {
            failedExecuteCleanup(curi, e);
            return;
        } catch (IOException e) {
            failedExecuteCleanup(curi, e);
            return;
        }
       
        maybeMidfetchAbort(curi, req.request);
       
        long contentLength = -1l;
        Header h = response.getLastHeader("content-length");
        if (h != null && h.getValue().trim().length()>0) {
            contentLength = Long.parseLong(h.getValue());
        }
        try {
            if (!req.request.isAborted()) {
                // Force read-to-end, so that any socket hangs occur here,
                // not in later modules.
                rec.getRecordedInput().readToEndOfContent(contentLength);
            }
        } catch (RecorderTimeoutException ex) {
            doAbort(curi, req.request, TIMER_TRUNC);
        } catch (RecorderLengthExceededException ex) {
            doAbort(curi, req.request, LENGTH_TRUNC);
        } catch (IOException e) {
            cleanup(curi, e, "readFully", S_CONNECT_LOST);
            return;
        } catch (ArrayIndexOutOfBoundsException e) {
            // For weird windows-only ArrayIndex exceptions from native code
            // see http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
            // treating as if it were an IOException
            cleanup(curi, e, "readFully", S_CONNECT_LOST);
            return;
        } finally {
            rec.close();
            // ensure recording has stopped
            rec.closeRecorders();
            // Note completion time
            curi.setFetchCompletedTime(System.currentTimeMillis());
           
            // Set the response charset into the HttpRecord if available.
            setCharacterEncoding(curi, rec, response);
            setSizes(curi, rec);
            setOtherCodings(curi, rec, response);
        }

        if (digestContent) {
            curi.setContentDigest(algorithm,
                rec.getRecordedInput().getDigestValue());
        }

        if (logger.isLoggable(Level.FINE)) {
            logger.fine(((curi.getFetchType() == HTTP_POST) ? "POST" : "GET")
                    + " " + curi.getUURI().toString() + " "
                    + response.getStatusLine().getStatusCode() + " "
                    + rec.getRecordedInput().getSize() + " "
                    + curi.getContentType());
        }

        if (isSuccess(curi) && req.addedCredentials) {
            // Promote the credentials from the CrawlURI to the CrawlServer
            // so they are available for all subsequent CrawlURIs on this
            // server.
            promoteCredentials(curi);
        } else if (response.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
            // 401 is not 'success'.
            handle401(response, curi);
        } else if (response.getStatusLine().getStatusCode() == HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED) {
            // 407 - remember Proxy-Authenticate headers for later use
            kp.put("proxyAuthChallenges",
                    extractChallenges(response, curi, ProxyAuthenticationStrategy.INSTANCE));
        }

        if (rec.getRecordedInput().isOpen()) {
            logger.severe(curi.toString() + " RIS still open. Should have"
                    + " been closed by method release: "
                    + Thread.currentThread().getName());
            try {
                rec.getRecordedInput().close();
            } catch (IOException e) {
                logger.log(Level.SEVERE, "second-chance RIS close failed", e);
            }
        }
    }
View Full Code Here

        CrawlURI testUri = new CrawlURI(testUuri, null, null, LinkContext.NAVLINK_MISC);
        StringBuilder content = new StringBuilder();
        for (String chunk: TEST_CONTENT_CHUNKS) {
            content.append(chunk);
        }
        Recorder recorder = createRecorder(content.toString(), "UTF-8");
        testUri.setContentType("text/html");
        testUri.setRecorder(recorder);
        testUri.setContentSize(content.length());
       
        extractor.process(testUri);
View Full Code Here

        URLConnection conn = new URL(url).openConnection();
        conn.setConnectTimeout(10000);
        conn.setReadTimeout(30000);
        InputStream in = conn.getInputStream();

        Recorder recorder = Recorder.wrapInputStreamWithHttpRecord(
            TestUtils.tmpDir(), this.getClass().getName(), in, null);
        logger.info("got recorder for " + url);

        curi.setContentSize(recorder.getRecordedInput().getSize());
        curi.setContentType("application/x-shockwave-flash");
        curi.setFetchStatus(200);
        curi.setRecorder(recorder);

        return curi;
View Full Code Here

    throws Exception {
        List<TestData> result = new ArrayList<TestData>();
        UURI src = UURIFactory.getInstance("http://www.archive.org/start/");
        CrawlURI euri = new CrawlURI(src, null, null,
                LinkContext.NAVLINK_MISC);
        Recorder recorder = createRecorder(content, "UTF-8");
        euri.setContentType("text/html");
        euri.setRecorder(recorder);
        euri.setContentSize(content.length());
               
        UURI dest = UURIFactory.getInstance(destURI);
View Full Code Here

    protected Collection<TestData> makeData(String content, String destURI)
    throws Exception {
        List<TestData> result = new ArrayList<TestData>();
        UURI src = UURIFactory.getInstance("http://www.archive.org/foo/dummy.js");
        CrawlURI euri = new CrawlURI(src, null, src, LinkContext.NAVLINK_MISC);
        Recorder recorder = createRecorder(content, "UTF-8");
        euri.setContentType("text/javascript");
        euri.setRecorder(recorder);
        euri.setContentSize(content.length());

        if (destURI != null) {
View Full Code Here

    @Override
    protected Collection<TestData> makeData(String content, String uri)
    throws Exception {
        UURI src = UURIFactory.getInstance("http://www.archive.org/start/");
        CrawlURI euri = new CrawlURI(src, null, null, NAVLINK_MISC);
        Recorder recorder = createRecorder(content);
        euri.setContentType("text/css");
        euri.setRecorder(recorder);
        euri.setContentSize(content.length());
       
        // TODO: This test was naively modified to account for the abscense of LINK, but no effort was made to confirm that it is actually testing anything useful
View Full Code Here

        return fetchHttp;
    }

    protected Recorder getRecorder() throws IOException {
        if (Recorder.getHttpRecorder() == null) {
            Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(),
                    getClass().getName(), 16 * 1024, 512 * 1024);
            Recorder.setHttpRecorder(httpRecorder);
        }

        return Recorder.getHttpRecorder();
View Full Code Here

        controller = g.getController();
        serialNumber = sn;
        setPriority(DEFAULT_PRIORITY);
        int outBufferSize = controller.getRecorderOutBufferBytes();
        int inBufferSize = controller.getRecorderInBufferBytes();
        httpRecorder = new Recorder(controller.getScratchDir().getFile(),
            "tt" + sn + "http", outBufferSize, inBufferSize);
        lastFinishTime = System.currentTimeMillis();
    }
View Full Code Here

TOP

Related Classes of org.archive.util.Recorder

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.