Package org.archive.util

Examples of org.archive.util.Recorder


        StringBuilder content = new StringBuilder();
        String line = "";
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
        Recorder recorder = createRecorder(content.toString(), "UTF-8");
        IOUtils.closeQuietly(is);

        testUri.setContentType("text/html");
        testUri.setFetchStatus(200);
        testUri.setRecorder(recorder);
View Full Code Here


        }
    }

    protected void fetch(CrawlURI curi, String whoisServer, String whoisQuery) {
        WhoisClient client = new WhoisClient();
        Recorder recorder = curi.getRecorder();
       
        try {
            client.setConnectTimeout(getSoTimeoutMs());
            client.setDefaultTimeout(getSoTimeoutMs());
           
            if (curi.getUURI().getPort() > 0) {
                client.connect(whoisServer, curi.getUURI().getPort());
            } else {
                client.connect(whoisServer);
            }

            client.setSoTimeout(getSoTimeoutMs()); // must be after connect()
           
            curi.getData().put(CoreAttributeConstants.A_WHOIS_SERVER_IP,
                    client.getRemoteAddress().getHostAddress());
           
            recorder.inputWrap(client.getInputStream(whoisQuery));

            // look for info about whois server in the response
            // XXX run regex on the whole thing, rather than line by line?
            BufferedReader reader = new BufferedReader(new InputStreamReader(recorder.getRecordedInput(), "ASCII"));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                Matcher matcher = TextUtils.getMatcher(WHOIS_SERVER_REGEX, line);
                if (matcher.find()) {
                    // gets rid of "domain " for whois.verisign-grs.com queries
                    String key = whoisQuery.replaceFirst("(\\S+\\s+)+", "").toLowerCase();
                    referralServers.put(key, matcher.group(1).toLowerCase());
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("added referral server " + matcher.group(1) + " to server list for " + key);
                    }
                }
            }

            curi.setContentType("text/plain");
            curi.setFetchStatus(S_WHOIS_SUCCESS);
        } catch (IOException e) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("failed to connect to whois server for uri " + curi + ": " + e);
            }
            curi.getNonFatalFailures().add(e);
            curi.setFetchStatus(S_CONNECT_FAILED);
        } finally {
            recorder.close();
            curi.setContentSize(recorder.getRecordedInput().getSize());
            logger.fine(curi + ": " + recorder.getRecordedInput().getSize() + " bytes read");

            if (client != null && client.isConnected()) try {
                client.disconnect();
            } catch (IOException e) {
                logger.fine("problem closing connection to whois server for uri " + curi + ": " + e);
View Full Code Here

    // to track if we got a response (any response) or an exception.
    boolean gotUrl = false;
    boolean isTimeout = false;
    String fName = backingFileBase + "-" + Thread.currentThread().getId();
    Recorder recorder = new Recorder(recorderCacheDir,fName,
        outBufferSize, inBufferSize);
   
    ExtendedGetMethod getMethod = null;

    // TWO STEPS:
    // first do the GET, using a Recorder to get the response.
    // then, if that worked, save the recorded value into an ARC
        //     and return its region
    // if we didn't get a response, forge a fake record and return that.
    try {
      Recorder.setHttpRecorder(recorder);
      LaxURI lURI = new LaxURI(url,true);
      getMethod = new ExtendedGetMethod(url,recorder);
      getMethod.setURI(lURI);
      HttpClient client = getHttpClient();
      getMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES);
      getMethod.setFollowRedirects(false);
      getMethod.setRequestHeader("User-Agent", userAgent);
      int code = client.executeMethod(getMethod);
      LOGGER.info("URL(" + url + ") HTTP:" + code);
      InputStream responseIS = getMethod.getResponseBodyAsStream();
      if(responseIS != null) {
        ByteOp.discardStream(responseIS);
        responseIS.close();
      }
      gotUrl = true;

    } catch (URIException e) {
      e.printStackTrace();
    } catch (UnknownHostException e) {
      LOGGER.warning("Unknown host for " + url);

    } catch (ConnectTimeoutException e) {
      // TODO: should we act like it's a full block?
      LOGGER.warning("Timeout out connecting to " + url);
      isTimeout = true;
    } catch(SocketTimeoutException e) {
      LOGGER.warning("Timeout out socket for " + url);
      isTimeout = true;
    } catch (ConnectException e) {
      LOGGER.warning("ConnectionRefused to " + url);
    } catch (NoRouteToHostException e) {
      LOGGER.warning("NoRouteToHost for " + url);
    } catch (SocketException e) {
      // should only be things like "Connection Reset", etc..
      LOGGER.warning("SocketException for " + url);   
    } catch (HttpException e) {
      e.printStackTrace();
      // we have to let IOExceptions out, problems caused by local disk
      // NEED to return errors, indicating that there is not an
      // authoritative answer, and thus... NOTHING can be shown.
//    } catch (IOException e) {
//      e.printStackTrace();
    } finally {
      recorder.closeRecorders();
      Recorder.setHttpRecorder(null);
      if(getMethod != null) {
        getMethod.releaseConnection();
      }
    }

    // now write the content, or a fake record:
    ARCWriter writer = null;
    ReplayInputStream replayIS = null;
    try {
      writer = cache.getWriter();
      if(gotUrl) {

        RecordingInputStream ris = recorder.getRecordedInput();
        replayIS = ris.getReplayInputStream();
        region = storeInputStreamARCRecord(writer, url,
            getMethod.getMime(), getMethod.getRemoteIP(),
            getMethod.getCaptureDate(),
            replayIS, (int) ris.getSize());
      } else if(isTimeout) {
        region = storeTimeout(writer,url);
      } else {
        region = storeNotAvailable(writer, url);
      }

    } finally {
      IOUtils.closeQuietly(replayIS);
      if(writer != null) {
        cache.returnWriter(writer);
      }
    }
    recorder.close();

    return region;
  }
View Full Code Here

TOP

Related Classes of org.archive.util.Recorder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.