Package net.yacy.cora.protocol.http

Examples of net.yacy.cora.protocol.http.HTTPClient


        requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE);
        requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
        requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);

        final HTTPClient client = new HTTPClient();
        client.setTimout(20000);
        client.setHeader(requestHeader.entrySet());
          final byte[] responseBody = client.GETbytes(request.url());
          final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
          final int code = client.getHttpResponse().getStatusLine().getStatusCode();
            // FIXME: 30*-handling (bottom) is never reached
            // we always get the final content because httpClient.followRedirects = true

          if (responseBody != null && (code == 200 || code == 203)) {
                // the transfer is ok

            //statistics:
            ByteCount.addAccountCount(ByteCount.CRAWLER, responseBody.length);

                // we write the new cache entry to file system directly

                // create a new cache entry
                response = new Response(
                        request,
                        requestHeader,
                        header,
                        Integer.toString(code),
                        null,
                        responseBody
                );

                return response;
            } else if (code > 299 && code < 310) {
                if (header.containsKey(HeaderFramework.LOCATION)) {
                    // getting redirection URL
                  String redirectionUrlString = header.get(HeaderFramework.LOCATION);
                    redirectionUrlString = redirectionUrlString.trim();

                    if (redirectionUrlString.length() == 0) {
                        throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " aborted. Location header is empty.");
                    }

                    // normalizing URL
                    final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(request.url(), redirectionUrlString));


                    // if we are already doing a shutdown we don't need to retry crawling
                    if (Thread.currentThread().isInterrupted()) {
                        throw new IOException("CRAWLER Retry of URL=" + request.url().toString() + " aborted because of server shutdown.");
                    }

                    // retry crawling with new url
                    request.redirectURL(redirectionUrl);
                    return load(request, retryCount - 1);
                }
            } else {
                // if the response has not the right response type then reject file
              throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
            }
        return response;
    }
View Full Code Here


        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());

        final String name = getUrl().getFileName();
        byte[] signatureBytes = null;

        final HTTPClient client = new HTTPClient();
        client.setTimout(6000);
        client.setHeader(reqHeader.entrySet());

        // download signature first, if public key is available
        try {
            if (this.publicKey != null) {
              final byte[] signatureData = client.GETbytes(getUrl().toString() + ".sig");
                if (signatureData == null) {
                    Log.logWarning("yacyVersion", "download of signature " + getUrl().toString() + " failed. ignoring signature file.");
                }
                else signatureBytes = Base64Order.standardCoder.decode(UTF8.String(signatureData).trim());
            }
            client.setTimout(120000);
            client.GET(getUrl().toString());
            final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());

            final boolean unzipped = header.gzip() && (header.mime().toLowerCase().equals("application/x-tar")); // if true, then the httpc has unzipped the file
            if ((unzipped) && (name.endsWith(".tar.gz"))) {
                download = new File(storagePath, name.substring(0, name.length() - 3));
            } else {
                download = new File(storagePath, name);
            }
            if (this.publicKey != null && signatureBytes != null) {
                // copy to file and check signature
                SignatureOutputStream verifyOutput = null;
                try {
                    verifyOutput = new SignatureOutputStream(new FileOutputStream(download), CryptoLib.signAlgorithm, this.publicKey);
                    client.writeTo(new BufferedOutputStream(verifyOutput));

                    if (!verifyOutput.verify(signatureBytes)) throw new IOException("Bad Signature!");
                } catch (final NoSuchAlgorithmException e) {
                    throw new IOException("No such algorithm");
                } catch (final SignatureException e) {
                    throw new IOException("Signature exception");
                } finally {
                    if (verifyOutput != null)
                    verifyOutput.close();
                }
                // Save signature
                final File signatureFile = new File(download.getAbsoluteFile() + ".sig");
                FileUtils.copy(UTF8.getBytes(Base64Order.standardCoder.encode(signatureBytes)), signatureFile);
                if ((!signatureFile.exists()) || (signatureFile.length() == 0)) throw new IOException("create signature file failed");
            } else {
                // just copy into file
                client.writeTo(new BufferedOutputStream(new FileOutputStream(download)));
            }
            if ((!download.exists()) || (download.length() == 0)) throw new IOException("wget of url " + getUrl() + " failed");
        } catch (final IOException e) {
            // Saving file failed, abort download
            Log.logSevere("yacyVersion", "download of " + getName() + " failed: " + e.getMessage());
            if (download != null && download.exists()) {
                FileUtils.deletedelete(download);
                if (download.exists()) Log.logWarning("yacyVersion", "could not delete file "+ download);
            }
            download = null;
        } finally {
          try {
        client.finish();
      } catch (final IOException e) {
        Log.logSevere("yacyVersion", "finish of " + getName() + " failed: " + e.getMessage());
      }
        }
        this.releaseFile = download;
View Full Code Here

        final RequestHeader reqHeader = new RequestHeader();
        reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
        reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        final HTTPClient client = new HTTPClient();
        client.setHeader(reqHeader.entrySet());
        client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));

        Network.log.logInfo("BOOTSTRAP: " + sc + " seeds known from previous run");

        // - use the superseed to further fill up the seedDB
        int ssc = 0, c = 0;
        while (true) {
            if (Thread.currentThread().isInterrupted()) {
                break;
            }
            seedListFileURL = sb.getConfig("network.unit.bootstrap.seedlist" + c, "");
            if (seedListFileURL.length() == 0) {
                break;
            }
            c++;
            if (
                    seedListFileURL.startsWith("http://") ||
                    seedListFileURL.startsWith("https://")
            ) {
                // load the seed list
                try {

                    url = new DigestURI(seedListFileURL);
                    //final long start = System.currentTimeMillis();
                    client.HEADResponse(url.toString());
                    header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
                    //final long loadtime = System.currentTimeMillis() - start;
                    /*if (header == null) {
                        if (loadtime > getConfigLong("bootstrapLoadTimeout", 6000)) {
                            yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, time-out after " + loadtime + " milliseconds");
                        } else {
                            yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, no content");
                        }
                    } else*/ if (header.lastModified() == null) {
                        Network.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not usable, last-modified is missing");
                    } else if ((header.age() > 86400000) && (ssc > 0)) {
                        Network.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
                    } else {
                        ssc++;
                        final byte[] content = client.GETbytes(url);
                        enu = FileUtils.strings(content);
                        lc = 0;
                        while (enu.hasNext()) {
                            try {
                                ys = Seed.genRemoteSeed(enu.next(), null, false, null);
View Full Code Here

     * @return
     */
    public static Map<String, String> loadFileAsMap(final DigestURI url) {
        final RequestHeader reqHeader = new RequestHeader();
        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        final HTTPClient client = new HTTPClient();
        client.setHeader(reqHeader.entrySet());
        try {
            // sending request
            final Map<String, String> result = FileUtils.table(client.GETbytes(url));
            return (result == null) ? new HashMap<String, String>() : result;
        } catch (final Exception e) {
            Log.logException(e);
            return new HashMap<String, String>();
        }
View Full Code Here

        final RequestHeader reqHeader = new RequestHeader();
        reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
        reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());

        final HTTPClient client = new HTTPClient();
        client.setHeader(reqHeader.entrySet());
        byte[] content = null;
        try {
            // send request
          content = client.GETbytes(seedURL);
        } catch (final Exception e) {
          throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
        }

        // check response code
        if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) {
          throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
        }

        try {
            // uncompress it if it is gzipped
            content = FileUtils.uncompressGZipArray(content);
View Full Code Here

            final byte[] b = client.get(this.path);
            client.CLOSE();
            return new ByteArrayInputStream(b);
        }
        if (isHTTP() || isHTTPS()) {
                final HTTPClient client = new HTTPClient();
                client.setTimout(timeout);
                client.setUserAgent(userAgent);
                client.setHost(getHost());
                return new ByteArrayInputStream(client.GETbytes(this));
        }

        return null;
    }
View Full Code Here

            final byte[] b = client.get(this.path);
            client.CLOSE();
            return b;
        }
        if (isHTTP() || isHTTPS()) {
                final HTTPClient client = new HTTPClient();
                client.setTimout(timeout);
                client.setUserAgent(userAgent);
                client.setHost(getHost());
                return client.GETbytes(this);
        }

        return null;
    }
View Full Code Here

                    log.logSevere("RuntimeException:", e);
                }
            }
            log.logInfo("URLs vorher: " + this.urlIndexFile.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());

            final HTTPClient client = new HTTPClient();
            final Iterator<String> eiter2 = damagedURLS.iterator();
            byte[] urlHashBytes;
            while (eiter2.hasNext()) {
                urlHashBytes = ASCII.getBytes(eiter2.next());

                // trying to fix the invalid URL
                String oldUrlStr = null;
                try {
                    // getting the url data as byte array
                    final Row.Entry entry = this.urlIndexFile.get(urlHashBytes, true);

                    // getting the wrong url string
                    oldUrlStr = entry.getColUTF8(1).trim();

                    int pos = -1;
                    if ((pos = oldUrlStr.indexOf("://",0)) != -1) {
                        // trying to correct the url
                        final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
                        final DigestURI newUrl = new DigestURI(newUrlStr);

                        if (client.HEADResponse(newUrl.toString()) != null
                            && client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
                            entry.setCol(1, UTF8.getBytes(newUrl.toString()));
                            this.urlIndexFile.put(entry);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
                        } else {
                            remove(urlHashBytes);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (client.getHttpResponse() == null ? "null" : client.getHttpResponse().getStatusLine()));
                        }
                    }
                } catch (final Exception e) {
                    remove(urlHashBytes);
                    if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tExecption: " + e.getMessage());
View Full Code Here

     * @param realm authentication realm
     * @return a map of the called urls and the http status code of the api call or -1 if any other IOException occurred
     */
    public Map<String, Integer> execAPICalls(String host, int port, String realm, Collection<String> pks) {
        // Calls every stored API URL identified by the given primary keys and
        // records the resulting status code per URL.
        // A single client instance is created here and reused for all calls in the loop.
        final HTTPClient client = new HTTPClient();
        client.setRealm(realm);
        // NOTE(review): 120000 is presumably a timeout in milliseconds (two minutes) - confirm against HTTPClient.setTimout
        client.setTimout(120000);
        Tables.Row row;
        String url;
        // LinkedHashMap keeps insertion order, so the result iterates in the order the calls were made
        LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
        for (final String pk: pks) {
            // look up the table row for this primary key; a lookup failure is only logged
            row = null;
            try {
                row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
            } catch (IOException e) {
                Log.logException(e);
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
            }
            // no row (not found or lookup failed): skip this key, nothing is put into the result map
            if (row == null) continue;
            // build the call URL from host, port and the URL column stored in the row
            url = "http://" + host + ":" + port + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL));
            // the row's primary key is appended with '&'
            // NOTE(review): this presumably relies on the stored URL already containing a query string - verify
            url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
            Log.logInfo("WorkTables", "executing url: " + url);
            try {
                // the response body is discarded; only the status code is stored
                client.GETbytes(url);
                l.put(url, client.getStatusCode());
            } catch (IOException e) {
                // an I/O failure is logged and recorded as -1 for this URL; the loop continues with the next key
                Log.logException(e);
                l.put(url, -1);
            }
        }
View Full Code Here

        return l;
    }
   
    public static int execAPICall(String host, int port, String realm, String path, byte[] pk) {
        // now call the api URLs and store the result status
        final HTTPClient client = new HTTPClient();
        client.setRealm(realm);
        client.setTimout(120000);
        String url = "http://" + host + ":" + port + path;
        if (pk != null) url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(pk);
        try {
            client.GETbytes(url);
            return client.getStatusCode();
        } catch (IOException e) {
            Log.logException(e);
            return -1;
        }
    }
View Full Code Here

TOP

Related Classes of net.yacy.cora.protocol.http.HTTPClient

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.