Package net.yacy.cora.protocol

Examples of net.yacy.cora.protocol.RequestHeader


   
    public Response load(final Request request, boolean acceptOnlyParseable) throws IOException {
        DigestURI url = request.url();
        if (!url.getProtocol().equals("file")) throw new IOException("wrong loader for FileLoader: " + url.getProtocol());

        RequestHeader requestHeader = new RequestHeader();
        if (request.referrerhash() != null) {
            DigestURI ur = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
            if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
        }
       
        // process directories: transform them to html with meta robots=noindex (using the ftpc lib)
        String[] l = null;
        try {l = url.list();} catch (IOException e) {}
View Full Code Here


   
    public Response load(final Request request, boolean acceptOnlyParseable) throws IOException {
        DigestURI url = request.url();
        if (!url.getProtocol().equals("smb")) throw new IOException("wrong loader for SMBLoader: " + url.getProtocol());

        RequestHeader requestHeader = new RequestHeader();
        if (request.referrerhash() != null) {
            DigestURI ur = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
            if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
        }
       
        // process directories: transform them to html with meta robots=noindex (using the ftpc lib)
        String[] l = null;
        try {l = url.list();} catch (IOException e) {}
View Full Code Here

                }
            }

            if (file.length() == 0) {
                // directory -> get list of files
                final RequestHeader requestHeader = new RequestHeader();
                if (request.referrerhash() != null) {
                    final DigestURI u = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
                    if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false));
                }

                final StringBuilder dirList = ftpClient.dirhtml(path);

                if (dirList == null) {
View Full Code Here

        // determine the file date
        final Date fileDate = ftpClient.entryDate(path);

        // create request and response headers
        final RequestHeader requestHeader = new RequestHeader();
        if (request.referrerhash() != null) {
            final DigestURI refurl = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
            if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
        }
        final ResponseHeader responseHeader = new ResponseHeader();
        responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(fileDate));
        responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
View Full Code Here

      if (uri.startsWith("http://") || uri.startsWith("https://")) {
            final String[] uris = uri.split(",");
            for (String netdef: uris) {
                netdef = netdef.trim();
                try {
                    final RequestHeader reqHeader = new RequestHeader();
                    reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
                    final HTTPClient client = new HTTPClient();
                    client.setHeader(reqHeader.entrySet());
                    byte[] data = client.GETbytes(uri);
                    if (data == null || data.length == 0) continue;
                    // save locally in case next fetch fails
                    if (file != null) {
                      FileOutputStream f = new FileOutputStream(file);
View Full Code Here

        String eTag=null, oldEtag = null;
        Date lastMod=null;
        downloadStart = System.currentTimeMillis();

        // if we have previously downloaded this robots.txt, we can set the If-Modified-Since header
        RequestHeader reqHeaders = new RequestHeader();

        // add yacybot user agent
        reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());

        // adding referer
        reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
        reqHeaders.put(RequestHeader.ACCEPT, HTTPLoader.DEFAULT_ACCEPT);
        if (entry != null) {
            oldEtag = entry.getETag();
            reqHeaders = new RequestHeader();
            final Date modDate = entry.getModDate();
            if (modDate != null) reqHeaders.put(RequestHeader.IF_MODIFIED_SINCE, HeaderFramework.formatRFC1123(entry.getModDate()));

        }

        // setup http-client
        //TODO: adding Traffic statistic for robots download?
        final HTTPClient client = new HTTPClient();
        client.setHeader(reqHeaders.entrySet());
        try {
            // check for interruption
            if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress.");

            // sending the get request
View Full Code Here

    }

    public Response(final Request request, final CrawlProfile profile) {
        this.request = request;
        // request and response headers may be zero in case that we process surrogates
        this.requestHeader = new RequestHeader();
        this.responseHeader = new ResponseHeader();
        if (request.size() > 0) this.responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(request.size()));
        this.responseStatus = "200";
        this.profile = profile;
        this.status = QUEUE_STATE_FRESH;
View Full Code Here

        // take a file from the net
        Response response = null;

        // create a request header
        final RequestHeader requestHeader = new RequestHeader();
        requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        DigestURI refererURL = null;
        if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
        if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
        requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
        requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
        requestHeader.put(HeaderFramework.ACCEPT_CHARSET, this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));
        requestHeader.put(HeaderFramework.ACCEPT_ENCODING, this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));

        // HTTP-Client
        final HTTPClient client = new HTTPClient();
        client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
        client.setTimout(this.socketTimeout);
        client.setHeader(requestHeader.entrySet());
            // send request
          final byte[] responseBody = client.GETbytes(url, maxFileSize);
          final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
          final int code = client.getHttpResponse().getStatusLine().getStatusCode();
View Full Code Here

        // take a file from the net
        Response response = null;

        // create a request header
        final RequestHeader requestHeader = new RequestHeader();
        requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE);
        requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
        requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);

        final HTTPClient client = new HTTPClient();
        client.setTimout(20000);
        client.setHeader(requestHeader.entrySet());
          final byte[] responseBody = client.GETbytes(request.url());
          final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
          final int code = client.getHttpResponse().getStatusLine().getStatusCode();
            // FIXME: 30*-handling (bottom) is never reached
            // we always get the final content because httpClient.followRedirects = true
View Full Code Here

     */
    public File downloadRelease() {
        final File storagePath = Switchboard.getSwitchboard().releasePath;
        File download = null;
        // setup httpClient
        final RequestHeader reqHeader = new RequestHeader();
        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());

        final String name = getUrl().getFileName();
        byte[] signatureBytes = null;

        final HTTPClient client = new HTTPClient();
        client.setTimout(6000);
        client.setHeader(reqHeader.entrySet());

        // download signature first, if public key is available
        try {
            if (this.publicKey != null) {
              final byte[] signatureData = client.GETbytes(getUrl().toString() + ".sig");
View Full Code Here

TOP

Related Classes of net.yacy.cora.protocol.RequestHeader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.