Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.DigestURI.toNormalform()


                }

                // normalize URL
                DigestURI crawlingStartURL = null;
                if (crawlingFile == null) try {crawlingStartURL = new DigestURI(crawlingStart);} catch (final MalformedURLException e1) {Log.logException(e1);}
                crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true);

                // set new properties
                final boolean fullDomain = "domain".equals(post.get("range", "wide")); // special property in simple crawl start
                final boolean subPath    = "subpath".equals(post.get("range", "wide")); // special property in simple crawl start
View Full Code Here


                    if (entry.getValue().startsWith("mark_")) {
                        byte [] pk = entry.getValue().substring(5).getBytes();
                        DigestURI url = pkmap.get(pk);
                        if (url != null) {
                            String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                            path += "&crawlingURL=" + url.toNormalform(true, false);
                            WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, pk);
                        }
                    }
                }
            }
View Full Code Here

                        Map.Entry<Scanner.Service, Scanner.Access> host;
                        while (se.hasNext()) {
                            host = se.next();
                            try {
                                u = new DigestURI(host.getKey().url());
                                urlString = u.toNormalform(true, false);
                                if (host.getValue() == Access.granted && Scanner.inIndex(apiCommentCache, urlString) == null) {
                                    String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                                    path += "&crawlingURL=" + urlString;
                                    WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, u.hash());
                                }
View Full Code Here

                    Map.Entry<Scanner.Service, Scanner.Access> host;
                    while (se.hasNext()) {
                        host = se.next();
                        try {
                            u = new DigestURI(host.getKey().url());
                            urlString = u.toNormalform(true, false);
                            prop.put("servertable_list_" + i + "_pk", ASCII.String(u.hash()));
                            prop.put("servertable_list_" + i + "_count", i);
                            prop.putHTML("servertable_list_" + i + "_protocol", u.getProtocol());
                            prop.putHTML("servertable_list_" + i + "_ip", host.getKey().getInetAddress().getHostAddress());
                            prop.putHTML("servertable_list_" + i + "_url", urlString);
View Full Code Here

                    prop.put("import-one_rt", r.getResumptionToken().toString());
                   
                    // set next default url
                    try {
                        DigestURI nexturl = (rt == null) ? null : rt.resumptionURL();
                        if (rt != null) prop.put("defaulturl", (nexturl == null) ? "" : nexturl.toNormalform(true, false));
                    } catch (MalformedURLException e) {
                        prop.put("defaulturl", e.getMessage());
                    } catch (IOException e) {
                        // reached end of resumption
                        prop.put("defaulturl", e.getMessage());
View Full Code Here

            prop.put("error", "1");
            prop.put("viewMode", VIEW_MODE_NO_TEXT);
            prop.put("url", "");
            return prop;
        } else {
            prop.put("url", url.toNormalform(false, true));
        }

        // loading the resource content as byte array
        prop.put("error_incache", Cache.has(url) ? 1 : 0);
View Full Code Here

            prop.put("viewMode", VIEW_MODE_AS_PLAIN_TEXT);
            prop.put("viewMode_plainText", markup(wordArray, content).replaceAll("\n", "<br />").replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;"));

        } else if (viewMode.equals("iframeWeb")) {
            prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_WEB);
            prop.put("viewMode_url", url.toNormalform(false, true));

        } else if (viewMode.equals("iframeCache")) {
            prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
            final String ext = url.getFileExtension();
            if ("jpg.jpeg.png.gif".indexOf(ext) >= 0) {
View Full Code Here

        } else if (viewMode.equals("iframeCache")) {
            prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
            final String ext = url.getFileExtension();
            if ("jpg.jpeg.png.gif".indexOf(ext) >= 0) {
                prop.put("viewMode_png", 1);
                prop.put("viewMode_png_url", url.toNormalform(false, true));
            } else {
                prop.put("viewMode_html", 1);
                prop.put("viewMode_html_url", url.toNormalform(false, true));
            }
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("words") || viewMode.equals("links")) {
View Full Code Here

            if ("jpg.jpeg.png.gif".indexOf(ext) >= 0) {
                prop.put("viewMode_png", 1);
                prop.put("viewMode_png_url", url.toNormalform(false, true));
            } else {
                prop.put("viewMode_html", 1);
                prop.put("viewMode_html_url", url.toNormalform(false, true));
            }
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("words") || viewMode.equals("links")) {
            // parsing the resource content
            Document document = null;
            try {
View Full Code Here

            }
            if (document != null) document.close();
        }
        prop.put("error", "0");
        prop.put("error_url", url.toNormalform(false, true));
        prop.put("error_hash", urlHash);
        prop.put("error_wordCount", wordCount);
        prop.putHTML("error_desc", (descr.isEmpty()) ? "&nbsp;" : descr);
        prop.putNum("error_size", size);
        prop.put("error_mimeTypeAvailable", (response.getMimeType() == null) ? "0" : "1");
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.