Package com.flaptor.util

Examples of com.flaptor.util.Config


    private final float categoryThreshold; // threshold to add category to document
   
    public BayesianClassifierModule (String moduleName, Config globalConfig) {  
        super(moduleName, globalConfig);
        textLengthLimit = globalConfig.getInt("page.text.max.length");
        Config mdlConfig = getModuleConfig();
        String categoryList = mdlConfig.getString("categories");       
        if ((categoryList != null) && !"".equals(categoryList.trim())) {
            categories = categoryList.split(",");
            double unknownTermsProbability = mdlConfig.getFloat("unknown.terms.probability");
            categoryThreshold   = mdlConfig.getFloat("category.score.threshold");           
            multiClassifier = new MultiClassifier(categories, unknownTermsProbability);
        } else { // categories is null or ""
            // It makes no sense to have a BayesianClassifierModule that
            // has no categories. Someone misplaced this module in the modules
            // manager, or forgot to set categories. Either way, fail.
View Full Code Here


            xmlRpcServer = new XmlrpcServer(port);
            xmlRpcServer.addHandler(XMLRPC_CONTEXT, new VectorResults(baseSearcher));
            xmlRpcServer.start();
        }
        if (openSearch || web || xml) {
            Config config = Config.getConfig("searcher.properties");
            int httpServerPort = PortUtil.getPort("searcher.http");
            httpServer = new WebServer(httpServerPort);

            if (openSearch) {
                String context = config.getString("opensearch.context");
                logger.info("MultipleRpcSearcher constructor: starting OpenSearch searcher on port " + httpServerPort + " context "+context);
                httpServer.addHandler(context, new OpenSearchHandler(baseSearcher));
            }
            if (xml) {
                String context = config.getString("xmlsearch.context");
                logger.info("MultipleRpcSearcher constructor: starting xml searcher on port " + httpServerPort  + " context "+context);
                httpServer.addHandler(context, new XmlSearchHandler(baseSearcher));
            }
            if (web) {
                String context = config.getString("websearch.context");
                logger.info("MultipleRpcSearcher constructor: starting web searcher on port " + httpServerPort  + " context "+context);
                WebSearchUtil.setSearcher(baseSearcher);
                String webappPath = this.getClass().getClassLoader().getResource("web-searcher").getPath();
                httpServer.addWebAppHandler(context, webappPath);
            }
            boolean redirect = config.getBoolean("websearch.redirect");
            if (redirect) {
                String from = config.getString("websearch.redirect.from");
                String to = config.getString("websearch.redirect.to");
                httpServer.addHandler("/", new RedirectHandler(from,to));
            }
            try {httpServer.start();} catch (Exception e) {throw new RuntimeException(e);}
        }
    }
View Full Code Here

            PropertyConfigurator.configureAndWatch(log4jConfigPath);
        } else {
            logger.warn("log4j.properties not found in classpath! Reload disabled.");
        }

        Config conf = Config.getConfig("searcher.properties");
        ISearcher baseSearcher = new CompositeSearcher();
        new MultipleRpcSearcher(baseSearcher,conf.getBoolean("rmiInterface"), conf.getBoolean("xmlRpcInterface"), conf.getBoolean("openSearchInterface"), conf.getBoolean("webInterface"), conf.getBoolean("xmlInterface"));
    }
View Full Code Here

            PropertyConfigurator.configureAndWatch(log4jConfigPath);
        } else {
            logger.warn("log4j.properties not found in classpath! Reload disabled.");
        }

        Config conf = Config.getConfig("indexer.properties");
        IIndexer indexer = conf.getBoolean("isMultiIndexer") ? new MultiIndexer() : new Indexer();
        new MultipleRpcIndexer(indexer,conf.getBoolean("rmiInterface"), conf.getBoolean("xmlInterface"));
    }
View Full Code Here

    /**
     * Initialize the fetcher.
     */
    public Nutch9Fetcher() {
        Config config = Config.getConfig("nutchfetcher.properties");
        segmentsDir = config.getString("fetchlist.dir");
        keepUrl = config.getBoolean("keep.original.url.on.redirect");
        fetcher = new Fetcher();
        Configuration conf = new Configuration();
        // conf.addDefaultResource("crawl-tool.xml");
        conf.addDefaultResource("nutch-default.xml");
        conf.addDefaultResource("nutch-site.xml");
View Full Code Here

    /**
     * Creates a MultiSearcher that uses a new {@code ExponentialFallbackPolicy}
     * as its retry policy, by delegating to the constructor that takes an
     * {@code IRetryPolicy}.
     */
    public MultiSearcher() {
      this(new ExponentialFallbackPolicy());
    }

    public MultiSearcher(IRetryPolicy policy) {
        Config config = Config.getConfig("multiSearcher.properties");
        String[] hosts = config.getStringArray("multiSearcher.hosts");
        int workerThreads = config.getInt("multiSearcher.workerThreads");
        logger.info("init: number of worker threads set to " + workerThreads + " (from config file).");
        int maxThreadsPerSearcher = (workerThreads * 2 / hosts.length) + 1 ;
        logger.info("init: setting maxThreadsPerSearcher to " + maxThreadsPerSearcher);

        for (int i = 0; i < hosts.length; i++) {
            Pair<String, Integer> host = PortUtil.parseHost(hosts[i]);
            searchers.add(new RmiSearcherStub(host.last(), host.first(), policy, maxThreadsPerSearcher));
            searcherIPs.add(host.first());
        }
        timeout = config.getLong("multiSearcher.timeout");
        logger.info("init: timeout set to " + timeout + "ms (from config file).");
        multiQueryExecutor = new MultiExecutor<GroupedSearchResults>(workerThreads, "multiSearcher");
    }
View Full Code Here

  /**
   * Constructor.
   * @param indexer a reference to the indexer that contains this module.
   */
  public SanitizerModule() {
        Config config = Config.getConfig("indexer.properties");
        xpath = config.getString("SanitizerModule.XPath");
        htmlFields = new HashSet<String>(Arrays.asList(config.getStringArray("SanitizerModule.html")));
        xmlFields = new HashSet<String>(Arrays.asList(config.getStringArray("SanitizerModule.xml")));
        accentFields = new HashSet<String>(Arrays.asList(config.getStringArray("SanitizerModule.accents")));
        allFields = new HashSet<String>();
        allFields.addAll(htmlFields);
        allFields.addAll(xmlFields);
        allFields.addAll(accentFields);
  }
View Full Code Here

    // Value of the "docIdName" setting from indexer.properties; assigned in the constructor.
    private String docIdName = null;
    // Logger named by Execute.whoAmI() (presumably the calling class name -- TODO confirm).
    private static final Logger logger = Logger.getLogger(Execute.whoAmI());

    public Writer(final Indexer indexer) {
        super(indexer);
    Config config = Config.getConfig("indexer.properties");
    docIdName = config.getString("docIdName");
        if (docIdName.equals("")) {
            throw new IllegalArgumentException("The docIdName cannot be empty");
        }

    }
View Full Code Here

        seen = 0;
        fetched = 0;
        processed = 0;
        sorted = 0;
        trimmed = 0;
        Config config = Config.getConfig("crawler.properties");
        String baseFileName = config.getString("progress.report.filename");
        baseFile = new File(baseFileName);
        reportFile = new File(baseFileName+"."+cycle);
        binaryFile = new File(baseFileName+"-b."+cycle);
        startTime = new long[6];
        startTime[START] = System.currentTimeMillis();
View Full Code Here

     * Marks the start of the fetch stage.
     * @param max number of pages in the old pagedb.
     * @param known number of known pages in the old pagedb.
     */
    public void startFetch(long max, long known) {
        Config config = Config.getConfig("crawler.properties");
        int refetchPercent = config.getInt("priority.percentile.to.fetch");
        tosee = max > 0 ? max : 1;
        tofetch = (max-known)+known*refetchPercent/100;
        if (0 == tofetch) tofetch = 1;
        stage = FETCH;
        startTime[stage] = System.currentTimeMillis();
View Full Code Here

TOP

Related Classes of com.flaptor.util.Config

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.