Package com.flaptor.util

Examples of com.flaptor.util.Config

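All of the snippets below share one pattern: obtain a Config for a named properties file and read typed values from it. The following minimal sketch gathers the accessors that actually appear in these examples (getString, getInt, getBoolean, getStringArray, getPairList) in one place. It is only an orientation aid: it assumes getConfig resolves the named file from the classpath and that Pair lives in com.flaptor.util; the property keys are copied from the examples further down.

    import java.util.List;
    import com.flaptor.util.Config;
    import com.flaptor.util.Pair;

    public class ConfigUsageSketch {
        public static void main(String[] args) {
            // Obtain the configuration for a named properties file, as every snippet below does.
            Config crawlerConfig = Config.getConfig("crawler.properties");
            Config indexerConfig = Config.getConfig("indexer.properties");

            // Typed accessors used throughout the examples on this page.
            String reportFile   = crawlerConfig.getString("progress.report.filename");
            int fetchlistSize   = crawlerConfig.getInt("fetchlist.size");
            boolean distributed = crawlerConfig.getBoolean("pagedb.is.distributed");
            String[] fields     = indexerConfig.getStringArray("Writer.fields");
            List<Pair<String,String>> typedFields = indexerConfig.getPairList("FieldFormatChecker.fields");

            System.out.println(reportFile + " " + fetchlistSize + " " + distributed
                    + " " + fields.length + " " + typedFields.size());
        }
    }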

    /**
     * Deletes the progress report files.
     */
    public static void cleanup() {
        Config config = Config.getConfig("crawler.properties");
        String baseFileName = config.getString("progress.report.filename");
        File baseFile = new File(".",baseFileName);
        File dir = baseFile.getParentFile();
        for (File file : dir.listFiles(new Filter(baseFile.getName()))) {
            FileUtil.deleteFile(file);
        }
View Full Code Here


     * If there is no binary progress report file, null is returned.
     * @return a CrawlerProgress instance, or null if there is no binary report file.
     */
    public static CrawlerProgress readCrawlerProgress() {
        CrawlerProgress cp = null;
        Config config = Config.getConfig("crawler.properties");
        String baseFileName = config.getString("progress.report.filename");
        int cycle = latestReportedCycle(baseFileName);
        if (cycle > 0) {
            ObjectInputStream in = null;
            try {
                in = new ObjectInputStream(new FileInputStream(baseFileName+"-b."+cycle));
View Full Code Here


   

    private DocumentConverter() {
        Config config = Config.getConfig("indexer.properties");
        docIdName = config.getString("docIdName");
        if (docIdName.equals("")) {
            throw new IllegalArgumentException("The docIdName cannot be empty");
        }

        requiredFields = new HashSet<String>();
        compressedFields = new HashSet<String>();
        requiredPayloads = new HashSet<String>();

        String[] fields = config.getStringArray("Writer.compressedFields");
        for (int j = 0; j < fields.length; j++) {
            compressedFields.add(fields[j]);
            logger.info("The field \"" + fields[j] + "\" will be stored compressed in the index.");
            if (fields[j].equals(docIdName)) {
                logger.warn("Asked to compress the documentId field. It won't be compressed.");
            }
        }

        fields = config.getStringArray("Writer.fields");
        for (int j = 0; j < fields.length; j++) {
            requiredFields.add(fields[j]);
            logger.info("The field \"" + fields[j] + "\" will be checked for in every document.");
        }

        String[] payloads = config.getStringArray("Writer.payloads");
        for (String payload : payloads) {
            if ("".equals(payload)) {
                throw new IllegalArgumentException("\"\" can not be a payload.");
            }
            requiredPayloads.add(payload);
View Full Code Here

     * @todo the extra fields will use the same stored|indexed setting as defined
     * by HtmlParser.stored and HtmlParser.indexed. It would be nice to have the
     * possibility to optionally override it for each field.
     */
    public HtmlParser() {
        Config conf = Config.getConfig("indexer.properties");
        String[] inputTagNames = conf.getStringArray("HtmlParser.inputTagNames");
        String[] outputFieldNames= conf.getStringArray("HtmlParser.outputFieldNames");
        if (inputTagNames.length != outputFieldNames.length) {
            throw new IllegalArgumentException("Length of inputTagName list does not match length of outputFieldName list.");
        }
        tags = new HashSet<Pair<String, String>>();
        for (int i = 0; i < inputTagNames.length; i++) {
            tags.add(new Pair<String, String>(inputTagNames[i], outputFieldNames[i]));
        }

        String removedXPathElements = conf.getString("HtmlParser.removedXPath");
        String[] separatorTags = conf.getStringArray("HtmlParser.separatorTags");
        List<Pair<String,String>> extraFieldMapping = conf.getPairList("HtmlParser.extraFieldMapping");
       
        Map<String,String> mapping = new HashMap<String,String>();
        extraFields = new ArrayList<String>(extraFieldMapping.size());

        for (Pair<String,String> pair: extraFieldMapping) {
            mapping.put(pair.first(),pair.last());
            extraFields.add(pair.first());
        }
       
        parser = new com.flaptor.util.parser.HtmlParser(removedXPathElements, separatorTags,mapping);

        STORED = conf.getBoolean("HtmlParser.stored");
        INDEXED = conf.getBoolean("HtmlParser.indexed");
        if (!(STORED || INDEXED)) {
            throw new IllegalArgumentException("constructor: both indexed an stored are set to false in the configuration.");
        }
    }
View Full Code Here

       
      this.graphicMode = graphicMode;
        ui = graphicMode ? new GUI("Hounder Installation Wizard") : new CLI();
        this.distDir =  distDir;
       
        Config searcherProperties = Config.getConfig("searcher.properties");
   
//        try {installDir = FileUtil.getDir(".") + "/hounder/";} catch (IOException e) {installDir = DEFAULT_INSTALLATION_DIR;}
        installDir = DEFAULT_INSTALLATION_DIR;

//create the pages
View Full Code Here

    private Set<String> longFields;
    // TODO insert more sets here


    public FieldFormatCheckerModule() {
        Config config = Config.getConfig("indexer.properties");
        List<Pair<String,String>> pairList = config.getPairList("FieldFormatChecker.fields");

        longFields = new HashSet<String>();

        for (Pair<String,String> pair: pairList) {
            String type = pair.last();
View Full Code Here

    /**
     * Class initializer.
     * Prepares the fetchlist factory to work on a new set of pagedbs with a new threshold.
     */
    public FetchlistFactory (PageDB pageSource, PageDB pageDest, CrawlerProgress progress) throws IOException {
        Config config = Config.getConfig("crawler.properties");
        fetchlistSize = config.getInt("fetchlist.size");
        priorityPercentileToFetch = config.getInt("priority.percentile.to.fetch");
        pageSourceDB = pageSource;
        pageDestDB = pageDest;
        priorityThreshold = pageSourceDB.getPriorityThreshold(100-priorityPercentileToFetch);
        pages = pageSourceDB.iterator();
        this.progress = progress;
View Full Code Here

                URL[] urls = new URL[classpath.length];
                for (int i = 0; i < classpath.length; i++) {
                    urls[i] = new URL("file://"+classpath[i]);
                }
                URLClassLoader loader = new URLClassLoader(urls);
                Config config = Config.getConfig("searcher.properties",loader);
                inited = true;
            } catch (MalformedURLException e) {
                System.err.println(e);
            }
        }
View Full Code Here
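The snippet above passes an explicit class loader so that searcher.properties is resolved from a specific classpath rather than the default one. Below is a self-contained sketch of the same pattern; it relies only on the Config.getConfig(String, ClassLoader) overload used above, the classpath entries are placeholders, and File.toURI().toURL() is used instead of concatenating "file://" by hand.

    import java.io.File;
    import java.net.URL;
    import java.net.URLClassLoader;
    import com.flaptor.util.Config;

    public class ConfigFromClasspathSketch {
        public static void main(String[] args) throws Exception {
            // Placeholder classpath entries; in the snippet above they come from a String[] classpath.
            String[] classpath = { "conf/", "lib/searcher.jar" };

            URL[] urls = new URL[classpath.length];
            for (int i = 0; i < classpath.length; i++) {
                // toURI().toURL() handles spaces and relative paths more robustly than "file://" + path.
                urls[i] = new File(classpath[i]).toURI().toURL();
            }

            // Load searcher.properties through the isolated class loader instead of the default one.
            URLClassLoader loader = new URLClassLoader(urls);
            Config config = Config.getConfig("searcher.properties", loader);
            System.out.println(config);
        }
    }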

        for (int i = 0; i < clusterSize; i++) {
            IndexDescriptor id = new IndexDescriptor(clusterSize, i , fragmentName);
            indexManagers[i] = new BatchIndexManager(id, new File(outputDirectory, "index-" + String.valueOf(i)));
        }
        //Some of the configuration is taken from the config system
        Config config = Config.getConfig("indexer.properties");
        docIdName = config.getString("docIdName");
    }
View Full Code Here

     * Accepts a fetcher suggestion.
     * @param fetcher the provided fetcher, or null if no specific fetcher provided.
     */
    public Crawler (IFetcher fetcher) throws Exception {
        this.fetcher = fetcher;
        Config config = Config.getConfig("crawler.properties");
        indexOptimizePeriod = config.getInt("index.optimize.period");
        pagedbDir = config.getString("pagedb.dir");
        injectedPagedbDir = config.getString("injected.pagedb.dir");
        distributed = config.getBoolean("pagedb.is.distributed");
        protectAgainstEmptyPageDB = config.getBoolean("protect.against.empty.pagedb");
        starting = true;
        fetchlistQueue = new CloseableQueue<FetchList>(1); // max one fetchlist in the queue
        injectedFetchlistQueue = new CloseableQueue<FetchList>(); //TODO: put a limit, a large injectdb causes an OutOfMemoryError.
        fetchdataQueue = new CloseableQueue<FetchData>(1); //TODO: analyze if the fetchdata should be written to disk.
        cycleFinishedMonitor = new Object();
        stopMonitor = new StopMonitor("stop");
        urlFilter = new UrlFilter();

      if (config.getBoolean("clustering.enable")) {
          int port = PortUtil.getPort("clustering.rpc.crawler");
        nodeListener = new NodeListener(port, config);
        MonitorModule.addModuleListener(nodeListener, new CrawlerMonitoredNode(this));
        ControllerModule.addModuleListener(nodeListener, new ControllableImplementation());
        nodeListener.addModuleListener("crawlerControl", new CrawlerControllableImplementation());
View Full Code Here

