Package cn.edu.hfut.dmic.webcollector.parser

Examples of cn.edu.hfut.dmic.webcollector.parser.ParseData


        reader_fetch.close();

        File file_parse = new File(getSegmentPath(), "parse_data/info.avro");
        if (file_parse.exists()) {
            DbReader<ParseData> reader_parse = new DbReader<ParseData>(ParseData.class, file_parse);
            ParseData parseresult = null;
            while (reader_parse.hasNext()) {
                parseresult = reader_parse.readNext();
                for (Link link : parseresult.getLinks()) {
                    datum = new CrawlDatum();
                    datum.setUrl(link.getUrl());
                    datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
                    if (indexmap.containsKey(datum.getUrl())) {
                        continue;
View Full Code Here


        reader.close();
        if (parseFile.exists()) {
            DbReader<ParseData> parseReader = new DbReader<ParseData>(ParseData.class, parseFile);
            while (parseReader.hasNext()) {
                ParseData parseData = parseReader.readNext();
                if (parseData.getLinks() == null) {
                    continue;
                }
                for (Link link : parseData.getLinks()) {
                    CrawlDatum datum = new CrawlDatum();
                    datum.setUrl(link.getUrl());
                    datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
                    datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED);
                    addToRedis(datum);
View Full Code Here

TOP

Related Classes of cn.edu.hfut.dmic.webcollector.parser.ParseData

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.