Package com.nardoz.restopengov.utils

Source Code of com.nardoz.restopengov.utils.CSVFetcher

package com.nardoz.restopengov.utils;


import au.com.bytecode.opencsv.CSVReader;
import com.google.gson.Gson;
import com.ibm.icu.text.CharsetDetector;
import com.nardoz.restopengov.Crawler;

import java.io.*;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

public class CSVFetcher implements IFormatReader {

    protected ICSVFetcherResult callback;
    private Gson gson = new Gson();
    private char separator = ',';

    public CSVFetcher() {

    }

    public CSVFetcher(ICSVFetcherResult callback) {
        this.callback = callback;
    }

    public ICSVFetcherResult readFromURL(String sourceURL) throws Exception {

        URL url = new URL(sourceURL.replace("https", "http"));
        separator = detectSeparator(url.openStream());

        return read(url.openStream());
    }

    public ICSVFetcherResult readFromFile(String path) throws Exception {

        separator = detectSeparator(new FileInputStream(path));

        return read(new FileInputStream(path));
    }

    public ICSVFetcherResult read(InputStream stream) throws Exception {

        CharsetDetector detector = new CharsetDetector();
        detector.setText(new BufferedInputStream(stream));

        CSVReader reader = new CSVReader(detector.detect().getReader(), separator);

        String[] keys = reader.readNext();
        String[] nextLine;

        callback.onStart();

        Integer id = 0;
        while ((nextLine = reader.readNext()) != null) {
            callback.add(id.toString(), buildJson(keys, nextLine));
            id++;
        }

        callback.onEnd();

        reader.close();

        return callback;
    }

    private char detectSeparator(InputStream stream) {

        BufferedReader br = new BufferedReader(new InputStreamReader(stream));

        String line = "";

        try {
            line = br.readLine();
            br.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return detectSeparator(line);
    }

    private char detectSeparator(String line) {

        Crawler.logger.debug("CSV header: " + line);

        String[] separators = new String[] { ",", ";", "\t", "|" };

        TreeMap tm = new TreeMap();
        for(String s : separators) {
            tm.put(line.split(Pattern.quote(s)).length, s);
        }

        char separator = tm.lastEntry().getValue().toString().charAt(0);

        Crawler.logger.debug("Detected separator: " + separator);

        return separator;
    }

    private String buildJson(String[] keys, String[] dataLine) throws Exception {

        if(keys.length != dataLine.length) {
            throw new Exception("There are not as much columns for the keys as for the rows");
        }

        Map<String, String> result = new HashMap<String, String>();

        for (int i = 0; i < dataLine.length; i++) {
            result.put(keys[i], dataLine[i]);
        }

        return gson.toJson(result);
    }


}
TOP

Related Classes of com.nardoz.restopengov.utils.CSVFetcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.