Package org.mozilla.universalchardet

Examples of org.mozilla.universalchardet.UniversalDetector


     * @throws java.io.IOException
     */
    protected String detectEncoding(String fileName) throws java.io.IOException {
        byte[] buf = new byte[4096];
        java.io.FileInputStream fis = new java.io.FileInputStream(fileName);
        UniversalDetector detector = new UniversalDetector(null);
        String encoding;
        int nread;

        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        fis.close();

        encoding = detector.getDetectedCharset();
        detector.reset();
        if (encoding != null) {
            return encoding;
        } else {
            return "UTF-8";
        }
View Full Code Here


  protected String getCharsetFromBytes(Resource resource) throws IOException {
    String charsetName = null;

    byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD];
       // (1)
      UniversalDetector detector = new UniversalDetector(null);

      // (2)
    resource.mark(MAX_CHARSET_READAHEAD);
    int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD);
    resource.reset();
    detector.handleData(bbuffer, 0, len);
    // (3)
    detector.dataEnd();
      // (4)
      charsetName = detector.getDetectedCharset();

      // (5)
      detector.reset();

    return charsetName;
  }
View Full Code Here

    return ldr.getCachedBytes(bigFileThreshold);
  }

  private static Charset charset(byte[] content, String encoding) {
    if (encoding == null) {
      UniversalDetector d = new UniversalDetector(null);
      d.handleData(content, 0, content.length);
      d.dataEnd();
      encoding = d.getDetectedCharset();
    }
    if (encoding == null) {
      return ISO_8859_1;
    }
    try {
View Full Code Here

  /** Private and empty: instances cannot be created from outside this class. */
  private UniversalDetectorUtil() {
  }

  public static String getDetectedEncoding(InputStream is) throws IOException {
    UniversalDetector detector = new UniversalDetector(null);
    byte[] buf = new byte[4096];
    int nread;
    while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
      detector.handleData(buf, 0, nread);
    }
    detector.dataEnd();
    return detector.getDetectedCharset();
  }
View Full Code Here

    public static String getDetectedEncodingFrom(IFile file, String defaultEncoding) {
        InputStream is = null;
        String encoding = defaultEncoding == null ? Charset.defaultCharset().name() : defaultEncoding;
        try {
            is = EclipseIFileUtil.getInputStreamFrom(file);
            UniversalDetector detector = new UniversalDetector(null);
            byte[] buf = new byte[4096];
            int nread;
            while ((nread = is.read(buf)) > 0 && !detector.isDone())
                detector.handleData(buf, 0, nread);
            detector.dataEnd();
            encoding = detector.getDetectedCharset();
        } catch (Exception e) {
            Stderr.p("EclipseIFileUtil.getDetectedEncodingFrom(IFile): " + e.getClass().getName() + ","
                    + e.getLocalizedMessage());
        } finally {
            IOUtil.close(is);
View Full Code Here

import java.io.IOException;

public class CharsetDetector {

    public static String detectedCharset(byte[] data) throws IOException {
        UniversalDetector charsetDetector = new UniversalDetector(new CharsetListener() {
            public void report(String charset) {
            }
        });
        charsetDetector.handleData(data, 0, data.length);
        charsetDetector.dataEnd();
        return charsetDetector.getDetectedCharset();
    }
View Full Code Here

    public static String detectEncoding(String filepath) throws IOException {
        byte[] buf = new byte[4096];
        FileInputStream fis = new FileInputStream(filepath);

        // Construct an instance of UniversalDetector
        UniversalDetector detector = new UniversalDetector(null);

        // Feed some data to the detector
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        // Notify the detector of the end of data
        detector.dataEnd();

        // Get the detected encoding name
        String detectedEncoding = detector.getDetectedCharset();
        if (detectedEncoding != null) {
            LOG.info(String.format("The encoding of the file is %s", detectedEncoding));
        } else {
            LOG.warn(String.format("No encoding detected! Use defaul %s", Utils.encoding));
            detectedEncoding = Utils.encoding;
        }
       
        // Reset the detecor
        detector.reset();
       
        return detectedEncoding;
    }
View Full Code Here

    public static String detect(InputStream is) throws IOException {

        byte[] buf = new byte[4096];
        // (1)
        UniversalDetector detector = new UniversalDetector(null);

        // (2)
        int nread;
        while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        // (3)
        detector.dataEnd();

        // (4)
        String encoding = detector.getDetectedCharset();
        if (encoding != null) {
            logger.debug("Detected encoding = " + encoding);
        } else {
            logger.debug("No encoding detected, using default: " + DEFAULT_ENCODING);
            encoding = DEFAULT_ENCODING;
        }

        // (5)
        detector.reset();
       
        return encoding;
    }
View Full Code Here

  public String getDetectedEncoding(File file) {
    InputStream is = null;
    String encoding = null;
    try {
      is = new FileInputStream(file);
      UniversalDetector detector = new UniversalDetector(null);
      byte[] buf = new byte[4096];
      int nread;
      while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
        detector.handleData(buf, 0, nread);
      }
      detector.dataEnd();
      encoding = detector.getDetectedCharset();
    } catch (IOException e) {
      // nothing to do
    } finally {
      IOUtil.close(is);
      if (encoding == null) {
View Full Code Here

  public String getDetectedEncoding(File file) {
    InputStream is = null;
    String encoding = null;
    try {
      is = new FileInputStream(file);
      UniversalDetector detector = new UniversalDetector(null);
      byte[] buf = new byte[4096];
      int nread;
      while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
        detector.handleData(buf, 0, nread);
      }
      detector.dataEnd();
      encoding = detector.getDetectedCharset();
    } catch (IOException e) {
      // nothing to do
    } finally {
      IOUtil.close(is);
      if (encoding == null) {
View Full Code Here

TOP

Related Classes of org.mozilla.universalchardet.UniversalDetector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.