Package org.mozilla.universalchardet

Examples of org.mozilla.universalchardet.UniversalDetector


    public String getDetectedEncoding(File file) {
        InputStream is = null;
        String encoding = null;
        try {
            is = new FileInputStream(file);
            UniversalDetector detector = new UniversalDetector(null);
            byte[] buf = new byte[4096];
            int nread;
            while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
                detector.handleData(buf, 0, nread);
            }
            detector.dataEnd();
            encoding = detector.getDetectedCharset();
        } catch (IOException e) {
            // nothing to do
        } finally {
            IOUtil.close(is);
            if (encoding == null) {
View Full Code Here


    // Private no-arg constructor: code outside this class cannot instantiate it.
    // NOTE(review): presumably this is a static-only utility holder — confirm against the full class.
    private UniversalDetectorUtil() {
    }

    public static String getDetectedEncoding(InputStream is) throws IOException {
        UniversalDetector detector = new UniversalDetector(null);
        byte[] buf = new byte[4096];
        int nread;
        while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        return detector.getDetectedCharset();
    }
View Full Code Here

  public String getDetectedEncoding(File file) {
    InputStream is = null;
    String encoding = null;
    try {
      is = new FileInputStream(file);
      UniversalDetector detector = new UniversalDetector(null);
      byte[] buf = new byte[4096];
      int nread;
      while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
        detector.handleData(buf, 0, nread);
      }
      detector.dataEnd();
      encoding = detector.getDetectedCharset();
    } catch (IOException e) {
      // nothing to do
    } finally {
      IOUtil.close(is);
      if (encoding == null) {
View Full Code Here

                in = new FileInputStream(file);
                inStream = new DataInputStream(in);
      byte[] buf = new byte[(int)file.length()];
      int nrRead = inStream.read(buf);
   
      UniversalDetector detector = new UniversalDetector(null);
      detector.handleData(buf, 0, nrRead);
      detector.dataEnd();

      String encoding = detector.getDetectedCharset();
      detector.reset();
      if (encoding != null) {
        if (!encoding.equals(ENCODING)) {
          Log.error(Log.JEEVES,"Detected character set "+encoding+", converting to UTF-8");
          return convertByteArrayToUTF8ByteArray(buf, encoding);
        }
View Full Code Here

   *         or null if not detected
   * @throws IOException
   */
  public static String getFileCharset(File file) throws IOException {
    byte[] buf = new byte[4096];
    final UniversalDetector universalDetector;
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
      universalDetector = new UniversalDetector(null);
      int numberOfBytesRead;
      while ((numberOfBytesRead = bufferedInputStream.read(buf)) > 0 && !universalDetector.isDone()) {
        universalDetector.handleData(buf, 0, numberOfBytesRead);
      }
    }
    universalDetector.dataEnd();
    String encoding = universalDetector.getDetectedCharset();

    if (encoding != null) {
      LOGGER.debug("Detected encoding for {} is {}.", file.getAbsolutePath(), encoding);
    } else {
      LOGGER.debug("No encoding detected for {}.", file.getAbsolutePath());
    }

    universalDetector.reset();

    return encoding;
  }
View Full Code Here

        return content;
    }

    private static String detectEncoding(byte[] content)
    {
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(content, 0, content.length);
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        if (encoding != null)
        {
            LOGGER.debug(String.format("Detected encoding: %s\n", encoding));
        }
        else
        {
            encoding = getDefaultEncoding();
            LOGGER.debug(String.format("No encoding detected, using default: %s\n", encoding));
        }
        detector.reset();
        return encoding;
    }
View Full Code Here

   * @throws Exception
   */
  private void checkForBadCharacters(String json) throws Exception
  {
    byte[] bytes = json.getBytes();   
    UniversalDetector ud = new UniversalDetector(null);
    ud.handleData(bytes, 0, bytes.length);
    ud.dataEnd();
    String encoding = ud.getDetectedCharset();     
    if ( encoding != null )
    {
      //do an extra check for charcode 65533 if encoding is utf-8   
      if ( encoding.equals("UTF-8"))
      {
View Full Code Here

   * @throws Exception
   */
  private void checkForBadCharacters(String json) throws Exception
  {
    byte[] bytes = json.getBytes();   
    UniversalDetector ud = new UniversalDetector(null);
    ud.handleData(bytes, 0, bytes.length);
    ud.dataEnd();
    String encoding = ud.getDetectedCharset();     
    if ( encoding != null )
    {
      //do an extra check for charcode 65533 if encoding is utf-8   
      if ( encoding.equals("UTF-8"))
      {
View Full Code Here

     * @param bytes - a byte array to be checked
     * @return charset - the charset used
     * @throws IOException
     */
    public static String detectCharset(byte bytes[]) {
        UniversalDetector detector = new UniversalDetector(null);
        int offset = 0;

        do {
            int blockSize = Math.min(4096, bytes.length - offset);
            detector.handleData(bytes, offset, blockSize);
            offset += blockSize;
        } while(offset < bytes.length);
        detector.dataEnd();

        return or(detector.getDetectedCharset(), "UTF-8");
    }
View Full Code Here

     * @param is - an input stream to be checked
     * @return charset - the charset used
     * @throws IOException
     */
    public static String detectCharset(InputStream is) throws IOException {
        UniversalDetector detector = new UniversalDetector(null);
        byte[] buf = new byte[4096];
        int nRead;

        while ((nRead = is.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nRead);
        }
        detector.dataEnd();

        return or(detector.getDetectedCharset(), "UTF-8");
    }
View Full Code Here

TOP

Related Classes of org.mozilla.universalchardet.UniversalDetector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.