Package com.ibm.icu.text

Examples of com.ibm.icu.text.CharsetDetector


      }

      // charset detection with icu
      try {
        bis = new BufferedInputStream(new FileInputStream(tempFile));
        CharsetDetector detector;
        detector = new CharsetDetector();
        detector.enableInputFilter(true);
        detector.setText(bis);
        if (declaredEncoding!=null && !"".equals(declaredEncoding))
          detector.setDeclaredEncoding(declaredEncoding);
        CharsetMatch[] matches = null;
        matches = detector.detectAll();
        bis.close();
        encoding = HttpUtils.filtreEncoding(matches[0].getName().toLowerCase());
        if (encoding!=null && !"".equals(encoding))
        {
          if (encodingFreq.containsKey(encoding))
View Full Code Here


          encodingFreq.put(encoding, 2);
      }

      // charset detection with icu
      bis = new BufferedInputStream(new FileInputStream(tempFile));
      CharsetDetector detector;
      detector = new CharsetDetector();
      detector.enableInputFilter(true);
      detector.setText(bis);
      if (declaredEncoding!=null && !"".equals(declaredEncoding))
        detector.setDeclaredEncoding(declaredEncoding);
      CharsetMatch[] matches = detector.detectAll();
      bis.close();
      encoding = HttpUtils.filtreEncoding(matches[0].getName().toLowerCase());
      if (encoding!=null && !"".equals(encoding)) {
        if (encodingFreq.containsKey(encoding))
          encodingFreq.put(encoding, encodingFreq.get(encoding) + 2);
View Full Code Here

    }

    @Override
    protected CoderResult implFlush(CharBuffer out) {
      if (usedDecoder == null) {
        CharsetDetector detector = new CharsetDetector();
        detector.enableInputFilter(filtered);
        byte[] data = buffer.toByteArray();
        detector.setText(data);
        CharsetMatch cm = detector.detect();
        try {
          usedDecoder = Charset.forName(cm == null ? "ISO-8859-1" : cm.getName()).newDecoder();
        } catch (UnsupportedCharsetException ex) {
          usedDecoder = Charset.forName("ISO-8859-1").newDecoder();
        }
View Full Code Here

    public static Reader readerWithCharsetDetect(File f) throws FileNotFoundException {
        return readerWithCharsetDetect(new BufferedInputStream(new FileInputStream(f)));
    }

    public static Reader readerWithCharsetDetect(InputStream is) {
        CharsetDetector detector = new CharsetDetector();
        try {
            CharsetMatch match = detector.setText(is).detect();
            is.reset();
            return new InputStreamReader(is, match.getName());
        } catch (IOException e) {
            e.printStackTrace();
            try {
View Full Code Here

        return source.readByte();
    }
   
    public Encoding sniff() throws IOException {
        try {
            CharsetDetector detector = new CharsetDetector();
            detector.setText(this);
            CharsetMatch match = detector.detect();
            Encoding enc = Encoding.forName(match.getName());
            Encoding actual = enc.getActualHtmlEncoding();
            if (actual != null) {
                enc = actual;
            }
View Full Code Here

  public static BufferedReader getSafeReaderFor(String name) throws FileNotFoundException {
    InputStream stream = getStreamFor(name);
    if( stream!=null ) {   
      Charset charset;
      try {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(new BufferedInputStream(stream));
        charset = Charset.forName( detector.detect().getName() );
      } catch (Exception e) {
        charset = Charset.defaultCharset();
      }
      return new BufferedReader(new InputStreamReader(getStreamFor(name), charset));
    } else {
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.CharsetDetector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.