Package com.caucho.xml2.readers

Source Code of com.caucho.xml2.readers.Utf8Reader

/*
* Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT.  See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*   Free SoftwareFoundation, Inc.
*   59 Temple Place, Suite 330
*   Boston, MA 02111-1307  USA
*
* @author Scott Ferguson
*/

package com.caucho.xml2.readers;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.ReadStream;
import com.caucho.xml2.XmlParser;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

/**
* A fast reader to convert bytes to characters for parsing XML.
*/
public class Utf8Reader extends XmlReader {
  /**
   * Create a new reader.
   */
  public Utf8Reader()
  {
  }

  /**
   * Create a new reader with the given read stream.
   */
  public Utf8Reader(XmlParser parser, ReadStream is)
  {
    super(parser, is);
  }

  /**
   * Read the next character, returning -1 on end of file..
   */
  public int read()
    throws IOException
  {
    int ch1 = _is.read();

    if (ch1 == '\n') {
      _parser.setLine(++_line);
      return ch1;
    }
    else if (ch1 == '\r') {
      _parser.setLine(++_line);

      int ch2 = _is.read();
      if (ch2 == '\n')
        return '\n';

      if (ch2 < 0) {
      }
      else if (ch2 < 0x80)
        _parser.unread(ch2);
      else
        _parser.unread(readSecond(ch2));
     
      return '\n';
    }
    else if (ch1 < 0x80)
      return ch1;
    else
      return readSecond(ch1);
  }
   
  private int readSecond(int ch1)
    throws IOException
  {
    if ((ch1 & 0xe0) == 0xc0) {
      int ch2 = _is.read();
      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0}", hex(ch1)));
     
      return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);
    }
    else if ((ch1 & 0xf0) == 0xe0) {
      int ch2 = _is.read();
      int ch3 = _is.read();
     
      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding at {0} {1} {2}", hex(ch1), hex(ch2), hex(ch3)));
     
      if (ch3 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch3 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));

      int ch = ((ch1 & 0x1f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);

      if (ch == 0xfeff) // handle some writers, e.g. microsoft
        return read();
      else
        return ch;
    }
    else
      throw error(L.l("illegal utf8 encoding at {0}", hex(ch1)));
  }

  private String hex(int n)
  {
    n = n & 0xff;
   
    CharBuffer cb = CharBuffer.allocate();

    cb.append("0x");

    int d = n / 16;
    if (d >= 0 && d <= 9)
      cb.append((char) ('0' + d));
    else
      cb.append((char) ('a' + d - 10));
   
    d = n % 16;
    if (d >= 0 && d <= 9)
      cb.append((char) ('0' + d));
    else
      cb.append((char) ('a' + d - 10));

    return cb.close();
  }

  private CharConversionException error(String msg)
  {
    String filename = _parser.getFilename();
    int line = _parser.getLine();

    if (filename != null)
      return new CharConversionException(filename + ":" + line + ": " + msg);
    else
      return new CharConversionException(msg);
  }
}
TOP

Related Classes of com.caucho.xml2.readers.Utf8Reader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.