// Package org.hsqldb.lib

// Source Code of org.hsqldb.lib.StringConverter

/*
* For work developed by the HSQL Development Group:
*
* Copyright (c) 2001-2011, The HSQL Development Group
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the HSQL Development Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* For work originally developed by the Hypersonic SQL Group:
*
* Copyright (c) 1995-2000, The Hypersonic SQL Group.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the Hypersonic SQL Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many individuals
* on behalf of the Hypersonic SQL Group.
*/


package org.hsqldb.lib;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;
import java.io.UnsupportedEncodingException;

import org.hsqldb.map.BitMap;

/**
* Collection of static methods for converting strings between different
* formats and to and from byte arrays.<p>
*
* Includes some methods based on Hypersonic code as indicated.
*
* @author Thomas Mueller (Hypersonic SQL Group)
* @author Fred Toussi (fredt@users dot sourceforge.net)
* @version 2.0.1
* @since 1.7.2
*/
public class StringConverter {

    // Lower-case hexadecimal digits as ASCII bytes, indexed by nibble value
    // (0-15). Shared by the hex-string, Unicode-escape and UUID writers in
    // this class.
    private static final byte[] HEXBYTES = {
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
        (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'
    };

    private static int getNibble(int value) {

        if (value >= '0' && value <= '9') {
            return value - '0';
        }

        if (value >= 'a' && value <= 'f') {
            return 10 + value - 'a';
        }

        if (value >= 'A' && value <= 'F') {
            return 10 + value - 'A';
        }

        return -1;
    }

    /**
     * Converts a hexadecimal string into a byte array
     *
     *
     * @param s hexadecimal string
     *
     * @return byte array for the hex string
     * @throws IOException
     */
    public static byte[] hexStringToByteArray(String s) throws IOException {

        int     l    = s.length();
        byte[]  data = new byte[l / 2 + (l % 2)];
        int     n,
                b    = 0;
        boolean high = true;
        int     i    = 0;

        for (int j = 0; j < l; j++) {
            char c = s.charAt(j);

            if (c == ' ') {
                continue;
            }

            n = getNibble(c);

            if (n == -1) {
                throw new IOException(
                    "hexadecimal string contains non hex character");    //NOI18N
            }

            if (high) {
                b    = (n & 0xf) << 4;
                high = false;
            } else {
                b         += (n & 0xf);
                high      = true;
                data[i++] = (byte) b;
            }
        }

        if (!high) {
            throw new IOException(
                "hexadecimal string with odd number of characters");    //NOI18N
        }

        if (i < data.length) {
            data = (byte[]) ArrayUtil.resizeArray(data, i);
        }

        return data;
    }

    /**
     * Compacts a bit string into a BitMap
     *
     *
     * @param s bit string
     *
     * @return byte array for the hex string
     * @throws IOException
     */
    public static BitMap sqlBitStringToBitMap(String s) throws IOException {

        int    l = s.length();
        int    n;
        int    bitIndex = 0;
        BitMap map      = new BitMap(l, true);

        for (int j = 0; j < l; j++) {
            char c = s.charAt(j);

            if (c == ' ') {
                continue;
            }

            n = getNibble(c);

            if (n != 0 && n != 1) {
                throw new IOException(
                    "hexadecimal string contains non hex character");    //NOI18N
            }

            if (n == 1) {
                map.set(bitIndex);
            }

            bitIndex++;
        }

        map.setSize(bitIndex);

        return map;
    }

    /**
     * Converts a byte array into a hexadecimal string
     *
     *
     * @param b byte array
     *
     * @return hex string
     */
    public static String byteArrayToHexString(byte[] b) {

        int    len = b.length;
        char[] s   = new char[len * 2];

        for (int i = 0, j = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
            s[j++] = (char) HEXBYTES[c & 0xf];
        }

        return new String(s);
    }

    /**
     * Converts a byte array into an SQL hexadecimal string
     *
     *
     * @param b byte array
     *
     * @return hex string
     */
    public static String byteArrayToSQLHexString(byte[] b) {

        int    len = b.length;
        char[] s   = new char[len * 2 + 3];

        s[0] = 'X';
        s[1] = '\'';

        int j = 2;

        for (int i = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
            s[j++] = (char) HEXBYTES[c & 0xf];
        }

        s[j] = '\'';

        return new String(s);
    }

    /**
     * Converts a byte array into a bit string
     *
     *
     * @param bytes byte array
     * @param bitCount number of bits
     * @return hex string
     */
    public static String byteArrayToBitString(byte[] bytes, int bitCount) {

        char[] s = new char[bitCount];

        for (int j = 0; j < bitCount; j++) {
            byte b = bytes[j / 8];

            s[j] = BitMap.isSet(b, j % 8) ? '1'
                                          : '0';
        }

        return new String(s);
    }

    /**
     * Converts a byte array into an SQL binary string
     *
     *
     * @param bytes byte array
     * @param bitCount number of bits
     * @return hex string
     */
    public static String byteArrayToSQLBitString(byte[] bytes, int bitCount) {

        char[] s = new char[bitCount + 3];

        s[0] = 'B';
        s[1] = '\'';

        int pos = 2;

        for (int j = 0; j < bitCount; j++) {
            byte b = bytes[j / 8];

            s[pos++] = BitMap.isSet(b, j % 8) ? '1'
                                              : '0';
        }

        s[pos] = '\'';

        return new String(s);
    }

    /**
     * Converts a byte array into hexadecimal characters which are written as
     * ASCII to the given output stream.
     *
     * @param o output array
     * @param from offset into output array
     * @param b input array
     */
    public static void writeHexBytes(byte[] o, int from, byte[] b) {

        int len = b.length;

        for (int i = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            o[from++] = HEXBYTES[c >> 4 & 0xf];
            o[from++] = HEXBYTES[c & 0xf];
        }
    }

    public static String byteArrayToString(byte[] b, String charset) {

        try {
            return (charset == null) ? new String(b)
                                     : new String(b, charset);
        } catch (Exception e) {}

        return null;
    }

    /**
     * Hsqldb specific encoding used only for log files. The SQL statements that
     * need to be written to the log file (input) are Java Unicode strings.
     * input is converted into a 7bit escaped ASCII string (output)with the
     * following transformations. All characters outside the 0x20-7f range are
     * converted to a escape sequence and added to output. If a backslash
     * character is immdediately followed by 'u', the backslash character is
     * converted to escape sequence and added to output. All the remaining
     * characters in input are added to output without conversion. The escape
     * sequence is backslash, letter u, xxxx, where xxxx is the hex
     * representation of the character code. (fredt@users)<p>
     *
     * Method based on Hypersonic Code
     *
     * @param b output stream to wite to
     * @param s Java string
     * @param doubleSingleQuotes boolean
     */
    public static void stringToUnicodeBytes(HsqlByteArrayOutputStream b,
            String s, boolean doubleSingleQuotes) {

        if (s == null) {
            return;
        }

        final int len = s.length();
        int       extras = 0;

        if (len == 0) {
            return;
        }

        b.ensureRoom(len * 2 + 5);

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);

            if (c == '\\') {
                if ((i < len - 1) && (s.charAt(i + 1) == 'u')) {
                    b.writeNoCheck(c);    // encode the \ as unicode, so 'u' is ignored
                    b.writeNoCheck('u');
                    b.writeNoCheck('0');
                    b.writeNoCheck('0');
                    b.writeNoCheck('5');
                    b.writeNoCheck('c');

                    extras += 5;
                } else {
                    b.write(c);
                }
            } else if ((c >= 0x0020) && (c <= 0x007f)) {
                b.writeNoCheck(c);        // this is 99%

                if (c == '\'' && doubleSingleQuotes) {
                    b.writeNoCheck(c);

                    extras++;
                }
            } else {
                b.writeNoCheck('\\');
                b.writeNoCheck('u');
                b.writeNoCheck(HEXBYTES[(c >> 12) & 0xf]);
                b.writeNoCheck(HEXBYTES[(c >> 8) & 0xf]);
                b.writeNoCheck(HEXBYTES[(c >> 4) & 0xf]);
                b.writeNoCheck(HEXBYTES[c & 0xf]);

                extras += 5;
            }

            if (extras > len) {
                b.ensureRoom(len + extras + 5);

                extras = 0;
            }
        }
    }

// fredt@users 20020522 - fix for 557510 - backslash bug
// this legacy bug resulted from forward reading the input when a backslash
// was present and manifested itself when a backslash was followed
// immediately by a character outside the 0x20-7f range in a database field.

    /**
     * Hsqldb specific decoding used only for log files. This method converts
     * the 7 bit escaped ASCII strings in a log file back into Java Unicode
     * strings. See stringToUnicodeBytes() above. <p>
     *
     * Method based on Hypersonic Code
     *
     * @param s encoded ASCII string in byte array
     * @return Java string
     */
    public static String unicodeStringToString(String s) {

        if ((s == null) || (s.indexOf("\\u") == -1)) {
            return s;
        }

        int    len = s.length();
        char[] b   = new char[len];
        int    j   = 0;

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);

            if (c == '\\' && i < len - 5) {
                char c1 = s.charAt(i + 1);

                if (c1 == 'u') {
                    i++;

                    // 4 characters read should always return 0-15
                    int k = getNibble(s.charAt(++i)) << 12;

                    k      += getNibble(s.charAt(++i)) << 8;
                    k      += getNibble(s.charAt(++i)) << 4;
                    k      += getNibble(s.charAt(++i));
                    b[j++] = (char) k;
                } else {
                    b[j++] = c;
                }
            } else {
                b[j++] = c;
            }
        }

        return new String(b, 0, j);
    }

    public static String readUTF(byte[] bytearr, int offset,
                                 int length) throws IOException {

        char[] buf = new char[length];

        return readUTF(bytearr, offset, length, buf);
    }

    public static String readUTF(byte[] bytearr, int offset, int length,
                                 char[] buf) throws IOException {

        int bcount = 0;
        int c, char2, char3;
        int count = 0;

        while (count < length) {
            c = (int) bytearr[offset + count];

            if (bcount == buf.length) {
                buf = (char[]) ArrayUtil.resizeArray(buf, length);
            }

            if (c > 0) {

                /* 0xxxxxxx*/
                count++;

                buf[bcount++] = (char) c;

                continue;
            }

            c &= 0xff;

            switch (c >> 4) {

                case 12 :
                case 13 :

                    /* 110x xxxx   10xx xxxx*/
                    count += 2;

                    if (count > length) {
                        throw new UTFDataFormatException();
                    }

                    char2 = (int) bytearr[offset + count - 1];

                    if ((char2 & 0xC0) != 0x80) {
                        throw new UTFDataFormatException();
                    }

                    buf[bcount++] = (char) (((c & 0x1F) << 6)
                                            | (char2 & 0x3F));
                    break;

                case 14 :

                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
                    count += 3;

                    if (count > length) {
                        throw new UTFDataFormatException();
                    }

                    char2 = (int) bytearr[offset + count - 2];
                    char3 = (int) bytearr[offset + count - 1];

                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                        throw new UTFDataFormatException();
                    }

                    buf[bcount++] = (char) (((c & 0x0F) << 12)
                                            | ((char2 & 0x3F) << 6)
                                            | ((char3 & 0x3F) << 0));
                    break;

                default :

                    /* 10xx xxxx,  1111 xxxx */
                    throw new UTFDataFormatException();
            }
        }

        // The number of chars produced may be less than length
        return new String(buf, 0, bcount);
    }

    /**
     * Writes a string to the specified DataOutput using UTF-8 encoding in a
     * machine-independent manner.
     * <p>
     * @param      str   a string to be written.
     * @param      out   destination to write to
     * @return     The number of bytes written out.
     */
    public static int stringToUTFBytes(String str,
                                       HsqlByteArrayOutputStream out) {

        int strlen = str.length();
        int c,
            count  = 0;

        if (out.count + strlen + 8 > out.buffer.length) {
            out.ensureRoom(strlen + 8);
        }

        char[] arr = str.toCharArray();

        for (int i = 0; i < strlen; i++) {
            c = arr[i];

            if (c >= 0x0001 && c <= 0x007F) {
                out.buffer[out.count++] = (byte) c;

                count++;
            } else if (c > 0x07FF) {
                out.buffer[out.count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
                count                   += 3;
            } else {
                out.buffer[out.count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
                count                   += 2;
            }

            if (out.count + 8 > out.buffer.length) {
                out.ensureRoom(strlen - i + 8);
            }
        }

        return count;
    }

    public static int getUTFSize(String s) {

        int len = (s == null) ? 0
                              : s.length();
        int l   = 0;

        for (int i = 0; i < len; i++) {
            int c = s.charAt(i);

            if ((c >= 0x0001) && (c <= 0x007F)) {
                l++;
            } else if (c > 0x07FF) {
                l += 3;
            } else {
                l += 2;
            }
        }

        return l;
    }

    /**
     * Using an output stream, returns a String from an InputStream.
     *
     * @param is InputStream to read from
     * @param encoding character encoding of the string
     * @throws IOException
     * @return a Java string
     */
    public static String inputStreamToString(InputStream is,
            String encoding) throws IOException {

        HsqlByteArrayOutputStream baOS = new HsqlByteArrayOutputStream(1024);

        while (true) {
            int c = is.read();

            if (c == -1) {
                break;
            }

            baOS.write(c);
        }

        return new String(baOS.getBuffer(), 0, baOS.size(), encoding);
    }

// fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size

    /**
     * Returns the quoted version of the string using the quotechar argument.
     * doublequote argument indicates whether each instance of quotechar inside
     * the string is doubled.<p>
     *
     * null string argument returns null. If the caller needs the literal
     * "NULL" it should created it itself<p>
     *
     * @param s Java string
     * @param quoteChar character used for quoting
     * @param extraQuote true if quoteChar itself should be repeated
     * @return String
     */
    public static String toQuotedString(String s, char quoteChar,
                                        boolean extraQuote) {

        if (s == null) {
            return null;
        }

        int    count = extraQuote ? count(s, quoteChar)
                                  : 0;
        int    len   = s.length();
        char[] b     = new char[2 + count + len];
        int    i     = 0;
        int    j     = 0;

        b[j++] = quoteChar;

        for (; i < len; i++) {
            char c = s.charAt(i);

            b[j++] = c;

            if (extraQuote && c == quoteChar) {
                b[j++] = c;
            }
        }

        b[j] = quoteChar;

        return new String(b);
    }

    /**
     * Counts Character c in String s
     *
     * @param s Java string
     * @param c character to count
     * @return int count
     */
    static int count(final String s, final char c) {

        int pos   = 0;
        int count = 0;

        if (s != null) {
            while ((pos = s.indexOf(c, pos)) > -1) {
                count++;
                pos++;
            }
        }

        return count;
    }

    /**
     * Converts the string to an HTML representation in the ASCII character set
     * and appends it to a byte array output stream.
     *
     * @param b the output byte array output stream
     * @param s the input string
     */
    public static void stringToHtmlBytes(HsqlByteArrayOutputStream b,
                                         String s) {

        if (s == null) {
            return;
        }

        final int len = s.length();
        char[]    chars;

        if (len == 0) {
            return;
        }

        chars = s.toCharArray();

        b.ensureRoom(len);

        for (int i = 0; i < len; i++) {
            char c = chars[i];

            if (c > 0x007f || c == '"' || c == '&' || c == '<' || c == '>') {
                int codePoint = Character.codePointAt(chars, i);

                if (Character.charCount(codePoint) == 2) {
                    i++;
                }

                b.ensureRoom(16);
                b.writeNoCheck('&');
                b.writeNoCheck('#');
                b.writeBytes(String.valueOf(codePoint));
                b.writeNoCheck(';');
            } else if (c < 0x0020 ) {
                b.writeNoCheck(' ');
            } else {
                b.writeNoCheck(c);
            }
        }
    }

    /**
     * Returns a string representation in UUID form from a binary string.
     *
     * UUID string is composed of 8-4-4-4-12 hexadecimal characters.
     *
     * @param b the byte array
     * @return UUID string form
     */
    public static String toStringUUID(byte[] b) {

        char[] chars = new char[36];
        int    hexIndex;

        if (b == null) {
            return null;
        }

        if (b.length != 16) {
            throw new NumberFormatException();
        }

        for (int i = 0, j = 0; i < b.length; ) {
            hexIndex   = (b[i] & 0xf0) >> 4;
            chars[j++] = (char) HEXBYTES[hexIndex];
            hexIndex   = b[i] & 0xf;
            chars[j++] = (char) HEXBYTES[hexIndex];

            i++;

            if (i >= 4 && i <= 10 && (i % 2) == 0) {
                chars[j++] = '-';
            }
        }

        return new String(chars);
    }

    /**
     * Returns a byte[] representation in UUID form from a UUID string.
     *
     * @param s the UUID string
     * @return byte array
     */
    public static byte[] toBinaryUUID(String s) {

        byte[] bytes = new byte[16];

        if (s == null) {
            return null;
        }

        if (s.length() != 36) {
            throw new NumberFormatException();
        }

        for (int i = 0, j = 0; i < bytes.length; ) {
            char c    = s.charAt(j++);
            int  high = getNibble(c);

            c        = s.charAt(j++);
            bytes[i] = (byte) ((high << 4) + getNibble(c));

            i++;

            if (i >= 4 && i <= 10 && (i % 2) == 0) {
                c = s.charAt(j++);

                if (c != '-') {}
            }
        }

        return bytes;
    }
}
// TOP

// Related Classes of org.hsqldb.lib.StringConverter

// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.