// Package org.openjena.atlas.io
//
// Source code of org.openjena.atlas.io.TestBlockUTF8$Alloc

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.openjena.atlas.io;

import java.io.ByteArrayOutputStream ;
import java.io.IOException ;
import java.io.OutputStreamWriter ;
import java.io.Writer ;
import java.nio.Buffer ;
import java.nio.ByteBuffer ;
import java.nio.CharBuffer ;
import java.nio.charset.Charset ;
import java.nio.charset.CharsetDecoder ;
import java.nio.charset.CharsetEncoder ;

import org.junit.Test ;
import org.openjena.atlas.io.BlockUTF8 ;
import org.openjena.atlas.junit.BaseTest ;
import org.openjena.atlas.lib.Chars ;

public class TestBlockUTF8 extends BaseTest
{
    // Need array and non-array versions.
   
    static Charset utf8 = Chars.charsetUTF8 ;
    static CharsetDecoder dec = utf8.newDecoder() ;
    static CharsetEncoder enc = utf8.newEncoder() ;

    // UTF-8 encoding.
    // character '¢' = code point U+00A2 -> C2 A2
    // character '€' = code point U+20AC -> E2 82 AC

    static private final String asciiBase             = "abc" ;
    static private final String latinBase             = "Àéíÿ" ;
    static private final String latinExtraBase        = "ỹfifl" // fi-ligature, fl-ligature
    static private final String greekBase             = "αβγ" ;
    static private final String hewbrewBase           = "אבג" ;
    static private final String arabicBase            = "ءآأ";
    static private final String symbolsBase           = "☺☻♪♫" ;
    static private final String chineseBase           = "孫子兵法" ; // The Art of War
    static private final String japaneseBase          = "日本" ;    // Japanese
    static private final String binaryStr1            = "abc\uD800xyz" ;    // A single surrogate, without it's pair.
    static private final String binaryStr2            = "\uD800" ;          // A single surrogate, without it's pair.
    static private final String binaryStr3            = "\u0000" ;          // A zero character 

    static private final byte[] binaryBytes1 = {} ;        
    static private final byte[] binaryBytes2 = { (byte)0x00 } ;             // Java encoding of 0 codepoint is 0        
    static private final byte[] binaryBytes3 = { (byte)0xC0, (byte)0x80 } ;     // Modifed unicode zero codepoint.        

    @Test public void convert_in_00() { testIn("") ; }
    @Test public void convert_in_01() { testIn(asciiBase) ; }
    @Test public void convert_in_02() { testIn(latinBase) ; }
    @Test public void convert_in_03() { testIn(latinExtraBase) ; }
    @Test public void convert_in_04() { testIn(greekBase) ; }
    @Test public void convert_in_05() { testIn(hewbrewBase) ; }
    @Test public void convert_in_06() { testIn(arabicBase) ; }
    @Test public void convert_in_07() { testIn(symbolsBase) ; }
    @Test public void convert_in_08() { testIn(chineseBase) ; }
    @Test public void convert_in_09() { testIn(japaneseBase) ; }
    @Test public void convert_in_10() { testInOutBinary(binaryStr1) ;
    @Test public void convert_in_11() { testInOutBinary(binaryStr2) ;
    @Test public void convert_in_12() { testInOutBinary(binaryStr3) ;

    @Test public void convert_out_00() { testOut("") ; }
    @Test public void convert_out_01() { testOut(asciiBase) ; }
    @Test public void convert_out_02() { testOut(latinBase) ; }
    @Test public void convert_out_03() { testOut(latinExtraBase) ; }
    @Test public void convert_out_04() { testOut(greekBase) ; }
    @Test public void convert_out_05() { testOut(hewbrewBase) ; }
    @Test public void convert_out_06() { testOut(arabicBase) ; }
    @Test public void convert_out_07() { testOut(symbolsBase) ; }
    @Test public void convert_out_08() { testOut(chineseBase) ; }
    @Test public void convert_out_09() { testOut(japaneseBase) ; }
    @Test public void convert_out_10() { testOut(binaryStr1) ; }
    @Test public void convert_out_11() { testOut(binaryStr2) ; }
    @Test public void convert_out_12() { testOut(binaryStr3) ; }

    // While it is key is chars->bytes-chars, we also test bytes->bytes
    @Test public void binary_01() { testBinary(binaryBytes1) ; }
    @Test public void binary_02() { testBinary(binaryBytes2) ; }
    @Test public void binary_03() { testBinary(binaryBytes3, binaryBytes2) ; }

    @Test public void binary_10() { testBinary(binaryBytes2, CharBuffer.wrap(binaryStr3)) ; }
    @Test public void binary_11() { testBinary(binaryBytes3, CharBuffer.wrap(binaryStr3)) ; }

   
    static void testIn(String x)
    {
        testIn(x, allocByteBufferArray, allocCharBufferArray) ;
        testIn(x, allocByteBufferDirect, allocCharBufferDirect) ;

    }
    static void testIn(String x, Alloc<ByteBuffer> allocBB, Alloc<CharBuffer> allocCB)
    {
        // Test as binary.
        testInOutBinary(x) ;

        // Now test, comparing to std Java.
        // Correct answer, in bytes
        ByteBuffer bytes = ByteBuffer.wrap(stringAsBytes(x)) ;
        // To bytes.stringAsBytes
        int N = x.length() ;
        CharBuffer cb = CharBuffer.wrap(x.toCharArray()) ;
        ByteBuffer bb = allocBB.allocate(4*N) ;
        BlockUTF8.fromChars(cb, bb) ;
        bb.flip() ;

        assertTrue("Bytes", sameBytes(bytes, bb)) ;
        // From bytes.
        CharBuffer cb2 = allocCB.allocate(N) ;
        BlockUTF8.toChars(bb, cb2) ;
        cb2.flip() ;
        String str = cb2.toString() ;
        assertEquals(x, str) ;
    }

    // Tesing, but not against what Java would do (it replaces bad chars, we want binary).
    static void testInOutBinary(String x)
    {
        int N = x.length() ;
        CharBuffer cb = CharBuffer.wrap(x.toCharArray()) ;
        ByteBuffer bb = ByteBuffer.allocate(4*N) ;
        BlockUTF8.fromChars(cb, bb) ;
        bb.flip() ;
        CharBuffer cb2 = CharBuffer.allocate(N) ;
        BlockUTF8.toChars(bb, cb2) ;
        // compare cb and cb2.
        String str = new String(cb2.array(), 0, cb2.position()) ;
        assertEquals(x, str) ;

        // And re-code as bytes.
        CharBuffer cb3 = CharBuffer.wrap(x.toCharArray()) ;
        ByteBuffer bb3 = ByteBuffer.allocate(4*N) ;
        BlockUTF8.fromChars(cb3, bb3) ;
        bb3.flip() ;
        assertArrayEquals(bb.array(), bb3.array()) ;
    }

    static void testOut(String x)
    {
        testOut(x, allocByteBufferArray, allocCharBufferArray) ;
        testOut(x, allocByteBufferDirect, allocCharBufferDirect) ;
    }
   
    static interface Alloc<T extends Buffer> { T allocate(int len) ; }
    static Alloc<ByteBuffer> allocByteBufferArray = new Alloc<ByteBuffer>() {
        @Override public ByteBuffer allocate(int len) { return ByteBuffer.allocate(len) ; }
     } ;
     static Alloc<ByteBuffer> allocByteBufferDirect = new Alloc<ByteBuffer>() {
         @Override public ByteBuffer allocate(int len) { return ByteBuffer.allocateDirect(len) ; }
     } ;
     static Alloc<CharBuffer> allocCharBufferArray = new Alloc<CharBuffer>() {
         @Override public CharBuffer allocate(int len) { return CharBuffer.allocate(len) ; }
      } ;
      static Alloc<CharBuffer> allocCharBufferDirect = new Alloc<CharBuffer>() {
          @Override public CharBuffer allocate(int len) { return ByteBuffer.allocateDirect(2*len).asCharBuffer() ; }
    } ;
   
    static void testOut(String x, Alloc<ByteBuffer> allocBB, Alloc<CharBuffer> allocCB)
    {
        testBinary(stringAsBytes(x)) ;

        int N = x.length() ;
        // First - get bytes the Java way.
        ByteBuffer bytes = ByteBuffer.wrap(stringAsBytes(x)) ;
        CharBuffer cb = allocCB.allocate(N) ;

        BlockUTF8.toChars(bytes, cb) ;
        cb.flip() ;
        bytes.flip() ;

        String str = cb.toString() ;
        ByteBuffer bytes2 = allocBB.allocate(bytes.capacity()) ;
        BlockUTF8.fromChars(cb, bytes2) ;
        bytes2.flip() ;
        assertTrue("Chars", sameBytes(bytes, bytes2)) ;
    }

    static void testBinary(byte[] binary, CharBuffer chars)
    {
        int N = binary.length ;
        ByteBuffer bytes = ByteBuffer.wrap(binary) ;
        CharBuffer cb = CharBuffer.allocate(N) ;
        BlockUTF8.toChars(bytes, cb) ;
        cb.flip() ;
        assertTrue("Binary", sameChars(chars, cb));
    }

    static void testBinary(byte[] binary)
    {
        testBinary(binary, binary) ;
    }

    static void testBinary(byte[] binary, byte[] expected)
    {
        int N = binary.length ;
        ByteBuffer bytes = ByteBuffer.wrap(binary) ;
        CharBuffer cb = CharBuffer.allocate(N) ;
        BlockUTF8.toChars(bytes, cb) ;
        cb.flip() ;
        bytes.position(0) ;
        ByteBuffer bytes2 = ByteBuffer.allocate(2*N) // Null bytes get expanded.
        BlockUTF8.fromChars(cb, bytes2) ;
        bytes2.flip() ;
        sameBytes(bytes, bytes2) ;
        assertTrue("Binary", sameBytes(ByteBuffer.wrap(expected), bytes2)) ;
    }

    // Does not move position.
    static boolean sameBytes(ByteBuffer bb1, ByteBuffer bb2)
    {
        if ( bb1.remaining() != bb2.remaining() ) return false ;
   
        for ( int i = 0 ; i < bb1.remaining() ; i++ )
            if ( bb1.get(i+bb1.position()) != bb2.get(i+bb2.position()) ) return false ;
        return true ;
    }
    // Does not move position.
    static boolean sameChars(CharBuffer cb1, CharBuffer cb2)
    {
        if ( cb1.remaining() != cb2.remaining() ) return false ;
   
        for ( int i = 0 ; i < cb1.remaining() ; i++ )
            if ( cb1.get(i+cb1.position()) != cb2.get(i+cb2.position()) ) return false ;
        return true ;
    }
    static byte[] stringAsBytes(String x)
    {
        try {
            ByteArrayOutputStream bout = new ByteArrayOutputStream() ;
            Writer out = new OutputStreamWriter(bout, utf8) ;
            out.write(x) ;
            out.close() ;
            byte[] bytes = bout.toByteArray() ;
            return bytes ;
        } catch (IOException ex) { throw new RuntimeException(ex) ; }
    }
}
// TOP
//
// Related Classes of org.openjena.atlas.io.TestBlockUTF8$Alloc
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.