Package org.gedcom4j.io

Source Code of org.gedcom4j.io.GedcomFileWriterTest

/*
* Copyright (c) 2009-2014 Matthew R. Harrah
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package org.gedcom4j.io;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.gedcom4j.model.Gedcom;
import org.gedcom4j.model.StringWithCustomTags;
import org.gedcom4j.model.Submission;
import org.gedcom4j.model.Submitter;
import org.gedcom4j.writer.GedcomWriter;
import org.gedcom4j.writer.GedcomWriterException;
import org.junit.Test;

/**
* Test for {@link GedcomFileWriter}. Doesn't actually test writing GEDCOM data per se, but tests reading and writing
* various encodings (including ANSEL which has no direct Java support) and ensuring that non-ASCII characters are
* handled appropriately (which for most cases means preserving the characters).
*
* @author frizbog1
*/
public class GedcomFileWriterTest {

    /**
     * Test when there is no data
     */
    @Test
    public void testEmptyLines() {
        List<String> lines = new ArrayList<String>();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        assertNotNull(gfw.terminator);
    }

    /**
     * Test encoding detection when ANSEL is explicitly asked for
     *
     * @throws IOException
     *             if anything goes wrong with the writing of the data
     */
    @Test
    public void testEncodingDetectionAnselExplicit() throws IOException {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR ANSEL");
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof AnselWriter);
    }

    /**
     * Test encoding detection for ASCII
     *
     * @throws IOException
     *             if anything goes wrong with the writing of the data
     */
    @Test
    public void testEncodingDetectionAscii() throws IOException {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR ASCII");
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.useLittleEndianForUnicode = true;
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof AsciiWriter);
        // Changing little-endian flag should have no effect since it's not unicode
        gfw.useLittleEndianForUnicode = false;
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof AsciiWriter);
    }

    /**
     * Test encoding detection when no format is explicitly asked for
     *
     * @throws IOException
     */
    @Test
    public void testEncodingDetectionDefault() throws IOException {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof AnselWriter);
        // Changing little-endian flag should have no effect since it's not unicode
        gfw.useLittleEndianForUnicode = false;
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof AnselWriter);
    }

    /**
     * Test encoding detection for UNICODE
     *
     * @throws IOException
     */
    @Test
    public void testEncodingDetectionUnicode() throws IOException {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR UNICODE");
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof UnicodeLittleEndianWriter);
        gfw.useLittleEndianForUnicode = false;
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof UnicodeBigEndianWriter);
    }

    /**
     * Test encoding detection for UTF-8
     *
     * @throws IOException
     */
    @Test
    public void testEncodingDetectionUtf8() throws IOException {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR UTF-8");
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof Utf8Writer);
        // Changing little-endian flag should have no effect since it's not unicode
        gfw.useLittleEndianForUnicode = false;
        gfw.write(new NullOutputStream());
        assertTrue(gfw.encodingSpecificWriter instanceof Utf8Writer);
    }

    /**
     * Test writing out ANSEL bytes with CRLF line terminators. Includes an unmappable character in line 3, and a
     * mappable extended character in line 4
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAnselCrLf() throws IOException {
        List<String> lines = getAnselGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CRLF;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0D,
                0x0A,
                // End of line 1
                0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x4E, 0x53, 0x45, 0x4C, 0x0D,
                0x0A,
                // End of line 2
                0x40, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20, 0x61,
                0x6E, 0x73, 0x65, 0x6C, 0x0D, 0x0A,
                // End of line 3
                (byte) 0xA1, 0x20, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20, 0x61, 0x6E,
                0x73, 0x65, 0x6C, 0x0D, 0x0A,
                // End of line 4
                0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D, 0x0A,
        // End of line 5
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out ANSEL bytes with only CR line terminators. Includes an unmappable character in line 3, and a
     * mappable extended character in line 4
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAnselCrOnly() throws IOException {
        List<String> lines = getAnselGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CR_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44,
                        0x0D,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x4E, 0x53, 0x45, 0x4C,
                        0x0D,
                        // End of line 2
                        0x40, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20,
                        0x61, 0x6E, 0x73, 0x65, 0x6C, 0x0D,
                        // End of line 3
                        (byte) 0xA1, 0x20, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20,
                        0x61, 0x6E, 0x73, 0x65, 0x6C, 0x0D,
                        // End of line 4
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D,
                // End of line 5
                        }, out.toByteArray()));
    }

    /**
     * Test writing out ANSEL bytes with LFCR line terminators. Includes an unmappable character in line 3, and a
     * mappable extended character in line 4
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAnselLfCr() throws IOException {
        List<String> lines = getAnselGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LFCR;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0A,
                0x0D,
                // End of line 1
                0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x4E, 0x53, 0x45, 0x4C, 0x0A,
                0x0D,
                // End of line 2
                0x40, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20, 0x61,
                0x6E, 0x73, 0x65, 0x6C, 0x0A, 0x0D,
                // End of line 3
                (byte) 0xA1, 0x20, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20, 0x61, 0x6E,
                0x73, 0x65, 0x6C, 0x0A, 0x0D,
                // End of line 4
                0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A, 0x0D,
        // End of line 5
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out ANSEL bytes with LF line terminators. Includes an unmappable character in line 3, and a mappable
     * extended character in line 4
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAnselLfOnly() throws IOException {
        List<String> lines = getAnselGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LF_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44,
                        0x0A,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x4E, 0x53, 0x45, 0x4C,
                        0x0A,
                        // End of line 2
                        0x40, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20,
                        0x61, 0x6E, 0x73, 0x65, 0x6C, 0x0A,
                        // End of line 3
                        (byte) 0xA1, 0x20, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69, 0x6E, 0x20,
                        0x61, 0x6E, 0x73, 0x65, 0x6C, 0x0A,
                        // End of line 4
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A,
                // End of line 5
                        }, out.toByteArray()));
    }

    /**
     * Test writing out ASCII bytes with CRLF line terminators. Includes an unmappable character in line 3.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAsciiCrLf() throws IOException {
        List<String> lines = getAsciiGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CRLF;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0D, 0x0A,
                // End of line 1
                0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x0D, 0x0A,
                // End of line 2
                /*
                 * The unmappable character , shown here as a question mark
                 */
                0x3F, 0x20, 0x69, 0x73, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69,
                0x6E, 0x20, 0x61, 0x73, 0x63, 0x69, 0x69, 0x0D, 0x0A,
                // End of line 3
                0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D, 0x0A,
        // End of line 4
        };

        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out ASCII bytes with CR-only line terminators. Includes an unmappable character in line 3.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAsciiCrOnly() throws IOException {
        List<String> lines = getAsciiGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CR_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0D,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x0D,
                        // End of line 2
                        /*
                         * The unmappable character , shown here as a question mark
                         */
                        0x3F, 0x20, 0x69, 0x73, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20,
                        0x69, 0x6E, 0x20, 0x61, 0x73, 0x63, 0x69, 0x69, 0x0D,
                        // End of line
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D,
                // End of line
                        }, out.toByteArray()));
    }

    /**
     * Test writing out ASCII bytes with LFCR line terminators. Includes an unmappable character in line 3.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAsciiLfCr() throws IOException {
        List<String> lines = getAsciiGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LFCR;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0A, 0x0D,
                // End of line 1
                0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x0A, 0x0D,
                // End of line 2
                /*
                 * The unmappable character , shown here as a question mark
                 */
                0x3F, 0x20, 0x69, 0x73, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x69,
                0x6E, 0x20, 0x61, 0x73, 0x63, 0x69, 0x69, 0x0A, 0x0D,
                // End of line 3
                0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A, 0x0D,
        // End of line 4
        };

        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out ASCII bytes with LF-only line terminators. Includes an unmappable character in line 3.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputAsciiLfOnly() throws IOException {
        List<String> lines = getAsciiGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LF_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0A,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x0A,
                        // End of line 2
                        /*
                         * The unmappable character , shown here as a question mark
                         */
                        0x3F, 0x20, 0x69, 0x73, 0x20, 0x75, 0x6E, 0x6D, 0x61, 0x70, 0x70, 0x61, 0x62, 0x6C, 0x65, 0x20,
                        0x69, 0x6E, 0x20, 0x61, 0x73, 0x63, 0x69, 0x69, 0x0A,
                        // End of line 3
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A,
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out big-endian unicode bytes, using CR-only line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeBigEndianCrLF() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.useLittleEndianForUnicode = false;
        gfw.terminator = LineTerminator.CRLF;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x00, 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44,
                        0x00, 0x0D, 0x00,
                        0x0A, // End of line 1
                        0x00, 0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55,
                        0x00, 0x4E, 0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0D, 0x00, 0x0A,
                        // End of line 2
                        0x00, 0x41, 0x00, (byte) 0xC4, // Capital A, Capital A-umlaut
                        0x00, 0x61, 0x00, (byte) 0xE4, // lowercase a, lowercase a-umlaut
                        0x00, 0x0D, 0x00, 0x0A,
                        // End of line 3
                        0x00, 0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0D, 0x00, 0x0A,
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out big-endian unicode bytes, using CR-only line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeBigEndianCrOnly() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.useLittleEndianForUnicode = false;
        gfw.terminator = LineTerminator.CR_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x00, 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44,
                        0x00, 0x0D,
                        // End of line 1
                        0x00, 0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55,
                        0x00, 0x4E, 0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0D,
                        // End of line 2
                        0x00, 0x41, 0x00, (byte) 0xC4, // Capital A, Capital A-umlaut
                        0x00, 0x61, 0x00, (byte) 0xE4, // lowercase a, lowercase a-umlaut
                        0x00, 0x0D,
                        // End of line 3
                        0x00, 0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0D,
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out big-endian unicode bytes, using LFCR line terminators.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeBigEndianLfCr() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.useLittleEndianForUnicode = false;
        gfw.terminator = LineTerminator.LFCR;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x00, 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44, 0x00,
                0x0A, 0x00, 0x0D,
                // End of line 1
                0x00, 0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55, 0x00,
                0x4E, 0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0A, 0x00, 0x0D,
                // End of line 2
                0x00, 0x41, 0x00, (byte) 0xC4, // Capital A, Capital A-umlaut
                0x00, 0x61, 0x00, (byte) 0xE4, // lowercase a, lowercase a-umlaut
                0x00, 0x0A, 0x00, 0x0D, // LFCR
                // End of line 3
                0x00, 0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0A, 0x00, 0x0D,
        // End of line 4
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out big-endian unicode bytes, using CR-only line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeBigEndianLfOnly() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.useLittleEndianForUnicode = false;
        gfw.terminator = LineTerminator.LF_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue(
                "Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x00, 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44,
                        0x00, 0x0A,
                        // End of line 1
                        0x00, 0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55,
                        0x00, 0x4E, 0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0A,
                        // End of line 2
                        0x00, 0x41, 0x00, (byte) 0xC4, // Capital A, Capital A-umlaut
                        0x00, 0x61, 0x00, (byte) 0xE4, // lowercase a, lowercase a-umlaut
                        0x00, 0x0A, // LF
                        // End of line 3
                        0x00, 0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0A,
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out little-endian unicode bytes, using CRLF line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeLittleEndianCrLf() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);

        // Not necessary, little endian is default, but good for explicitness
        gfw.useLittleEndianForUnicode = true;
        gfw.terminator = LineTerminator.CRLF;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44, 0x00, 0x0D,
                0x00, 0x0A, 0x00,
                // End of line 1
                0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55, 0x00, 0x4E,
                0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0D, 0x00, 0x0A, 0x00,
                // End of line 2
                0x41, 0x00, (byte) 0xC4, 0x00, // Capital A, Capital A-umlaut
                0x61, 0x00, (byte) 0xE4, 0x00, // lowercase a, lowercase a-umlaut
                0x0D, 0x00, 0x0A, 0x00, // CRLF
                // End of line 3
                0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0D, 0x00, 0x0A, 0x00
        // End of line 4
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out little-endian unicode bytes, using CR only line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeLittleEndianCrOnly() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);

        // Not necessary, little endian is default, but good for explicitness
        gfw.useLittleEndianForUnicode = true;
        gfw.terminator = LineTerminator.CR_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44, 0x00, 0x0D,
                0x00,
                // End of line 1
                0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55, 0x00, 0x4E,
                0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0D, 0x00,
                // End of line 2
                0x41, 0x00, (byte) 0xC4, 0x00, // Capital A, Capital A-umlaut
                0x61, 0x00, (byte) 0xE4, 0x00, // lowercase a, lowercase a-umlaut
                0x0D, 0x00, // CR
                // End of line 3
                0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0D, 0x00
        // End of line 4
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out little-endian unicode bytes, using LFCR line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeLittleEndianLfCr() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);

        // Not necessary, little endian is default, but good for explicitness
        gfw.useLittleEndianForUnicode = true;
        gfw.terminator = LineTerminator.LFCR;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44, 0x00, 0x0A,
                0x00, 0x0D, 0x00,
                // End of line 1
                0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55, 0x00, 0x4E,
                0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0A, 0x00, 0x0D, 0x00,
                // End of line 2
                0x41, 0x00, (byte) 0xC4, 0x00, // Capital A, Capital A-umlaut
                0x61, 0x00, (byte) 0xE4, 0x00, // lowercase a, lowercase a-umlaut
                0x0A, 0x00, 0x0D, 0x00, // LFCR
                // End of line 3
                0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0A, 0x00, 0x0D, 0x00
        // End of line 4
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out little-endian unicode bytes, using LF only line terminators
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUnicodeLittleEndianLfOnly() throws IOException {
        List<String> lines = getUnicodeGedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);

        // Not necessary, little endian is default, but good for explicitness
        gfw.useLittleEndianForUnicode = true;
        gfw.terminator = LineTerminator.LF_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        byte[] expected = new byte[] { 0x30, 0x00, 0x20, 0x00, 0x48, 0x00, 0x45, 0x00, 0x41, 0x00, 0x44, 0x00, 0x0A,
                0x00,
                // End of line 1
                0x31, 0x00, 0x20, 0x00, 0x43, 0x00, 0x48, 0x00, 0x41, 0x00, 0x52, 0x00, 0x20, 0x00, 0x55, 0x00, 0x4E,
                0x00, 0x49, 0x00, 0x43, 0x00, 0x4F, 0x00, 0x44, 0x00, 0x45, 0x00, 0x0A, 0x00,
                // End of line 2
                0x41, 0x00, (byte) 0xC4, 0x00, // Capital A, Capital A-umlaut
                0x61, 0x00, (byte) 0xE4, 0x00, // lowercase a, lowercase a-umlaut
                0x0A, 0x00,
                // End of line 3
                0x30, 0x00, 0x20, 0x00, 0x54, 0x00, 0x52, 0x00, 0x4C, 0x00, 0x52, 0x00, 0x0A, 0x00
        // End of line 4
        };
        assertTrue("Output bytes are not the expected value", Arrays.equals(expected, out.toByteArray()));
    }

    /**
     * Test writing out UTF_8 bytes with CRLF line terminators.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUtf8CrLf() throws IOException {
        List<String> lines = getUtf8GedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CRLF;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue("Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0D, 0x0A,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x0D, 0x0A,
                        // End of line 2
                        0x41, (byte) 0xC3, (byte) 0x84, // Capital A, Capital A-umlaut
                        0x61, (byte) 0xC3, (byte) 0xA4, // lowercase a, lowercase a-umlaut
                        0x0D, 0x0A,
                        // End of line 3
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D, 0x0A
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out UTF_8 bytes with only CR line terminators.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUtf8CrOnly() throws IOException {
        List<String> lines = getUtf8GedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.CR_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue("Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0D,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x0D,
                        // End of line 2
                        0x41, (byte) 0xC3, (byte) 0x84, // Capital A, Capital A-umlaut
                        0x61, (byte) 0xC3, (byte) 0xA4, // lowercase a, lowercase a-umlaut
                        0x0D,
                        // End of line 3
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0D
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out UTF_8 bytes with LFCR line terminators. 4
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUtf8LfCr() throws IOException {
        List<String> lines = getUtf8GedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LFCR;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue("Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0A, 0x0D,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x0A, 0x0D,
                        // End of line 2
                        0x41, (byte) 0xC3, (byte) 0x84, // Capital A, Capital A-umlaut
                        0x61, (byte) 0xC3, (byte) 0xA4, // lowercase a, lowercase a-umlaut
                        0x0A, 0x0D,
                        // End of line 3
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A, 0x0D
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing out UTF_8 bytes with LF line terminators.
     *
     * @throws IOException
     *             if the data can't be written
     */
    @Test
    public void testOutputUtf8LfOnly() throws IOException {
        List<String> lines = getUtf8GedcomLines();
        GedcomFileWriter gfw = new GedcomFileWriter(lines);
        gfw.terminator = LineTerminator.LF_ONLY;

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        gfw.write(out);
        out.close();

        assertTrue("Output bytes are not the expected value",
                Arrays.equals(new byte[] { 0x30, 0x20, 0x48, 0x45, 0x41, 0x44, 0x0A,
                        // End of line 1
                        0x31, 0x20, 0x43, 0x48, 0x41, 0x52, 0x20, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x0A,
                        // End of line 2
                        0x41, (byte) 0xC3, (byte) 0x84, // Capital A, Capital A-umlaut
                        0x61, (byte) 0xC3, (byte) 0xA4, // lowercase a, lowercase a-umlaut
                        0x0A,
                        // End of line 3
                        0x30, 0x20, 0x54, 0x52, 0x4C, 0x52, 0x0A
                // End of line 4
                        }, out.toByteArray()));
    }

    /**
     * Test writing a gedcom to a file by its filename
     *
     * @throws GedcomWriterException
     *             if there is a writing failure
     * @throws IOException
     *             if there is an io failure
     */
    @Test
    public void testWriteFileWithName() throws IOException, GedcomWriterException {
        String fn = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator")
                + "gedcomfilewritertest.ged";
        Gedcom g = new Gedcom();
        g.submission = new Submission("@SUBN0001@");
        g.header.submission = g.submission;
        Submitter s = new Submitter();
        s.xref = "@SUBM0001@";
        s.name = new StringWithCustomTags("Joe Tester");
        g.submitters.put(s.xref, s);
        g.header.submitter = s;
        GedcomWriter gw = new GedcomWriter(g);
        gw.write(fn);
    }

    /**
     * Get a List of strings representing GEDCOM text for an ANSEL-encoded file. Note that the lines do NOT represent a
     * well-formed GEDCOM, but we're only testing the encoding.
     *
     * @return a list of strings representing GEDCOM text for a ANSEL-encoded file
     */
    private List<String> getAnselGedcomLines() {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR ANSEL");
        lines.add("\u0140 unmappable in ansel");
        lines.add("\u0141 mappable in ansel");
        lines.add("0 TRLR");
        return lines;
    }

    /**
     * Get a List of strings representing GEDCOM text for an ASCII-encoded file. Note that the lines do NOT represent a
     * well-formed GEDCOM, but we're only testing the encoding.
     *
     * @return a list of strings representing GEDCOM text for an ASCII-encoded file
     */
    private List<String> getAsciiGedcomLines() {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR ASCII");
        lines.add("\u0141 is unmappable in ascii");
        lines.add("0 TRLR");
        return lines;
    }

    /**
     * Get a List of strings representing GEDCOM text for a Unicode-encoded file. Note that the lines do NOT represent a
     * well-formed GEDCOM, but we're only testing the encoding.
     *
     * @return a list of strings representing GEDCOM text for a Unicode-encoded file
     */
    private List<String> getUnicodeGedcomLines() {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR UNICODE");
        lines.add("A\u00C4a\u00E4"); // Capital A, capital A-umlaut, lowercase a, lowercase a-umlaut
        lines.add("0 TRLR");
        return lines;
    }

    /**
     * Get a List of strings representing GEDCOM text for a UTF-8-encoded file. Note that the lines do NOT represent a
     * well-formed GEDCOM, but we're only testing the encoding.
     *
     * @return a list of strings representing GEDCOM text for a UTF-8-encoded file
     */
    private List<String> getUtf8GedcomLines() {
        List<String> lines = new ArrayList<String>();
        lines.add("0 HEAD");
        lines.add("1 CHAR UTF-8");
        lines.add("A\u00C4a\u00E4"); // Capital A, capital A-umlaut, lowercase a, lowercase a-umlaut
        lines.add("0 TRLR");
        return lines;
    }

}
TOP

Related Classes of org.gedcom4j.io.GedcomFileWriterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.