// Package org.apache.hcatalog.pig

// Source Code of org.apache.hcatalog.pig.TestHCatLoaderStorer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hcatalog.pig;

import org.apache.hadoop.fs.FileUtil;
import org.apache.hcatalog.HcatTestUtils;
import org.apache.hcatalog.mapreduce.HCatBaseTest;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.util.Iterator;
import java.util.List;

/**
* Test that require both HCatLoader and HCatStorer. For read or write only functionality,
* please consider @{link TestHCatLoader} or @{link TestHCatStorer}.
*/
public class TestHCatLoaderStorer extends HCatBaseTest {

    /**
     * Ensure Pig can read/write tinyint/smallint columns.
     */
    @Test
    public void testSmallTinyInt() throws Exception {

        String readTblName = "test_small_tiny_int";
        File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData");
        File dataFile = new File(dataDir, "testSmallTinyInt.tsv");

        String writeTblName = "test_small_tiny_int_write";
        File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv");

        FileUtil.fullyDelete(dataDir); // Might not exist
        Assert.assertTrue(dataDir.mkdir());

        HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{
            String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
            String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE)
        });

        // Create a table with smallint/tinyint columns, load data, and query from Hive.
        Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode());
        Assert.assertEquals(0, driver.run("create external table " + readTblName +
            " (my_small_int smallint, my_tiny_int tinyint)" +
            " row format delimited fields terminated by '\t' stored as textfile").getResponseCode());
        Assert.assertEquals(0, driver.run("load data local inpath '" +
            dataDir.getAbsolutePath() + "' into table " + readTblName).getResponseCode());

        PigServer server = new PigServer(ExecType.LOCAL);
        server.registerQuery(
            "data = load '" + readTblName + "' using org.apache.hcatalog.pig.HCatLoader();");

        // Ensure Pig schema is correct.
        Schema schema = server.dumpSchema("data");
        Assert.assertEquals(2, schema.getFields().size());
        Assert.assertEquals("my_small_int", schema.getField(0).alias);
        Assert.assertEquals(DataType.INTEGER, schema.getField(0).type);
        Assert.assertEquals("my_tiny_int", schema.getField(1).alias);
        Assert.assertEquals(DataType.INTEGER, schema.getField(1).type);

        // Ensure Pig can read data correctly.
        Iterator<Tuple> it = server.openIterator("data");
        Tuple t = it.next();
        Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0));
        Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1));
        t = it.next();
        Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0));
        Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1));
        Assert.assertFalse(it.hasNext());

        // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the
        // bounds of the column type are written, and values outside throw an exception.
        Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode());
        Assert.assertEquals(0, driver.run("create table " + writeTblName +
            " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode());

        // Values within the column type bounds.
        HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{
            String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
            String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE)
        });
        smallTinyIntBoundsCheckHelper(writeDataFile.getAbsolutePath(), ExecJob.JOB_STATUS.COMPLETED);

        // Values outside the column type bounds will fail at runtime.
        HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{
            String.format("%d\t%d", Short.MIN_VALUE - 1, 0)});
        smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);

        HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{
            String.format("%d\t%d", Short.MAX_VALUE + 1, 0)});
        smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED);

        HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{
            String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)});
        smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);

        HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{
            String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)});
        smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
    }

    private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus)
        throws Exception {
        Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode());
        Assert.assertEquals(0, driver.run("create table test_tbl" +
            " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode());

        PigServer server = new PigServer(ExecType.LOCAL);
        server.setBatchOn();
        server.registerQuery("data = load '" + data +
            "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
        server.registerQuery(
            "store data into 'test_tbl' using org.apache.hcatalog.pig.HCatStorer();");
        List<ExecJob> jobs = server.executeBatch();
        Assert.assertEquals(expectedStatus, jobs.get(0).getStatus());
    }
}
// TOP
//
// Related Classes of org.apache.hcatalog.pig.TestHCatLoaderStorer
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.