Package org.apache.hcatalog.pig

Source Code of org.apache.hcatalog.pig.TestHCatLoaderStorer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hcatalog.pig;

import org.apache.hadoop.fs.FileUtil;
import org.apache.hcatalog.HcatTestUtils;
import org.apache.hcatalog.mapreduce.HCatBaseTest;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.util.Iterator;
import java.util.List;

/**
* Test that require both HCatLoader and HCatStorer. For read or write only functionality,
* please consider @{link TestHCatLoader} or @{link TestHCatStorer}.
* @deprecated Use/modify {@link org.apache.hive.hcatalog.pig.TestHCatLoaderStorer} instead
*/
public class TestHCatLoaderStorer extends HCatBaseTest {

  /**
   * Ensure Pig can read/write tinyint/smallint columns.
   */
  @Test
  public void testSmallTinyInt() throws Exception {

    String readTblName = "test_small_tiny_int";
    File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData");
    File dataFile = new File(dataDir, "testSmallTinyInt.tsv");

    String writeTblName = "test_small_tiny_int_write";
    File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv");

    FileUtil.fullyDelete(dataDir); // Might not exist
    Assert.assertTrue(dataDir.mkdir());

    HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{
      String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
      String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE)
    });

    // Create a table with smallint/tinyint columns, load data, and query from Hive.
    Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode());
    Assert.assertEquals(0, driver.run("create external table " + readTblName +
      " (my_small_int smallint, my_tiny_int tinyint)" +
      " row format delimited fields terminated by '\t' stored as textfile").getResponseCode());
    Assert.assertEquals(0, driver.run("load data local inpath '" +
      dataDir.getAbsolutePath() + "' into table " + readTblName).getResponseCode());

    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery(
      "data = load '" + readTblName + "' using org.apache.hcatalog.pig.HCatLoader();");

    // Ensure Pig schema is correct.
    Schema schema = server.dumpSchema("data");
    Assert.assertEquals(2, schema.getFields().size());
    Assert.assertEquals("my_small_int", schema.getField(0).alias);
    Assert.assertEquals(DataType.INTEGER, schema.getField(0).type);
    Assert.assertEquals("my_tiny_int", schema.getField(1).alias);
    Assert.assertEquals(DataType.INTEGER, schema.getField(1).type);

    // Ensure Pig can read data correctly.
    Iterator<Tuple> it = server.openIterator("data");
    Tuple t = it.next();
    Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0));
    Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1));
    t = it.next();
    Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0));
    Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1));
    Assert.assertFalse(it.hasNext());

    // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the
    // bounds of the column type are written, and values outside throw an exception.
    Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode());
    Assert.assertEquals(0, driver.run("create table " + writeTblName +
      " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode());

    // Values within the column type bounds.
    HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{
      String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
      String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE)
    });
    smallTinyIntBoundsCheckHelper(writeDataFile.getAbsolutePath(), ExecJob.JOB_STATUS.COMPLETED);

    // Values outside the column type bounds will fail at runtime.
    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{
      String.format("%d\t%d", Short.MIN_VALUE - 1, 0)});
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);

    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{
      String.format("%d\t%d", Short.MAX_VALUE + 1, 0)});
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED);

    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{
      String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)});
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);

    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{
      String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)});
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
  }

  private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus)
    throws Exception {
    Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode());
    Assert.assertEquals(0, driver.run("create table test_tbl" +
      " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode());

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("data = load '" + data +
      "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
    server.registerQuery(
      "store data into 'test_tbl' using org.apache.hcatalog.pig.HCatStorer();");
    List<ExecJob> jobs = server.executeBatch();
    Assert.assertEquals(expectedStatus, jobs.get(0).getStatus());
  }
}
TOP

Related Classes of org.apache.hcatalog.pig.TestHCatLoaderStorer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.