Package org.apache.hadoop.hive.serde2.io

Source Code of org.apache.hadoop.hive.serde2.io.TestTimestampWritable

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.io;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.TimeZone;

import junit.framework.TestCase;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;

public class TestTimestampWritable extends TestCase {

  // Format used by parseToMillis() to turn literal "yyyy-MM-dd HH:mm:ss" strings into millis.
  // NOTE(review): this is created during static initialization, i.e. before setUp() switches
  // the default time zone to UTC, so it presumably parses in the JVM's original default zone —
  // confirm whether that is intended.
  private static DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

  // High bit of the leading int written by fromIntAndVInts(); the VInts are only written when
  // this bit is set.
  private static final int HAS_DECIMAL_MASK = 0x80000000;

  // Largest value of the seconds bits beyond the low 31 bits; checked by testMaxSize().
  private static final long MAX_ADDITIONAL_SECONDS_BITS = 0x418937;

  // Bounds (in epoch millis) of the range from which the randomized tests draw timestamps.
  private static long MIN_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("0001-01-01 00:00:00");
  private static long MAX_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("9999-01-01 00:00:00");

  // Exclusive upper bound for random nanosecond values, and the divisor used to convert
  // nanoseconds to fractional seconds.
  private static int BILLION = 1000 * 1000 * 1000;

  private static long getSeconds(Timestamp ts) {
    // To compute seconds, we first subtract the milliseconds stored in the nanos field of the
    // Timestamp from the result of getTime().
    long seconds = (ts.getTime() - ts.getNanos() / 1000000) / 1000;

    // It should also be possible to calculate this based on ts.getTime() only.
    assertEquals(seconds, TimestampWritable.millisToSeconds(ts.getTime()));

    return seconds;
  }

  private static long parseToMillis(String s) {
    try {
      return DATE_FORMAT.parse(s).getTime();
    } catch (ParseException ex) {
      throw new RuntimeException(ex);
    }
  }

  @Override
  protected void setUp() {
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
  }

  private static String normalizeTimestampStr(String timestampStr) {
    if (timestampStr.endsWith(".0")) {
      return timestampStr.substring(0, timestampStr.length() - 2);
    }
    return timestampStr;
  }

  private static void assertTSWEquals(TimestampWritable expected, TimestampWritable actual) {
    assertEquals(normalizeTimestampStr(expected.toString()),
                 normalizeTimestampStr(actual.toString()));
    assertEquals(expected, actual);
    assertEquals(expected.getTimestamp(), actual.getTimestamp());
  }

  private static TimestampWritable deserializeFromBytes(byte[] tsBytes) throws IOException {
    ByteArrayInputStream bais = new ByteArrayInputStream(tsBytes);
    DataInputStream dis = new DataInputStream(bais);
    TimestampWritable deserTSW = new TimestampWritable();
    deserTSW.readFields(dis);
    return deserTSW;
  }

  private static int reverseNanos(int nanos) {
    if (nanos == 0) {
      return 0;
    }
    if (nanos < 0 || nanos >= 1000 * 1000 * 1000) {
      throw new IllegalArgumentException("Invalid nanosecond value: " + nanos);
    }

    int x = nanos;
    StringBuilder reversed = new StringBuilder();
    while (x != 0) {
      reversed.append((char)('0' + x % 10));
      x /= 10;
    }

    int result = Integer.parseInt(reversed.toString());
    while (nanos < 100 * 1000 * 1000) {
      result *= 10;
      nanos *= 10;
    }
    return result;
  }

  private static byte[] serializeToBytes(Writable w) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    w.write(dos);
    return baos.toByteArray();
  }

  private static List<Byte> toList(byte[] a) {
    List<Byte> list = new ArrayList<Byte>(a.length);
    for (byte b : a) {
      list.add(b);
    }
    return list;
  }

  /**
   * Pad the given byte array with the given number of bytes in the beginning. The padding bytes
   * deterministically depend on the passed data.
   */
  private static byte[] padBytes(byte[] bytes, int count) {
    byte[] result = new byte[bytes.length + count];
    for (int i = 0; i < count; ++i) {
      // Fill the prefix bytes with deterministic data based on the actual meaningful data.
      result[i] = (byte) (bytes[i % bytes.length] * 37 + 19);
    }
    System.arraycopy(bytes, 0, result, count, bytes.length);
    return result;
  }

  private static TimestampWritable serializeDeserializeAndCheckTimestamp(Timestamp ts)
      throws IOException {
    TimestampWritable tsw = new TimestampWritable(ts);
    assertEquals(ts, tsw.getTimestamp());

    byte[] tsBytes = serializeToBytes(tsw);
    TimestampWritable deserTSW = deserializeFromBytes(tsBytes);
    assertTSWEquals(tsw, deserTSW);
    assertEquals(ts, deserTSW.getTimestamp());
    assertEquals(tsBytes.length, tsw.getTotalLength());

    // Also convert to/from binary-sortable representation.
    int binarySortableOffset = Math.abs(tsw.hashCode()) % 10;
    byte[] binarySortableBytes = padBytes(tsw.getBinarySortable(), binarySortableOffset);
    TimestampWritable fromBinSort = new TimestampWritable();
    fromBinSort.setBinarySortable(binarySortableBytes, binarySortableOffset);
    assertTSWEquals(tsw, fromBinSort);

    long timeSeconds = ts.getTime() / 1000;
    if (0 <= timeSeconds && timeSeconds <= Integer.MAX_VALUE) {
      assertEquals(new Timestamp(timeSeconds * 1000),
        fromIntAndVInts((int) timeSeconds, 0).getTimestamp());

      int nanos = reverseNanos(ts.getNanos());
      assertEquals(ts,
        fromIntAndVInts((int) timeSeconds | (nanos != 0 ? HAS_DECIMAL_MASK : 0),
          nanos).getTimestamp());
    }

    assertEquals(ts.getNanos(), tsw.getNanos());
    assertEquals(getSeconds(ts), tsw.getSeconds());

    // Test various set methods and copy constructors.
    {
      TimestampWritable tsSet1 = new TimestampWritable();
      // make the offset non-zero to keep things interesting.
      int offset = Math.abs(ts.hashCode() % 32);
      byte[] shiftedBytes = padBytes(tsBytes, offset);
      tsSet1.set(shiftedBytes, offset);
      assertTSWEquals(tsw, tsSet1);

      TimestampWritable tswShiftedBytes = new TimestampWritable(shiftedBytes, offset);
      assertTSWEquals(tsw, tswShiftedBytes);
      assertTSWEquals(tsw, deserializeFromBytes(serializeToBytes(tswShiftedBytes)));
    }

    {
      TimestampWritable tsSet2 = new TimestampWritable();
      tsSet2.set(ts);
      assertTSWEquals(tsw, tsSet2);
    }

    {
      TimestampWritable tsSet3 = new TimestampWritable();
      tsSet3.set(tsw);
      assertTSWEquals(tsw, tsSet3);
    }

    {
      TimestampWritable tsSet4 = new TimestampWritable();
      tsSet4.set(deserTSW);
      assertTSWEquals(tsw, tsSet4);
    }

    double expectedDbl = getSeconds(ts) + 1e-9d * ts.getNanos();
    assertTrue(Math.abs(tsw.getDouble() - expectedDbl) < 1e-10d);

    return deserTSW;
  }

  private static int randomNanos(Random rand, int decimalDigits) {
    // Only keep the most significant decimalDigits digits.
    int nanos = rand.nextInt(BILLION);
    return nanos - nanos % (int) Math.pow(10, 9 - decimalDigits);
  }

  private static int randomNanos(Random rand) {
    return randomNanos(rand, rand.nextInt(10));
  }

  private static void checkTimestampWithAndWithoutNanos(Timestamp ts, int nanos)
      throws IOException {
    serializeDeserializeAndCheckTimestamp(ts);

    ts.setNanos(nanos);
    assertEquals(serializeDeserializeAndCheckTimestamp(ts).getNanos(), nanos);
  }

  private static TimestampWritable fromIntAndVInts(int i, long... vints) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    dos.writeInt(i);
    if ((i & HAS_DECIMAL_MASK) != 0) {
      for (long vi : vints) {
        WritableUtils.writeVLong(dos, vi);
      }
    }
    byte[] bytes = baos.toByteArray();
    TimestampWritable tsw = deserializeFromBytes(bytes);
    assertEquals(toList(bytes), toList(serializeToBytes(tsw)));
    return tsw;
  }

  public void testReverseNanos() {
    assertEquals(0, reverseNanos(0));
    assertEquals(120000000, reverseNanos(21));
    assertEquals(32100000, reverseNanos(1230));
    assertEquals(5, reverseNanos(500000000));
    assertEquals(987654321, reverseNanos(123456789));
    assertEquals(12345678, reverseNanos(876543210));
  }

  /**
   * Test serializing and deserializing timestamps that can be represented by a number of seconds
   * from 0 to 2147483647 since the UNIX epoch.
   */
  public void testTimestampsWithinPositiveIntRange() throws IOException {
    Random rand = new Random(294722773L);
    for (int i = 0; i < 10000; ++i) {
      long millis = ((long) rand.nextInt(Integer.MAX_VALUE)) * 1000;
      checkTimestampWithAndWithoutNanos(new Timestamp(millis), randomNanos(rand));
    }
  }

  private static long randomMillis(long minMillis, long maxMillis, Random rand) {
    return minMillis + (long) ((maxMillis - minMillis) * rand.nextDouble());
  }

  /**
   * Test timestamps that don't necessarily fit between 1970 and 2038. This depends on HIVE-4525
   * being fixed.
   */
  public void testTimestampsOutsidePositiveIntRange() throws IOException {
    Random rand = new Random(789149717L);
    for (int i = 0; i < 10000; ++i) {
      long millis = randomMillis(MIN_FOUR_DIGIT_YEAR_MILLIS, MAX_FOUR_DIGIT_YEAR_MILLIS, rand);
      checkTimestampWithAndWithoutNanos(new Timestamp(millis), randomNanos(rand));
    }
  }

  public void testTimestampsInFullRange() throws IOException {
    Random rand = new Random(2904974913L);
    for (int i = 0; i < 10000; ++i) {
      checkTimestampWithAndWithoutNanos(new Timestamp(rand.nextLong()), randomNanos(rand));
    }
  }

  public void testToFromDouble() {
    Random rand = new Random(294729777L);
    for (int nanosPrecision = 0; nanosPrecision <= 4; ++nanosPrecision) {
      for (int i = 0; i < 10000; ++i) {
        long millis = randomMillis(MIN_FOUR_DIGIT_YEAR_MILLIS, MAX_FOUR_DIGIT_YEAR_MILLIS, rand);
        Timestamp ts = new Timestamp(millis);
        int nanos = randomNanos(rand, nanosPrecision);
        ts.setNanos(nanos);
        TimestampWritable tsw = new TimestampWritable(ts);
        double asDouble = tsw.getDouble();
        int recoveredNanos =
          (int) (Math.round((asDouble - Math.floor(asDouble)) * Math.pow(10, nanosPrecision)) *
            Math.pow(10, 9 - nanosPrecision));
        assertEquals(String.format("Invalid nanosecond part recovered from %f", asDouble),
          nanos, recoveredNanos);
        assertEquals(ts, TimestampWritable.doubleToTimestamp(asDouble));
        // decimalToTimestamp should be consistent with doubleToTimestamp for this level of
        // precision.
        assertEquals(ts, TimestampWritable.decimalToTimestamp(
            HiveDecimal.create(BigDecimal.valueOf(asDouble))));
      }
    }
  }

  private static HiveDecimal timestampToDecimal(Timestamp ts) {
    BigDecimal d = new BigDecimal(getSeconds(ts));
    d = d.add(new BigDecimal(ts.getNanos()).divide(new BigDecimal(BILLION)));
    return HiveDecimal.create(d);
  }

  public void testDecimalToTimestampRandomly() {
    Random rand = new Random(294729777L);
    for (int i = 0; i < 10000; ++i) {
      Timestamp ts = new Timestamp(
          randomMillis(MIN_FOUR_DIGIT_YEAR_MILLIS, MAX_FOUR_DIGIT_YEAR_MILLIS, rand));
      ts.setNanos(randomNanos(rand, 9))// full precision
      assertEquals(ts, TimestampWritable.decimalToTimestamp(timestampToDecimal(ts)));
    }
  }

  public void testDecimalToTimestampCornerCases() {
    Timestamp ts = new Timestamp(parseToMillis("1969-03-04 05:44:33"));
    assertEquals(0, ts.getTime() % 1000);
    for (int nanos : new int[] { 100000, 900000, 999100000, 999900000 }) {
      ts.setNanos(nanos);
      HiveDecimal d = timestampToDecimal(ts);
      assertEquals(ts, TimestampWritable.decimalToTimestamp(d));
      assertEquals(ts, TimestampWritable.doubleToTimestamp(d.bigDecimalValue().doubleValue()));
    }
  }

  /**
   * Builds serialized timestamps by hand with {@code fromIntAndVInts} and checks the string
   * each one deserializes to. The expected strings assume the default time zone is UTC, which
   * {@code setUp()} arranges.
   */
  public void testSerializationFormatDirectly() throws IOException {
    // A leading int without the HAS_DECIMAL_MASK bit is read as whole seconds since the epoch.
    assertEquals("1970-01-01 00:00:00", fromIntAndVInts(0).toString());
    assertEquals("1970-01-01 00:00:01", fromIntAndVInts(1).toString());
    assertEquals("1970-01-01 00:05:00", fromIntAndVInts(300).toString());
    assertEquals("1970-01-01 02:00:00", fromIntAndVInts(7200).toString());
    assertEquals("2000-01-02 03:04:05", fromIntAndVInts(946782245).toString());

    // This won't have a decimal part because the HAS_DECIMAL_MASK bit is not set.
    assertEquals("2000-01-02 03:04:05", fromIntAndVInts(946782245, 3210).toString());

    // With the mask set, the VInt 3210 shows up as the digit-reversed fraction .0123.
    assertEquals("2000-01-02 03:04:05.0123",
      fromIntAndVInts(946782245 | HAS_DECIMAL_MASK, 3210).toString());

    assertEquals("2038-01-19 03:14:07", fromIntAndVInts(Integer.MAX_VALUE).toString());
    assertEquals("2038-01-19 03:14:07.012345678",
      fromIntAndVInts(Integer.MAX_VALUE | HAS_DECIMAL_MASK,  // this is really just -1
        876543210).toString());

    // Timestamps with a second VInt storing additional bits of the seconds field.
    // NOTE(review): in both cases below the first VInt is negative (-1 for no fraction,
    // -3210 - 1 for .0123) — presumably a negative first VInt is what signals that a second
    // VInt follows; confirm against TimestampWritable.
    long seconds = 253392390415L;
    assertEquals("9999-09-08 07:06:55",
      fromIntAndVInts((int) (seconds & 0x7fffffff) | (1 << 31), -1L, seconds >> 31).toString());
    assertEquals("9999-09-08 07:06:55.0123",
      fromIntAndVInts((int) (seconds & 0x7fffffff) | (1 << 31),
                      -3210 - 1, seconds >> 31).toString());
  }

  /**
   * Documents, through VInt size assertions, the reasoning behind the maximum serialized size
   * of a timestamp stated in the final comment of this method.
   */
  public void testMaxSize() {
    // This many bytes are necessary to store the reversed nanoseconds.
    assertEquals(5, WritableUtils.getVIntSize(999999999));
    assertEquals(5, WritableUtils.getVIntSize(-2 - 999999999));

    // Bytes necessary to store extra bits of the second timestamp if storing a timestamp
    // before 1970 or after 2038.
    assertEquals(3, WritableUtils.getVIntSize(Short.MAX_VALUE));
    assertEquals(3, WritableUtils.getVIntSize(Short.MIN_VALUE));

    // Test that MAX_ADDITIONAL_SECONDS_BITS is really the maximum value of the
    // additional bits (beyond 31 bits) of the seconds-since-epoch part of timestamp.
    // The first check stays in long arithmetic; the second uses double so that the
    // out-of-range product can be compared against Long.MAX_VALUE without wrapping.
    assertTrue((((long) MAX_ADDITIONAL_SECONDS_BITS) << 31) * 1000 < Long.MAX_VALUE);
    assertTrue((((double) MAX_ADDITIONAL_SECONDS_BITS + 1) * (1L << 31)) * 1000 >
      Long.MAX_VALUE);

    // This is how many bytes we need to store those additional bits as a VInt.
    assertEquals(4, WritableUtils.getVIntSize(MAX_ADDITIONAL_SECONDS_BITS));

    // Therefore, the maximum total size of a serialized timestamp is 4 + 5 + 4 = 13.
  }

  public void testMillisToSeconds() {
    assertEquals(0, TimestampWritable.millisToSeconds(0));
    assertEquals(-1, TimestampWritable.millisToSeconds(-1));
    assertEquals(-1, TimestampWritable.millisToSeconds(-999));
    assertEquals(-1, TimestampWritable.millisToSeconds(-1000));
    assertEquals(-2, TimestampWritable.millisToSeconds(-1001));
    assertEquals(-2, TimestampWritable.millisToSeconds(-1999));
    assertEquals(-2, TimestampWritable.millisToSeconds(-2000));
    assertEquals(-3, TimestampWritable.millisToSeconds(-2001));
    assertEquals(-99, TimestampWritable.millisToSeconds(-99000));
    assertEquals(-100, TimestampWritable.millisToSeconds(-99001));
    assertEquals(-100, TimestampWritable.millisToSeconds(-100000));
    assertEquals(1, TimestampWritable.millisToSeconds(1500));
    assertEquals(19, TimestampWritable.millisToSeconds(19999));
    assertEquals(20, TimestampWritable.millisToSeconds(20000));
  }

  private static int compareEqualLengthByteArrays(byte[] a, byte[] b) {
    assertEquals(a.length, b.length);
    for (int i = 0; i < a.length; ++i) {
      if (a[i] != b[i]) {
        return (a[i] & 0xff) - (b[i] & 0xff);
      }
    }
    return 0;
  }

  private static int normalizeComparisonResult(int result) {
    return result < 0 ? -1 : (result > 0 ? 1 : 0);
  }

  public void testBinarySortable() {
    Random rand = new Random(5972977L);
    List<TimestampWritable> tswList = new ArrayList<TimestampWritable>();
    for (int i = 0; i < 50; ++i) {
      Timestamp ts = new Timestamp(rand.nextLong());
      ts.setNanos(randomNanos(rand));
      tswList.add(new TimestampWritable(ts));
    }
    for (TimestampWritable tsw1 : tswList) {
      byte[] bs1 = tsw1.getBinarySortable();
      for (TimestampWritable tsw2 : tswList) {
        byte[] bs2 = tsw2.getBinarySortable();
        int binaryComparisonResult =
          normalizeComparisonResult(compareEqualLengthByteArrays(bs1, bs2));
        int comparisonResult = normalizeComparisonResult(tsw1.compareTo(tsw2));
        if (binaryComparisonResult != comparisonResult) {
          throw new AssertionError("TimestampWritables " + tsw1 + " and " + tsw2 + " compare as " +
            comparisonResult + " using compareTo but as " + binaryComparisonResult + " using " +
            "getBinarySortable");
        }
      }
    }
  }

}
TOP

Related Classes of org.apache.hadoop.hive.serde2.io.TestTimestampWritable

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.