Package org.apache.hadoop.hive.serde2.columnar

Examples of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable
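
BytesRefArrayWritable is the row-level Writable used by Hive's columnar RCFile format: it holds one BytesRefWritable, i.e. one byte-range reference, per column of a row. Before the collected excerpts, here is a minimal, self-contained sketch of the basic pattern; it is not taken from any of the excerpts below, and the class name and column values are invented for illustration.

    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
    import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

    public class BytesRefArrayWritableSketch {
        public static void main(String[] args) throws Exception {
            // One BytesRefWritable per column; together they represent a single row.
            byte[][] columns = {
                "123".getBytes(StandardCharsets.UTF_8),
                "hive".getBytes(StandardCharsets.UTF_8),
                "hadoop".getBytes(StandardCharsets.UTF_8)
            };

            BytesRefArrayWritable row = new BytesRefArrayWritable(columns.length);
            for (int i = 0; i < columns.length; i++) {
                row.set(i, new BytesRefWritable(columns[i], 0, columns[i].length));
            }

            // Read the column references back; getData() may throw IOException,
            // hence the throws clause on main.
            for (int i = 0; i < row.size(); i++) {
                BytesRefWritable ref = row.get(i);
                System.out.println("column " + i + " = "
                    + new String(ref.getData(), ref.getStart(), ref.getLength(),
                                 StandardCharsets.UTF_8));
            }
        }
    }

The excerpts below are truncated fragments, apparently collected from Hive and Pig test and I/O code; identifiers such as fs, conf, file, codec, serDe, columnNum and rowCount are defined in the surrounding code that the listing omits. The first excerpt wires one empty BytesRefWritable per column into a BytesRefArrayWritable, then fills the column buffers and appends the same row object once per generated row while writing an RCFile.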


        RCFileOutputFormat.setColumnNumber(conf, columnNum);
        RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

        byte[][] columnRandom;

        BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
        columnRandom = new byte[columnNum][];
        for (int i = 0; i < columnNum; i++) {
            BytesRefWritable cu = new BytesRefWritable();
            bytes.set(i, cu);
        }

        for (int i = 0; i < rowCount; i++) {
            nextRandomRow(columnRandom, bytes, columnCount);
            rowsWritten++;
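
The next excerpt reads a single row back. An RCFileRecordReader is constructed over a mapred FileSplit; createValue() hands back a BytesRefArrayWritable, next() fills it with the undecoded column bytes, and a readColumnarStruct helper from the enclosing class turns those bytes into a ColumnarStruct for the given schema.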


            // This excerpt begins mid-statement: the omitted preceding line presumably
            // starts the construction, e.g.
            //   rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
            new Configuration(false), new org.apache.hadoop.mapred.FileSplit(splitPath,
                fileSplit.getStart(), fileSplit.getLength(),
                new org.apache.hadoop.mapred.JobConf(conf)));

        LongWritable key = rcFileRecordReader.createKey();
        BytesRefArrayWritable value = rcFileRecordReader.createValue();
        rcFileRecordReader.next(key, value);
        rcFileRecordReader.close();

        ColumnarStruct struct = readColumnarStruct(value, schema);
        return struct;
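
The following writer example is a variant of the first excerpt: after registering one BytesRefWritable per column, every iteration calls resetValid() on the row, points each column reference at the bytes of "Sample value", and appends the row; the writer is closed once rowCount rows have been written.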

        RCFileOutputFormat.setColumnNumber(conf, columnNum);
        RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

        byte[][] columnRandom;

        BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
        columnRandom = new byte[columnNum][];
        for (int i = 0; i < columnNum; i++) {
            BytesRefWritable cu = new BytesRefWritable();
            bytes.set(i, cu);
        }

        for (int i = 0; i < rowCount; i++) {

            bytes.resetValid(columnRandom.length);
            for (int j = 0; j < columnRandom.length; j++) {
                columnRandom[j] = "Sample value".getBytes();
                bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
            }
            rowsWritten++;
            writer.append(bytes);
        }
        writer.close();
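
Here, what appears to be a Pig loader pulls the current BytesRefArrayWritable from a record reader and converts it first to a ColumnarStruct and then to a Tuple; readColumnarStruct and readColumnarTuple are helpers of the enclosing loader, not part of the Hive API.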

  Tuple tuple = null;

  try {
      if (reader.nextKeyValue()) {
          BytesRefArrayWritable buff = reader.getCurrentValue();
          ColumnarStruct struct = readColumnarStruct(buff);

          tuple = readColumnarTuple(struct, reader.getSplitPath());
      }
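
This fragment shows what looks like the setup on the write side of a columnar serializer: after checking that the column count has been configured, it allocates a ByteStream.Output, a reusable BytesRefArrayWritable for the current row, and an array of BytesRefWritable column references that are registered once with rowWritable.set() and presumably re-pointed for each row afterwards.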

            if (numColumns < 1) {
                throw new IOException("number of columns is not set");
            }

            byteStream = new ByteStream.Output();
            rowWritable = new BytesRefArrayWritable();
            colValRefs = new BytesRefWritable[numColumns];

            for (int i = 0; i < numColumns; i++) {
                colValRefs[i] = new BytesRefWritable();
                rowWritable.set(i, colValRefs[i]);
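
The longest excerpt is from an RCFile round-trip test. The writer is created with user metadata (apple -> block, cat -> dog), two records are appended column by column, and the file is read back: the metadata lookups are asserted, each row is pulled into a BytesRefArrayWritable with getCurrentRow(), marked fully valid with resetValid(8), and deserialized through the SerDe's StructObjectInspector. record_1, record_2, serDe and the expected values come from the enclosing test.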

                        // Excerpt begins mid-statement: these are the trailing arguments of an
                        // RCFile.Writer constructor call that also takes user metadata, e.g.
                        //   new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(...), new DefaultCodec())
                        RCFile.createMetadata(new Text("apple"),
                                              new Text("block"),
                                              new Text("cat"),
                                              new Text("dog")),
                        new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
          record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0,
          record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
        new ShortWritable((short) 456), new IntWritable(789),
        new LongWritable(1000), new DoubleWritable(5.3),
        new Text("hive and hadoop"), null, null};

    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
        new ShortWritable((short) 200), new IntWritable(123),
        new LongWritable(1000), new DoubleWritable(5.3),
        new Text("hive and hadoop"), null, null};

    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"),
                 reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"),
                 reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"),
                 reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    for (int i = 0; i < 2; i++) {
      reader.next(rowID);
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
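
The corruption-tolerance test below writes 100 rows of random integer strings, overwrites a stretch of bytes in the middle of the file with junk, and then re-reads the file with hive.io.rcfile.tolerate.corruptions set to true, expecting the read loop to run to the end of the readable data without throwing.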

    byte[][] record = {null, null, null, null, null, null, null, null};

    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
    final int recCount = 100;
    Random rand = new Random();
    for (int recIdx = 0; recIdx < recCount; recIdx++) {
      for (int i = 0; i < record.length; i++) {
        record[i] = Integer.toString(rand.nextInt()).getBytes("UTF-8");
      }
      for (int i = 0; i < record.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record[i], 0,
            record[i].length);
        bytes.set(i, cu);
      }
      writer.append(bytes);
      bytes.clear();
    }
    writer.close();

    // Insert junk in middle of file. Assumes file is on local disk.
    RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
    long corruptOffset = raf.length() / 2;
    LOG.info("corrupting " + raf + " at offset " + corruptOffset);
    raf.seek(corruptOffset);
    raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
    raf.close();

    // Set the option for tolerating corruptions. The read should succeed.
    Configuration tmpConf = new Configuration(conf);
    tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
    RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);

    LongWritable rowID = new LongWritable();

    while (true) {
      boolean more = reader.next(rowID);
      if (!more) {
        break;
      }
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
    }

    reader.close();
  }
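
This benchmark-style excerpt appends the same BytesRefArrayWritable count times and prints the resulting file length, giving a rough idea of the on-disk size of an RCFile with that many columns and rows.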

    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
      bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
      writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size()
        + " number columns and " + count + " number rows is " + fileLen);
  }
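
A full-projection read follows: ColumnProjectionUtils.setFullyReadColumns(conf) asks for every column, each deserialized row is compared field by field against expectedFieldsData through the SerDe's ObjectInspector, and the row is serialized again to confirm the round trip reproduces the original BytesRefArrayWritable (s in the final assertion is the expected serialized row from the enclosing test).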

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    int actualRead = 0;
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData = ObjectInspectorUtils
            .copyToStandardObject(fieldData, fieldRefs.get(i)
            .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        assertEquals("Field " + i, standardWritableData, expectedFieldsData[i]);
      }
      // Serialize
      assertEquals(
          "Class of the serialized object should be BytesRefArrayWritable",
          BytesRefArrayWritable.class, serDe.getSerializedClass());
      BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe
          .serialize(row, oi);
      assertEquals("Serialized data", s, serializedText);
      actualRead++;
    }
    reader.close();
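
The final excerpt reads only a projected subset of columns via setReadColumnIDs, checks just those fields against expectedPartitalFieldsData, and verifies that the re-serialized row equals patialS; the misspelled identifiers and the start timestamp come from the enclosing test.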

    readCols.add(Integer.valueOf(3));
    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();

    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());

      for (int i : readCols) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData = ObjectInspectorUtils
            .copyToStandardObject(fieldData, fieldRefs.get(i)
            .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        assertEquals("Field " + i, standardWritableData,
            expectedPartitalFieldsData[i]);
      }

      assertEquals(
          "Class of the serialized object should be BytesRefArrayWritable",
          BytesRefArrayWritable.class, serDe.getSerializedClass());
      BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe
          .serialize(row, oi);
      assertEquals("Serialized data", patialS, serializedBytes);
    }
    reader.close();
    long cost = System.currentTimeMillis() - start;
