Package com.splout.db.hadoop

Source Code of com.splout.db.hadoop.TestTablespaceGeneratorJavaScript

package com.splout.db.hadoop;

/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.Test;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.TupleFile;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat;
import com.datasalt.pangool.utils.HadoopUtils;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.PartitionMap;
import com.splout.db.hadoop.TupleSampler.SamplingType;

public class TestTablespaceGeneratorJavaScript {

  public final static String INPUT = "in-" + TestTablespaceGeneratorJavaScript.class.getName();
  public final static String OUTPUT = "out-" + TestTablespaceGeneratorJavaScript.class.getName();
  static Schema theSchema1 = new Schema("schema1", Fields.parse("id:string, value:string"));

  @Before
  public void test() {
    System.out.println(System.getProperty("java.library.path"));
  }

  @Test
  public void simpleTest() throws Exception {
    simpleTest(SamplingType.FULL_SCAN);
    simpleTest(SamplingType.RANDOM);
  }

  public void simpleTest(SamplingType samplingType) throws Exception {
    Runtime.getRuntime().exec("rm -rf " + INPUT);
    Runtime.getRuntime().exec("rm -rf " + OUTPUT);

    Configuration conf = new Configuration();
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT),
        theSchema1);

    writer.append(TestTablespaceGenerator.getTuple("aa1", "value1"));
    writer.append(TestTablespaceGenerator.getTuple("aa2", "value2"));

    writer.append(TestTablespaceGenerator.getTuple("ab1", "value3"));
    writer.append(TestTablespaceGenerator.getTuple("ab2", "value4"));

    writer.append(TestTablespaceGenerator.getTuple("bb1", "value5"));
    writer.append(TestTablespaceGenerator.getTuple("bb2", "value6"));

    writer.close();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.setNPartitions(3);
    TableBuilder tableBuilder = new TableBuilder(theSchema1);
    tableBuilder.addFile(new TableInput(new TupleInputFormat(), new HashMap<String, String>(),
        theSchema1, new IdentityRecordProcessor(), new Path(INPUT)));
    // Partition by a javascript that returns the first two characters
    tableBuilder
        .partitionByJavaScript("function partition(record) { var str = record.get('id').toString(); return str.substring(0, 2); }");
    builder.add(tableBuilder.build());

    TablespaceGenerator viewGenerator = new TablespaceGenerator(builder.build(), new Path(OUTPUT),
        this.getClass());
    viewGenerator.generateView(conf, samplingType, new TupleSampler.RandomSamplingOptions());

    PartitionMap partitionMap = JSONSerDe.deSer(
        HadoopUtils.fileToString(FileSystem.getLocal(conf), new Path(OUTPUT, "partition-map")),
        PartitionMap.class);

    assertEquals(null, partitionMap.getPartitionEntries().get(0).getMin());
    assertEquals("aa", partitionMap.getPartitionEntries().get(0).getMax());

    assertEquals("aa", partitionMap.getPartitionEntries().get(1).getMin());
    assertEquals("ab", partitionMap.getPartitionEntries().get(1).getMax());

    assertEquals("ab", partitionMap.getPartitionEntries().get(2).getMin());
    assertEquals(null, partitionMap.getPartitionEntries().get(2).getMax());

    Runtime.getRuntime().exec("rm -rf " + INPUT);
    Runtime.getRuntime().exec("rm -rf " + OUTPUT);
  }
}
TOP

Related Classes of com.splout.db.hadoop.TestTablespaceGeneratorJavaScript

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.