package com.splout.db.hadoop;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import static org.junit.Assert.assertEquals;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.Test;
import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.TupleFile;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat;
import com.datasalt.pangool.utils.HadoopUtils;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.PartitionMap;
import com.splout.db.hadoop.TupleSampler.SamplingType;
/**
 * Exercises {@link TablespaceGenerator} with a table partitioned by a JavaScript function.
 * <p>
 * The test writes six tuples into a tuple file, partitions them by the first two characters of
 * their "id" field into three partitions, and then checks the boundaries stored in the generated
 * "partition-map" file. The scenario is run once per sampling type.
 */
public class TestTablespaceGeneratorJavaScript {

  /** Relative path of the tuple file written as input for the generator. */
  public final static String INPUT = "in-" + TestTablespaceGeneratorJavaScript.class.getName();

  /** Relative path handed to the generator as its output location. */
  public final static String OUTPUT = "out-" + TestTablespaceGeneratorJavaScript.class.getName();

  /** Schema with two string fields; "id" is the field read by the partitioning script. */
  static Schema theSchema1 = new Schema("schema1", Fields.parse("id:string, value:string"));

  /**
   * Prints the JVM's "java.library.path" before each test.
   * NOTE(review): presumably a diagnostic aid for native-library loading problems - confirm.
   */
  @Before
  public void test() {
    System.out.println(System.getProperty("java.library.path"));
  }

  /**
   * Runs the scenario with {@link SamplingType#FULL_SCAN} and then with
   * {@link SamplingType#RANDOM}.
   *
   * @throws Exception if either run fails
   */
  @Test
  public void simpleTest() throws Exception {
    simpleTest(SamplingType.FULL_SCAN);
    simpleTest(SamplingType.RANDOM);
  }

  /**
   * Generates a three-partition tablespace from six tuples using the given sampling type and
   * asserts the resulting partition boundaries.
   * <p>
   * {@link #INPUT} and {@link #OUTPUT} are removed synchronously before the run and again
   * afterwards, even when the run fails, so consecutive runs cannot interfere with each other.
   *
   * @param samplingType the sampling strategy passed to the generator
   * @throws Exception if writing the input, generating the view or reading the partition map fails
   */
  public void simpleTest(SamplingType samplingType) throws Exception {
    Configuration conf = new Configuration();
    // Each path is deleted through the same file system it is accessed with below.
    FileSystem inputFs = FileSystem.get(conf);
    FileSystem outputFs = FileSystem.getLocal(conf);
    Path input = new Path(INPUT);
    Path output = new Path(OUTPUT);

    // Synchronous deletes: a fire-and-forget "rm -rf" process could still be running while the
    // files below are being created, and remove them again.
    deleteRecursively(inputFs, input);
    deleteRecursively(outputFs, output);
    try {
      writeInputTuples(inputFs, conf, input);

      TablespaceBuilder builder = new TablespaceBuilder();
      builder.setNPartitions(3);
      TableBuilder tableBuilder = new TableBuilder(theSchema1);
      tableBuilder.addFile(new TableInput(new TupleInputFormat(), new HashMap<String, String>(),
          theSchema1, new IdentityRecordProcessor(), input));
      // Partition by a javascript that returns the first two characters
      tableBuilder
          .partitionByJavaScript("function partition(record) { var str = record.get('id').toString(); return str.substring(0, 2); }");
      builder.add(tableBuilder.build());

      TablespaceGenerator viewGenerator = new TablespaceGenerator(builder.build(), output,
          this.getClass());
      viewGenerator.generateView(conf, samplingType, new TupleSampler.RandomSamplingOptions());

      PartitionMap partitionMap = JSONSerDe.deSer(
          HadoopUtils.fileToString(outputFs, new Path(OUTPUT, "partition-map")),
          PartitionMap.class);

      // Expected ranges: (null, "aa"], ("aa", "ab"], ("ab", null)
      assertEquals(null, partitionMap.getPartitionEntries().get(0).getMin());
      assertEquals("aa", partitionMap.getPartitionEntries().get(0).getMax());

      assertEquals("aa", partitionMap.getPartitionEntries().get(1).getMin());
      assertEquals("ab", partitionMap.getPartitionEntries().get(1).getMax());

      assertEquals("ab", partitionMap.getPartitionEntries().get(2).getMin());
      assertEquals(null, partitionMap.getPartitionEntries().get(2).getMax());
    } finally {
      // Clean up even when an assertion or the generator fails.
      deleteRecursively(inputFs, input);
      deleteRecursively(outputFs, output);
    }
  }

  /**
   * Writes the six test tuples (ids "aa1", "aa2", "ab1", "ab2", "bb1", "bb2") to the given path,
   * closing the writer even if an append fails.
   */
  private static void writeInputTuples(FileSystem fs, Configuration conf, Path input)
      throws Exception {
    TupleFile.Writer writer = new TupleFile.Writer(fs, conf, input, theSchema1);
    try {
      writer.append(TestTablespaceGenerator.getTuple("aa1", "value1"));
      writer.append(TestTablespaceGenerator.getTuple("aa2", "value2"));
      writer.append(TestTablespaceGenerator.getTuple("ab1", "value3"));
      writer.append(TestTablespaceGenerator.getTuple("ab2", "value4"));
      writer.append(TestTablespaceGenerator.getTuple("bb1", "value5"));
      writer.append(TestTablespaceGenerator.getTuple("bb2", "value6"));
    } finally {
      writer.close();
    }
  }

  /** Recursively deletes the path through the given file system and blocks until done. */
  private static void deleteRecursively(FileSystem fs, Path path) throws java.io.IOException {
    fs.delete(path, true);
  }
}