// Source code of com.splout.db.hadoop.TestTablespaceGeneratorJavaScript

package com.splout.db.hadoop;


/*
 * #%L
 * Splout SQL Hadoop library
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */


import static org.junit.Assert.assertEquals;


import java.util.HashMap;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.Test;


import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.TupleFile;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat;
import com.datasalt.pangool.utils.HadoopUtils;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.PartitionMap;
import com.splout.db.hadoop.TupleSampler.SamplingType;


public class TestTablespaceGeneratorJavaScript {


  public final static String INPUT = "in-" + TestTablespaceGeneratorJavaScript.class.getName();
  public final static String OUTPUT = "out-" + TestTablespaceGeneratorJavaScript.class.getName();
  static Schema theSchema1 = new Schema("schema1", Fields.parse("id:string, value:string"));


  @Before
  public void test() {
    System.out.println(System.getProperty("java.library.path"));
  }


  @Test
  public void simpleTest() throws Exception {
    simpleTest(SamplingType.FULL_SCAN);
    simpleTest(SamplingType.RANDOM);
  }


  public void simpleTest(SamplingType samplingType) throws Exception {
    Runtime.getRuntime().exec("rm -rf " + INPUT);
    Runtime.getRuntime().exec("rm -rf " + OUTPUT);


    Configuration conf = new Configuration();
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT),
        theSchema1);


    writer.append(TestTablespaceGenerator.getTuple("aa1", "value1"));
    writer.append(TestTablespaceGenerator.getTuple("aa2", "value2"));


    writer.append(TestTablespaceGenerator.getTuple("ab1", "value3"));
    writer.append(TestTablespaceGenerator.getTuple("ab2", "value4"));


    writer.append(TestTablespaceGenerator.getTuple("bb1", "value5"));
    writer.append(TestTablespaceGenerator.getTuple("bb2", "value6"));


    writer.close();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.setNPartitions(3);
    TableBuilder tableBuilder = new TableBuilder(theSchema1);
    tableBuilder.addFile(new TableInput(new TupleInputFormat(), new HashMap<String, String>(),
        theSchema1, new IdentityRecordProcessor(), new Path(INPUT)));
    // Partition by a javascript that returns the first two characters
    tableBuilder
        .partitionByJavaScript("function partition(record) { var str = record.get('id').toString(); return str.substring(0, 2); }");
    builder.add(tableBuilder.build());


    TablespaceGenerator viewGenerator = new TablespaceGenerator(builder.build(), new Path(OUTPUT),
        this.getClass());
    viewGenerator.generateView(conf, samplingType, new TupleSampler.RandomSamplingOptions());


    PartitionMap partitionMap = JSONSerDe.deSer(
        HadoopUtils.fileToString(FileSystem.getLocal(conf), new Path(OUTPUT, "partition-map")),
        PartitionMap.class);


    assertEquals(null, partitionMap.getPartitionEntries().get(0).getMin());
    assertEquals("aa", partitionMap.getPartitionEntries().get(0).getMax());


    assertEquals("aa", partitionMap.getPartitionEntries().get(1).getMin());
    assertEquals("ab", partitionMap.getPartitionEntries().get(1).getMax());


    assertEquals("ab", partitionMap.getPartitionEntries().get(2).getMin());
    assertEquals(null, partitionMap.getPartitionEntries().get(2).getMax());


    Runtime.getRuntime().exec("rm -rf " + INPUT);
    Runtime.getRuntime().exec("rm -rf " + OUTPUT);
  }
}
// Source code of com.splout.db.hadoop.TestTablespaceGeneratorJavaScript

// Related classes of com.splout.db.hadoop.TestTablespaceGeneratorJavaScript