Package org.apache.pig.test

Source Code of org.apache.pig.test.TestPruneColumn

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;
import org.apache.pig.FilterFunc;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadPushDown;
import org.apache.pig.PigServer;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.newplan.logical.rules.ColumnPruneVisitor;
import org.apache.pig.test.utils.TestHelper;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class TestPruneColumn {
    // Pig server under test; setUp() creates a fresh "local" instance before each test.
    private PigServer pigServer;
    // Tab-separated input fixtures. Each is created and filled by setUp() and
    // deleted by tearDown().
    // Rows: (1,2,3), (2,5,2)
    File tmpFile1;
    // Rows: (1,1), (2,2)
    File tmpFile2;
    // Rows: an int followed by a map with keys key1/key2
    File tmpFile3;
    // Rows: (1,2,3) twice (duplicate rows)
    File tmpFile4;
    // Rows: (1,2,3,4), (2,3,4,5)
    File tmpFile5;
    // Rows: (<empty>,2,3), (2,3,4) -- first field of the first row is empty
    File tmpFile6;
    // Rows: (1,1,1), (2,2,2)
    File tmpFile7;
    // Rows: (1,2,3,4), (2,5,2,3)
    File tmpFile8;
    // Rows: an int followed by two maps (keys key1/key2 and key3/key4)
    File tmpFile9;
    // Single row: an int, a map with keys 1/2, and an int
    File tmpFile10;
    // Rows: (1,2,3), (1,3,2), (2,5,2)
    File tmpFile11;
    // Rows: a single map column with keys key1/key2/cond
    File tmpFile12;
    // Receives the ColumnPruneVisitor logger output (appender attached in setUp());
    // read back by checkLogFileMessage() and emptyLogFileMessage().
    File logFile;

    private static final String simpleEchoStreamingCommand;
    static {
        String quote = "'";
        if (Util.WINDOWS) {
            quote = "\"";
        }

        simpleEchoStreamingCommand = "perl -ne " + quote + "print $_" + quote;
    }

    static public class MyFilterFunc extends FilterFunc {
        @Override
        public Boolean exec(Tuple input) {
            return true;
        }
    }

    @Before
    public void setUp() throws Exception{
        Logger logger = Logger.getLogger(ColumnPruneVisitor.class);
        logger.removeAllAppenders();
        logger.setLevel(Level.INFO);
        SimpleLayout layout = new SimpleLayout();
        logFile = File.createTempFile("log", "");
        FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0);
        logger.addAppender(appender);

        pigServer = new PigServer("local");
        tmpFile1 = File.createTempFile("prune", "txt");
        PrintStream ps = new PrintStream(new FileOutputStream(tmpFile1));
        ps.println("1\t2\t3");
        ps.println("2\t5\t2");
        ps.close();

        tmpFile2 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile2));
        ps.println("1\t1");
        ps.println("2\t2");
        ps.close();

        tmpFile3 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile3));
        ps.println("1\t[key1#1,key2#2]");
        ps.println("2\t[key1#2,key2#4]");
        ps.close();

        tmpFile4 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile4));
        ps.println("1\t2\t3");
        ps.println("1\t2\t3");
        ps.close();

        tmpFile5 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile5));
        ps.println("1\t2\t3\t4");
        ps.println("2\t3\t4\t5");
        ps.close();

        tmpFile6 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile6));
        ps.println("\t2\t3");
        ps.println("2\t3\t4");
        ps.close();

        tmpFile7 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile7));
        ps.println("1\t1\t1");
        ps.println("2\t2\t2");
        ps.close();

        tmpFile8 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile8));
        ps.println("1\t2\t3\t4");
        ps.println("2\t5\t2\t3");
        ps.close();

        tmpFile9 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile9));
        ps.println("1\t[key1#1,key2#2]\t[key3#8,key4#9]");
        ps.println("2\t[key1#2,key2#4]\t[key3#8,key4#9]");
        ps.close();

        tmpFile10 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile10));
        ps.println("1\t[1#1,2#1]\t2");
        ps.close();

        tmpFile11 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile11));
        ps.println("1\t2\t3");
        ps.println("1\t3\t2");
        ps.println("2\t5\t2");
        ps.close();

        tmpFile12 = File.createTempFile("prune", "txt");
        ps = new PrintStream(new FileOutputStream(tmpFile12));
        ps.println("[key1#1,key2#2,cond#1]");
        ps.println("[key1#2,key2#3,cond#1]");
        ps.close();
    }

    @After
    public void tearDown() throws Exception{
        tmpFile1.delete();
        tmpFile2.delete();
        tmpFile3.delete();
        tmpFile4.delete();
        tmpFile5.delete();
        tmpFile6.delete();
        tmpFile7.delete();
        tmpFile8.delete();
        tmpFile9.delete();
        tmpFile10.delete();
        tmpFile11.delete();
        tmpFile12.delete();
        logFile.delete();
    }

    public boolean checkLogFileMessage(String[] messages) {
        BufferedReader reader = null;

        try {
            reader = new BufferedReader(new FileReader(logFile));
            List<String> logMessages=new ArrayList<String>();
            String line;
            while ((line=reader.readLine())!=null)
            {
                logMessages.add(line);
            }
            if (logMessages.size() > 0) {
                logMessages = TestHelper.sortSubFields(logMessages);
            }

            // Check if all messages appear in the log
            for (int i=0;i<messages.length;i++)
            {
                boolean found = false;
                for (int j=0;j<logMessages.size();j++)
                if (logMessages.get(j).contains(messages[i])) {
                    found = true;
                    break;
                }
                if (!found)
                    return false;
            }

            // Check no other log besides messages
            for (int i=0;i<logMessages.size();i++) {
                boolean found = false;
                for (int j=0;j<messages.length;j++) {
                    if (logMessages.get(i).contains(messages[j])) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    if (logMessages.get(i).contains("Columns pruned for")||
                            logMessages.get(i).contains("Map key required for")) {
                        return false;
                    }
                }
            }
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    public boolean emptyLogFileMessage()
    {
        if (!logFile.exists())
            return true;
        BufferedReader reader = null;
        String line;
        try {
            reader = new BufferedReader(new FileReader(logFile));
            while ((line=reader.readLine())!=null)
            {
                if (line!=null && !line.equals(""))
                    return false;
            }
            return true;
        }
        catch (IOException e) {
            return false;
        }
    }

    @Test
    public void testLoadForEach1() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a1, a2;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals(2, t.get(0));
        assertEquals(3, t.get(1));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals(5, t.get(0));
        assertEquals(2, t.get(1));

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0"}));
    }

    @Test
    public void testLoadForEach2() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a0, a2;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals(1, t.get(0));
        assertEquals(3, t.get(1));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals(2, t.get(0));
        assertEquals(2, t.get(1));

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    @Test
    public void testLoadForEach3() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a0, a1;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals(1, t.get(0));
        assertEquals(2, t.get(1));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals(2, t.get(0));
        assertEquals(5, t.get(1));

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $2"}));
    }

    @Test
    public void testJoin1() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' as (b0:int, b1:int);");
        pigServer.registerQuery("C = join A by a1, B by b1;");
        pigServer.registerQuery("D = foreach C generate a1, a2, b0, b1;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(4, t.size());
        assertEquals(2, t.get(0));
        assertEquals(3, t.get(1));
        assertEquals(2, t.get(2));
        assertEquals(2, t.get(3));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0"}));
    }

    @Test
    public void testJoin2() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' as (b0:int, b1:int);");
        pigServer.registerQuery("C = join A by a1, B by b1;");
        pigServer.registerQuery("D = foreach C generate a1, a2, b1;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(3, t.size());
        assertEquals(2, t.get(0));
        assertEquals(3, t.get(1));
        assertEquals(2, t.get(2));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0",
                "Columns pruned for B: $0"}));
    }

    @Test
    public void testForEachFilter() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = filter A by a2==3;");
        pigServer.registerQuery("C = foreach B generate a0, a1;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals(1, t.get(0));
        assertEquals(2, t.get(1));

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testForEach1() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a0, a1+a2;");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals(1, t.get(0));
        assertEquals(5, t.get(1));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals(2, t.get(0));
        assertEquals(7, t.get(1));

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testForEach2() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a0 as b0, *;");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(4, t.size());
        assertEquals(1, t.get(0));
        assertEquals(1, t.get(1));
        assertEquals(2, t.get(2));
        assertEquals(3, t.get(3));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(4, t.size());
        assertEquals(2, t.get(0));
        assertEquals(2, t.get(1));
        assertEquals(5, t.get(2));
        assertEquals(2, t.get(3));

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testSplit1() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.registerQuery("split A into B if $0<=1, C if $0>1;");
        pigServer.registerQuery("D = foreach B generate $1;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $2"}));
    }

    @Test
    public void testSplit2() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.registerQuery("split A into B if $0<=1, C if $0>1;");
        pigServer.registerQuery("D = foreach B generate $1;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $2"}));
    }

    @Test
    public void testForeachNoSchema1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "';");
        pigServer.registerQuery("B = foreach A generate $1, $2;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("3", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("5", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testForeachNoSchema2() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "';");
        pigServer.registerQuery("B = foreach A generate $1, 'aoeuaoeu';");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("aoeuaoeu", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("5", t.get(0).toString());
        assertEquals("aoeuaoeu", t.get(1).toString());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCoGroup1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:int, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1:int);");
        pigServer.registerQuery("C = cogroup A by $1, B by $1;");
        pigServer.registerQuery("D = foreach C generate AVG($1.$1);");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertNull(t.get(0));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2.0", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("5.0", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for B: $0"}));
    }

    @Test
    public void testCoGroup2() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:int, a2);");
        pigServer.registerQuery("B = group A all;");
        pigServer.registerQuery("C = foreach B generate $1;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        String[] expected = new String[] {
                "({(1,2,3),(2,5,2)})"
        };
        assertTrue(iter.hasNext());
        Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("C")));

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCoGroup3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:int, a2);");
        pigServer.registerQuery("B = group A by $1;");
        pigServer.registerQuery("C = foreach B generate $1, '1';");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("{(1,2,3)}", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("{(2,5,2)}", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCoGroup4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:int, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1:int);");
        pigServer.registerQuery("C = cogroup A by ($1), B by ($1);");
        pigServer.registerQuery("D = foreach C generate $1.$1, $2.$1;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("{}", t.get(0).toString());
        assertEquals("{(1)}", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("{(2)}", t.get(0).toString());
        assertEquals("{(2)}", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("{(5)}", t.get(0).toString());
        assertEquals("{}", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCoGroup5() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = group A by (a0, a1);");
        pigServer.registerQuery("C = foreach B generate flatten(group);");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("5", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $2"}));
    }

    @Test
    public void testDistinct1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile4.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = distinct A;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testStream1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = stream A through `" + simpleEchoStreamingCommand + "`;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testBinCond1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile5.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2, a3);");
        pigServer.registerQuery("B = foreach A generate ($1 == '2'? $2 : $3);");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("3", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("5", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0"}));
    }

    @Test
    public void testCoGroup6() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1);");
        pigServer.registerQuery("C = cogroup A by ($1), B by ($1);");
        pigServer.registerQuery("D = foreach C generate A, flatten(B.($0, $1));");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(3, t.size());
        assertEquals("{}", t.get(0).toString());
        assertEquals("1", t.get(1).toString());
        assertEquals("1", t.get(2).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(3, t.size());
        assertEquals("{(1,2,3)}", t.get(0).toString());
        assertEquals("2", t.get(1).toString());
        assertEquals("2", t.get(2).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCoGroup7() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1);");
        pigServer.registerQuery("C = cogroup A by ($1), B by ($1);");
        pigServer.registerQuery("D = foreach C {B = order B by $0;generate FLATTEN(A), B.($1);};");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(4, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("2", t.get(1).toString());
        assertEquals("3", t.get(2).toString());
        assertEquals("{(2)}", t.get(3).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(4, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("5", t.get(1).toString());
        assertEquals("2", t.get(2).toString());
        assertEquals("{}", t.get(3).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCross1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1);");
        pigServer.registerQuery("C = cross A, B;");
        pigServer.registerQuery("D = foreach C generate $0, $3;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        Collection<String> results = new HashSet<String>();
        results.add("(1,1)");
        results.add("(2,1)");
        results.add("(1,2)");
        results.add("(2,2)");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2",
            "Columns pruned for B: $1"}));
    }

    @Test
    public void testUnion1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile4.toString()), pigServer.getPigContext()) + "' AS (b0, b1, b2);");
        pigServer.registerQuery("C = union A, B;");
        pigServer.registerQuery("D = foreach C generate $0, $2;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        Collection<String> results = new HashSet<String>();
        results.add("(1,3)");
        results.add("(2,2)");
        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        results.contains(t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        results.contains(t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        results.contains(t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        results.contains(t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1",
            "Columns pruned for B: $1"}));
    }

    @Test
    public void testFRJoin1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1);");
        pigServer.registerQuery("C = join A by $0, B by $0 using 'replicated';");
        pigServer.registerQuery("D = foreach C generate $0, $3;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2",
            "Columns pruned for B: $1"}));
    }

    @Test
    public void testFilter1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = order A by a1;");
        pigServer.registerQuery("C = limit B 10;");
        pigServer.registerQuery("D = foreach C generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $2"}));
    }

    @Test
    public void testFilter2() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = filter A by a0+a2 == 4;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    @Test
    public void testOrderBy1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = order A by $0;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2"}));
    }

    @Test
    public void testOrderBy2() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = order A by *;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testCogroup8() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = group A by *;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("(1,2,3)", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("(2,5,2)", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testJoin3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile4.toString()), pigServer.getPigContext()) + "' AS (b0, b1, b2);");
        pigServer.registerQuery("C = join A by *, B by * using 'replicated';");
        pigServer.registerQuery("D = foreach C generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testLoadForEach4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate *;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2"}));
    }

    @Test
    public void testForEachUDF() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0:chararray, a1:chararray, a2:chararray);");
        pigServer.registerQuery("B = foreach A generate StringSize(*);");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testOutJoin1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile6.toString()), pigServer.getPigContext()) + "' AS (a0:chararray, a1:chararray, a2:chararray);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (a0:chararray, a1:chararray, a2:chararray);");
        pigServer.registerQuery("C = join A by $0 left, B by $0;");
        pigServer.registerQuery("D = foreach C generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        Collection<String> results = new HashSet<String>();
        results.add("(2)");
        results.add("()");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2",
            "Columns pruned for B: $1, $2"}));
    }

    @Test
    public void testFilter3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = filter A by " + MyFilterFunc.class.getName() + "(*) ;");
        pigServer.registerQuery("C = foreach B generate $0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testMapKey1() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:map[]);");
        pigServer.registerQuery("B = foreach A generate a0, a1#'key1';");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals(1, t.get(0));
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals(2, t.get(0));
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Map key required for A: $1->[key1]"}));
    }

    @Test
    public void testMapKey2() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:map[]);");
        pigServer.registerQuery("B = foreach A generate a1, a1#'key1';");
        pigServer.registerQuery("C = foreach B generate $0#'key2', $1;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("4", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0",
                "Map key required for A: $1->[key1, key2]"}));
    }

    @Test
    public void testMapKey3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:map[]);");
        pigServer.registerQuery("B = foreach A generate a1, a1#'key1';");
        pigServer.registerQuery("C = group B all;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        String[] expected = new String[] {
                "(all,{([key2#2,key1#1],1),([key2#4,key1#2],2)})"
        };

        Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("C")));

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0"}));
    }

    @Test
    public void testMapKey4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:map[]);");
        pigServer.registerQuery("B = limit A 10;");
        pigServer.registerQuery("C = foreach B generate $0, $1#'key1';");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Map key required for A: $1->[key1]"}));
    }

    @Test
    public void testMapKey5() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:map[]);");
        pigServer.registerQuery("B = foreach A generate $0, $1#'key1';");
        pigServer.registerQuery("C = stream B through `" + simpleEchoStreamingCommand + "`;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("1", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Map key required for A: $1->[key1]"}));
    }

    @Test
    public void testMapKeyInSplit1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile12.toString()), pigServer.getPigContext()) + "' as (m:map[]);");
        pigServer.registerQuery("B = foreach A generate m#'key1' as key1;");
        pigServer.registerQuery("C = foreach A generate m#'key2' as key2;");
        pigServer.registerQuery("D = join B by key1, C by key2;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Map key required for A: $0->[key1, key2]"}));
    }

    @SuppressWarnings("rawtypes")
    @Test
    public void testMapKeyInSplit2() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile12.toString()), pigServer.getPigContext()) + "' as (m:map[]);");
        pigServer.registerQuery("B = filter A by m#'cond'==1;");
        pigServer.registerQuery("C = filter B by m#'key1'==1;");
        pigServer.registerQuery("D = filter B by m#'key2'==2;");
        pigServer.registerQuery("E = join C by m#'key1', D by m#'key1';");

        Iterator<Tuple> iter = pigServer.openIterator("E");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", ((Map)t.get(0)).get("key1").toString());
        assertEquals("2", ((Map)t.get(0)).get("key2").toString());
        assertEquals("1", ((Map)t.get(0)).get("cond").toString());
        assertEquals("1", ((Map)t.get(1)).get("key1").toString());
        assertEquals("2", ((Map)t.get(1)).get("key2").toString());
        assertEquals("1", ((Map)t.get(1)).get("cond").toString());

        assertFalse(iter.hasNext());

        assertTrue(this.emptyLogFileMessage());
    }

    @Test
    public void testConstantPlan() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate 1, a2;");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("3", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("2", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $1"}));
    }

    @Test
    public void testPlainPlan() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.registerQuery("B = order A by $0;");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(3, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("2", t.get(1).toString());
        assertEquals("3", t.get(2).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(3, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("5", t.get(1).toString());
        assertEquals("2", t.get(2).toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testBinStorage1() throws Exception {
        // get a temp intermediate filename
        File intermediateFile = File.createTempFile("intemediate", "txt");
        intermediateFile.delete(); // delete since we don't want the file to be present
        String clusterPath = Util.removeColon(intermediateFile.getAbsolutePath());

        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.store("A", clusterPath, "BinStorage()");

        pigServer.registerQuery("A = load '"+ Util.encodeEscape(clusterPath)
                + "' using BinStorage() as (a0, a1, a2);");

        pigServer.registerQuery("B = foreach A generate a0;");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2"}));
    }

    @Test
    public void testBinStorage2() throws Exception {
        File intermediateFile = File.createTempFile("intemediate", "txt");
        intermediateFile.delete(); // delete since we don't want the file to be present
        String clusterPath = Util.removeColon(intermediateFile.getAbsolutePath());

        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.store("A", clusterPath, "BinStorage()");

        pigServer.registerQuery("A = load '"+ Util.encodeEscape(clusterPath)
                + "' using BinStorage() as (a0, a1, a2);");

        pigServer.registerQuery("B = foreach A generate a2, a0, a1;");
        pigServer.registerQuery("C = foreach B generate a0, a2;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("3", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }


    @Test
    public void testProjectCastKeyLookup() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext())
                + "' as (a0, a1);");

        pigServer.registerQuery("B = foreach A generate a1#'key1';");

        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0",
                "Map key required for A: $1->[key1]"}));

    }

    @Test
    public void testRelayFlattenMap() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile3.toString()), pigServer.getPigContext())
                + "' as (a0, a1:map[]);");

        pigServer.registerQuery("B = foreach A generate flatten(a1);");
        pigServer.registerQuery("C = foreach B generate a1#'key1';");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("1", t.get(0).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertEquals("2", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0",
                "Map key required for A: $1->[key1]"}));
    }

    @Test
    public void testCrossAtLeastOneColumnOneInput() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' as (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' as (b0:int, b1:int);");
        pigServer.registerQuery("C = cross A, B;");
        pigServer.registerQuery("D = foreach C generate $0;");

        Iterator<Tuple> iter = pigServer.openIterator("D");
        Collection<String> results = new HashSet<String>();
        results.add("(1)");
        results.add("(2)");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1, $2",
                "Columns pruned for B: $1"}));
    }

    @Test
    public void testComplex1() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile7.toString()), pigServer.getPigContext()) + "' as (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile8.toString()), pigServer.getPigContext()) + "' as (b0, b1, b2, b3);");
        pigServer.registerQuery("B1 = foreach B generate b2, b0+b3;");
        pigServer.registerQuery("C = join A by $0, B1 by $0;");
        pigServer.registerQuery("D = order C by $4;");
        pigServer.registerQuery("E = foreach D generate $0, $2;");
        pigServer.registerQuery("F = filter E by $1<10;");
        pigServer.registerQuery("G = group F by $0;");
        pigServer.registerQuery("H = foreach G generate $1;");

        Iterator<Tuple> iter = pigServer.openIterator("H");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("{(2,2)}", t.get(0).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1",
                "Columns pruned for B: $1"}));
    }

    @Test
    public void testCoGroup8() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:int, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1:int);");
        pigServer.registerQuery("C = cogroup A by ($1), B by ($1);");
        pigServer.registerQuery("D = foreach C generate $0, $1;");

        Iterator<Tuple> iter = pigServer.openIterator("D");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals(2, t.size());
        assertEquals("1", t.get(0).toString());
        assertEquals("{}", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("2", t.get(0).toString());
        assertEquals("{(1,2,3)}", t.get(1).toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals(2, t.size());
        assertEquals("5", t.get(0).toString());
        assertEquals("{(2,5,2)}", t.get(1).toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for B: $0"}));
    }

    // See PIG-1128
    @Test
    public void testUserDefinedSchema() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS ( c1 : chararray, c2 : int);");
        pigServer.registerQuery("B = foreach A generate c1 as c1 : chararray, c2 as c2 : int, 'CA' as state : chararray;");
        pigServer.registerQuery("C = foreach B generate c1 as c1 : chararray;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(1)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(2)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    // See PIG-1127
    @Test
    public void testSharedSchemaObject() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile10.toString()), pigServer.getPigContext()) + "' AS (a0, a1:map[], a2);");
        pigServer.registerQuery("B = foreach A generate a1;");
        pigServer.registerQuery("C = limit B 10;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("([1#1, 2#1])", TestHelper.sortString("\\[(.*)\\]", t.toString(), ","));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $2"}));
    }

    // See PIG-1142
    @Test
    public void testJoin4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (b0, b1, b2);");
        pigServer.registerQuery("C = join A by a2, B by b2;");
        pigServer.registerQuery("D = foreach C generate $0,  $1,  $2;");

        Iterator<Tuple> iter = pigServer.openIterator("D");
        Collection<String> results = new HashSet<String>();
        results.add("(1,2,3)");
        results.add("(2,5,2)");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for B: $0, $1"}));
    }

    @Test
    public void testFilter4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2:int);");
        pigServer.registerQuery("B = filter A by a2==3;");
        pigServer.registerQuery("C = foreach B generate $2;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(3)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $1"}));
    }

    @Test
    public void testSplit3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2:int);");
        pigServer.registerQuery("split A into B if a2==3, C if a2<3;");
        pigServer.registerQuery("C = foreach B generate $2;");

        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(3)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $1"}));
    }

    @Test
    public void testOrderBy3() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = order A by a2;");
        pigServer.registerQuery("C = foreach B generate a2;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertEquals("(2)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertEquals("(3)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $1"}));
    }

    @Test
    public void testCogroup9() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (b0, b1, b2);");
        pigServer.registerQuery("C = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (c0, c1, c2);");
        pigServer.registerQuery("D = cogroup A by a2, B by b2, C by c2;");
        pigServer.registerQuery("E = foreach D generate $1, $2;");
        Iterator<Tuple> iter = pigServer.openIterator("E");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("({(2,5,2)},{(2,5,2)})", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertEquals("({(1,2,3)},{(1,2,3)})", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for C: $0, $1"}));
    }

    // See PIG-1165
    @Test
    public void testOrderbyWrongSignature() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b1);");
        pigServer.registerQuery("C = order A by a1;");
        pigServer.registerQuery("D = join C by a1, B by b0;");
        pigServer.registerQuery("E = foreach D generate a1, b0, b1;");
        Iterator<Tuple> iter = pigServer.openIterator("E");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(3, t.size());
        assertEquals("(2,2,2)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0, $2"}));
    }

    // See PIG-1146
    @Test
    public void testUnionMixedPruning() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1:chararray, a2);");
        pigServer.registerQuery("B = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (b0, b2);");
        pigServer.registerQuery("C = foreach B generate b0, 'hello', b2;");
        pigServer.registerQuery("D = union A, C;");
        pigServer.registerQuery("E = foreach D generate $0, $2;");
        Iterator<Tuple> iter = pigServer.openIterator("E");
        Collection<String> results = new HashSet<String>();
        results.add("(1,3)");
        results.add("(2,2)");
        results.add("(1,1)");
        results.add("(2,2)");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(2, t.size());
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    // See PIG-1176
    @Test
    public void testUnionMixedSchemaPruning() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate a0;;");
        pigServer.registerQuery("C = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "';");
        pigServer.registerQuery("D = foreach C generate $0;");
        pigServer.registerQuery("E = union B, D;");
        Iterator<Tuple> iter = pigServer.openIterator("E");
        Collection<String> results = new HashSet<String>();
        results.add("(1)");
        results.add("(2)");
        results.add("(1)");
        results.add("(2)");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertTrue(iter.hasNext());
        t = iter.next();

        assertEquals(1, t.size());
        assertTrue(results.contains(t.toString()));

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    // See PIG-1184
    @Test
    public void testForEachFlatten() throws Exception {
        File inputFile = Util.createInputFile("table_testForEachFlatten", "", new String[]{"oiue\tM\t{(3),(4)}\t{(toronto),(montreal)}"});

        pigServer.registerQuery("A = load '"+Util.encodeEscape(inputFile.toString())+"' as (a0:chararray, a1:chararray, a2:bag{t:tuple(id:chararray)}, a3:bag{t:tuple(loc:chararray)});");
        pigServer.registerQuery("B = foreach A generate a0, a1, flatten(a2), flatten(a3), 10;");
        pigServer.registerQuery("C = foreach B generate a0, $4;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(oiue,10)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(oiue,10)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(oiue,10)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(oiue,10)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    // See PIG-1210
    @Test
    public void testFieldsToReadDuplicatedEntry() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate a0+a0, a1, a2;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(2.0,2,3)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(4.0,5,2)", t.toString());

        assertFalse(iter.hasNext());

        assertTrue(emptyLogFileMessage());
    }

    // See PIG-1272
    @Test
    public void testSplit4() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate a0;");
        pigServer.registerQuery("C = join A by a0, B by a0;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(1,2,3,1)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(2,5,2,2)", t.toString());

        assertTrue(emptyLogFileMessage());
    }

    @Test
    public void testSplit5() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile11.toString()), pigServer.getPigContext()) + "' AS (a0:int, a1:int, a2:int);");
        pigServer.registerQuery("B = foreach A generate a0, a1;");
        pigServer.registerQuery("C = join A by a0, B by a0;");
        pigServer.registerQuery("D = filter C by A::a1>=B::a1;");
        Iterator<Tuple> iter = pigServer.openIterator("D");

        String [] expected = new String[] {
                "(1,2,3,1,2)",
                "(1,3,2,1,2)",
                "(1,3,2,1,3)",
                "(2,5,2,2,5)"
        };

        Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("D")));

        assertTrue(emptyLogFileMessage());
    }


    // See PIG-1493
    @Test
    public void testInconsistentPruning() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' AS (a0:chararray, a1:chararray, a2);");
        pigServer.registerQuery("B = foreach A generate CONCAT(a0,a1) as b0, a0, a2;");
        pigServer.registerQuery("C = foreach B generate a0, a2;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();
        assertEquals("(1,3)", t.toString());

        assertTrue(iter.hasNext());
        t = iter.next();
        assertEquals("(2,2)", t.toString());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    // See PIG-1644
    @Test
    public void testSplitOutputWithForEach() throws Exception {
        Path output1 = FileLocalizer.getTemporaryPath(pigServer.getPigContext());
        Path output2 = FileLocalizer.getTemporaryPath(pigServer.getPigContext());
        pigServer.setBatchOn();
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile5.toString()), pigServer.getPigContext()) + "' AS (a0, a1, a2, a3);");
        pigServer.registerQuery("B = foreach A generate a0, a1, a2;");
        pigServer.registerQuery("store B into '" + Util.generateURI(Util.encodeEscape(output1.toString()), pigServer.getPigContext()) + "';");
        pigServer.registerQuery("C = order B by a2;");
        pigServer.registerQuery("D = foreach C generate a2;");
        pigServer.registerQuery("store D into '" + Util.generateURI(Util.encodeEscape(output2.toString()), pigServer.getPigContext()) + "';");
        pigServer.executeBatch();

        BufferedReader reader1 = new BufferedReader(new InputStreamReader(FileLocalizer.openDFSFile(output1.toString(), pigServer.getPigContext().getProperties())));
        String line = reader1.readLine();
        assertEquals("1\t2\t3", line);

        line = reader1.readLine();
        assertEquals("2\t3\t4", line);

        assertNull(reader1.readLine());

        BufferedReader reader2 = new BufferedReader(new InputStreamReader(FileLocalizer.openDFSFile(output2.toString(), pigServer.getPigContext().getProperties())));
        line = reader2.readLine();
        assertEquals("3", line);

        line = reader2.readLine();
        assertEquals("4", line);

        assertNull(reader2.readLine());

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $3"}));

        reader1.close();
        reader2.close();
    }

    static public class PruneColumnEvalFunc extends LoadFunc implements LoadPushDown {
        String[] aliases;
        String signature;
        public PruneColumnEvalFunc() {}
        @Override
        public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
            aliases = new String[requiredFieldList.getFields().size()];
            for (int i=0; i<requiredFieldList.getFields().size(); i++) {
                RequiredField fs = requiredFieldList.getFields().get(i);
                aliases[i] = fs.getAlias();
            }
            try {
                UDFContext.getUDFContext().getUDFProperties(this.getClass()).setProperty(signature, ObjectSerializer.serialize(aliases));
            } catch (IOException e) {
                throw new FrontendException(e);
            }
            return new RequiredFieldResponse(true);
        }

        @Override
        public List<OperatorSet> getFeatures() {
            return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION);
        }

        @Override
        public void setLocation(String location, Job job) throws IOException {
            FileInputFormat.setInputPaths(job, location);
        }

        @Override
        public InputFormat getInputFormat() throws IOException {
            return new PigTextInputFormat();
        }

        @Override
        public void prepareToRead(RecordReader reader, PigSplit split)
                throws IOException {
        }

        @Override
        public void setUDFContextSignature(String signature) {
            this.signature = signature;
        }

        @Override
        public Tuple getNext() throws IOException {
            if (aliases==null) {
                aliases = (String[])ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass()).getProperty(signature));
                Tuple t = TupleFactory.getInstance().newTuple();
                for (String s : aliases)
                    t.append(s);
                return t;
            }
            return null;
        }
    }

    public void testAliasInRequiredFieldList() throws Exception{
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile1.toString()), pigServer.getPigContext()) + "' using "
                + PruneColumnEvalFunc.class.getName() +"() as (a0, a1, a2);");
        pigServer.registerQuery("B = foreach A generate a1, a2;");
        Iterator<Tuple> iter = pigServer.openIterator("B");

        assertTrue(iter.hasNext());
        Tuple t = iter.next();

        assertEquals(2, t.size());
        assertEquals("a1", t.get(0));
        assertEquals("a2", t.get(1));

        assertFalse(iter.hasNext());
    }

    @Test
    public void testCogroup10() throws Exception {
        pigServer.registerQuery("A = load '"+ Util.generateURI(Util.encodeEscape(tmpFile2.toString()), pigServer.getPigContext()) + "' AS (a0, a1:double);");
        pigServer.registerQuery("B = foreach A generate a0, a1, 0 as joinField;");
        pigServer.registerQuery("C = group B all;");
        pigServer.registerQuery("D = foreach C generate 0 as joinField, SUM(B.a1) as total;");
        pigServer.registerQuery("E = join B by joinField, D by joinField;");
        pigServer.registerQuery("F = foreach E generate a0;");
        Iterator<Tuple> iter = pigServer.openIterator("F");

        String[] expected = new String[] {"(1)", "(2)"};

        Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("F")));

        assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1"}));
    }

    // See PIG-2534
    @Test
    public void testStream2() throws Exception {
        File input1 = File.createTempFile("tmp", "");
        input1.delete();
        File input2 = File.createTempFile("tmp", "");
        input2.delete();

        Util.createLocalInputFile(input1.getAbsolutePath(), new String[]
                {"[key1#0,key2#5,key3#val3,key4#val4,key5#val5]"});
        Util.createLocalInputFile(input2.getAbsolutePath(), new String[]
                {"[key1#0,key2#5,key3#val3,key4#val4,key5#val5]"});

        pigServer.registerQuery("event_serve = LOAD '" + Util.encodeEscape(input1.getAbsolutePath()) +
                "' AS (s, m, l);");
        pigServer.registerQuery("cm_data_raw = LOAD '" + Util.encodeEscape(input2.getAbsolutePath()) +
                "' AS (s, m, l);");
        pigServer.registerQuery("cm_serve = FOREACH cm_data_raw GENERATE  s#'key3' AS f1,  s#'key4' AS f2, s#'key5' AS f3 ;");
        pigServer.registerQuery("cm_serve_lowercase = stream cm_serve through `tr [:upper:] [:lower:]`;");
        pigServer.registerQuery("cm_serve_final = FOREACH cm_serve_lowercase GENERATE  $0 AS cm_event_guid, $1 AS cm_receive_time, $2 AS cm_ctx_url;");
        pigServer.registerQuery("event_serve_project = FOREACH  event_serve GENERATE  s#'key3' AS event_guid, s#'key4' AS receive_time;");
        pigServer.registerQuery("event_serve_join = join cm_serve_final by (cm_event_guid), event_serve_project by (event_guid);");
        Iterator<Tuple> iter = pigServer.openIterator("event_serve_join");

        String[] expected = new String[] {"(val3,val4,val5,val3,val4)"};

        Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("event_serve_join")));

        assertTrue(checkLogFileMessage(new String[]{"Map key required for event_serve: $0->[key3, key4]",
                "Map key required for cm_data_raw: $0->[key3, key4, key5]"}));
    }

    // See PIG-2535
    @Test
    public void testStream3() throws Exception {
        pigServer.registerQuery("event_serve = LOAD 'input1' AS (s, m, l);");
        pigServer.registerQuery("raw = LOAD 'input2' AS (s, m, l);");

        pigServer.registerQuery("SPLIT raw INTO " +
            "serve_raw IF (( (chararray) (s#'type') == '0') AND ( (chararray) (s#'source') == '5'))," +
            "cm_click_raw IF (( (chararray) (s#'type') == '1') AND ( (chararray) (s#'source') == '5'));");
        pigServer.registerQuery("cm_serve = FOREACH serve_raw GENERATE  s#'cm_serve_id' AS cm_event_guid,  s#'cm_serve_timestamp_ms' AS cm_receive_time, s#'p_url' AS ctx ;");
        pigServer.registerQuery("cm_serve_lowercase = stream cm_serve through `tr [:upper:] [:lower:]`;");
        pigServer.registerQuery("cm_serve_final = FOREACH cm_serve_lowercase GENERATE  $0 AS cm_event_guid, $1 AS cm_receive_time, $2 AS ctx;");
        pigServer.registerQuery("filtered = FILTER event_serve BY (chararray) (s#'filter_key') neq 'xxxx' AND (chararray) (s#'filter_key') neq 'yyyy';");
        pigServer.registerQuery("event_serve_project = FOREACH filtered GENERATE s#'event_guid' AS event_guid, s#'receive_time' AS receive_time;");
        pigServer.registerQuery("event_serve_join = join cm_serve_final by (cm_event_guid), event_serve_project by (event_guid);");


        pigServer.explain("event_serve_join", System.out);

        assertTrue(checkLogFileMessage(new String[]{"Map key required for event_serve: $0->[event_guid, filter_key, receive_time]",
                "Map key required for raw: $0->[cm_serve_id, cm_serve_timestamp_ms, p_url, source, type]"}));
    }
}
TOP

Related Classes of org.apache.pig.test.TestPruneColumn

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
nalytics.com/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');