Examples of LogicalOperator


Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

                                                    = cg.getGroupByPlans() ;
        List<LogicalOperator> inputs = cg.getInputs() ;

        // Type checking internal plans.
        for(int i=0;i < inputs.size(); i++) {
            LogicalOperator input = inputs.get(i) ;
            List<LogicalPlan> innerPlans
                        = new ArrayList<LogicalPlan>(groupByPlans.get(input)) ;

            for(int j=0; j < innerPlans.size(); j++) {

                LogicalPlan innerPlan = innerPlans.get(j) ;
               
                // Check that the inner plan has only 1 output port
                if (!innerPlan.isSingleLeafPlan()) {
                    int errCode = 1057;
                    String msg = "COGroup's inner plans can only"
                                 + "have one output (leaf)" ;
                    msgCollector.collect(msg, MessageType.Error) ;
                    throw new TypeCheckerException(msg, errCode, PigException.INPUT) ;
                }

                checkInnerPlan(cg.getAlias(), innerPlans.get(j)) ;
            }

        }

        try {

            if (!cg.isTupleGroupCol()) {
                // merge all the inner plan outputs so we know what type
                // our group column should be

                // TODO: Don't recompute schema here
                //byte groupType = schema.getField(0).type ;
                byte groupType = cg.getAtomicGroupByType() ;

                // go through all inputs again to add cast if necessary
                for(int i=0;i < inputs.size(); i++) {
                    LogicalOperator input = inputs.get(i) ;
                    List<LogicalPlan> innerPlans
                                = new ArrayList<LogicalPlan>(groupByPlans.get(input)) ;
                    // Checking innerPlan size already done above
                    byte innerType = innerPlans.get(0).getSingleLeafPlanOutputType() ;
                    if (innerType != groupType) {
                        insertAtomicCastForCOGroupInnerPlan(innerPlans.get(0),
                                                            cg,
                                                            groupType) ;
                    }
                }
            }
            else {

                // TODO: Don't recompute schema here
                //Schema groupBySchema = schema.getField(0).schema ;
                Schema groupBySchema = cg.getTupleGroupBySchema() ;

                // go through all inputs again to add cast if necessary
                for(int i=0;i < inputs.size(); i++) {
                    LogicalOperator input = inputs.get(i) ;
                    List<LogicalPlan> innerPlans
                                = new ArrayList<LogicalPlan>(groupByPlans.get(input)) ;
                    for(int j=0;j < innerPlans.size(); j++) {
                        LogicalPlan innerPlan = innerPlans.get(j) ;
                        byte innerType = innerPlan.getSingleLeafPlanOutputType() ;
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

        }
        byte groupType = DataType.BYTEARRAY ;
        // merge all the inner plan outputs so we know what type
        // our group column should be
        for(int i=0;i < cg.getInputs().size(); i++) {
            LogicalOperator input = cg.getInputs().get(i) ;
            List<LogicalPlan> innerPlans
                        = new ArrayList<LogicalPlan>(cg.getGroupByPlans().get(input)) ;
            if (innerPlans.size() != 1) {
                int errCode = 2062;
                String msg = "Each COGroup input has to have "
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

        }

        // merge all the inner plan outputs so we know what type
        // our group column should be
        for(int i=0;i < cg.getInputs().size(); i++) {
            LogicalOperator input = cg.getInputs().get(i) ;
            List<LogicalPlan> innerPlans
                        = new ArrayList<LogicalPlan>(cg.getGroupByPlans().get(input)) ;

            for(int j=0;j < innerPlans.size(); j++) {
                byte innerType = innerPlans.get(j).getSingleLeafPlanOutputType() ;
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

                    throw new TypeCheckerException(msg, errCode, PigException.INPUT) ;
                }

                List<LogicalOperator> rootList = plan.getRoots() ;
                for(int j=0; j<rootList.size(); j++) {
                    LogicalOperator innerRoot = rootList.get(j) ;
                    // TODO: Support MAP dereference
                    if (innerRoot instanceof LOProject) {
                        resolveLOProjectType((LOProject) innerRoot) ;
                    }
                    else if (innerRoot instanceof LOConst || innerRoot instanceof LOUserFunc) {
                        // it's ok because LOConst always has
                        // the right type information
                    }
                    else {
                        int errCode = 2064;
                        String msg = "Unsupported root type in "
                            +"LOForEach: " + innerRoot.getClass().getSimpleName();
                        throw new TypeCheckerException(msg, errCode, PigException.BUG) ;
                    }
                }

                checkInnerPlan(f.getAlias(), plan) ;
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

    }

    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {      
        try {
            LogicalOperator op = getOperator(nodes);
            Schema s = op.getSchema();
            if (s == null) return false;
   
            boolean sawOne = false;
            List<Schema.FieldSchema> fss = s.getFields();
            List<Byte> types = new ArrayList<Byte>(s.size());
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
       
        LogicalOperator lo = nodes.get(0);
        if(LOLoad.class.getName().equals(operatorClassName)) {
            if (lo == null || !(lo instanceof LOLoad)) {
                int errCode = 2005;
                String msg = "Expected " + LOLoad.class.getSimpleName()
                        + ", got "
                        + (lo == null ? lo : lo.getClass().getSimpleName());
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else if(LOStream.class.getName().equals(operatorClassName)){
            if (lo == null || !(lo instanceof LOStream)) {
                int errCode = 2005;
                String msg = "Expected " + LOStream.class.getSimpleName()
                        + ", got "
                        + (lo == null ? lo : lo.getClass().getSimpleName());
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else {
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

    }

    @Override
    public void transform(List<LogicalOperator> nodes) throws OptimizerException {
        try {
            LogicalOperator lo = getOperator(nodes);
            Schema s = lo.getSchema();
            String scope = lo.getOperatorKey().scope;
            // For every field, build a logical plan.  If the field has a type
            // other than byte array, then the plan will be cast(project).  Else
            // it will just be project.
            ArrayList<LogicalPlan> genPlans = new ArrayList<LogicalPlan>(s.size());
            ArrayList<Boolean> flattens = new ArrayList<Boolean>(s.size());
            Map<String, Byte> typeChanges = new HashMap<String, Byte>();
            // if we are inserting casts in a load and if the loader
            // implements determineSchema(), insert casts only where necessary
            // Note that in this case, the data coming out of the loader is not
            // a BYTEARRAY but is whatever determineSchema() says it is.
            Schema determinedSchema = null;
            if(LOLoad.class.getName().equals(operatorClassName)) {
                determinedSchema = ((LOLoad)lo).getDeterminedSchema();
            }
            for (int i = 0; i < s.size(); i++) {
                LogicalPlan p = new LogicalPlan();
                genPlans.add(p);
                flattens.add(false);
                List<Integer> toProject = new ArrayList<Integer>(1);
                toProject.add(i);
                LOProject proj = new LOProject(p, OperatorKey.genOpKey(scope),
                    lo, toProject);
                p.add(proj);
                Schema.FieldSchema fs = s.getField(i);
                if (fs.type != DataType.BYTEARRAY) {
                    if(determinedSchema == null || (fs.type != determinedSchema.getField(i).type)) {
                            // Either no schema was determined by loader OR the type
                            // from the "determinedSchema" is different
                            // from the type specified - so we need to cast
                            LOCast cast = new LOCast(p,
                                        OperatorKey.genOpKey(scope), fs.type);
                            cast.setFieldSchema(fs);
                            p.add(cast);
                            p.connect(proj, cast);
                           
                            cast.setFieldSchema(fs.clone());
                            FuncSpec loadFuncSpec = null;
                            if(lo instanceof LOLoad) {
                                loadFuncSpec = ((LOLoad)lo).getInputFile().getFuncSpec();
                            } else if (lo instanceof LOStream) {
                                StreamingCommand command = ((LOStream)lo).getStreamingCommand();
                                HandleSpec streamOutputSpec = command.getOutputSpec();
                                loadFuncSpec = new FuncSpec(streamOutputSpec.getSpec());
                            } else {
                                int errCode = 2006;
                                String msg = "TypeCastInserter invoked with an invalid operator class name: " + lo.getClass().getSimpleName();
                                throw new OptimizerException(msg, errCode, PigException.BUG);
                            }
                            cast.setLoadFuncSpec(loadFuncSpec);
                            typeChanges.put(fs.canonicalName, fs.type);
                            if(determinedSchema == null) {
                                // Reset the loads field schema to byte array so that it
                                // will reflect reality.
                                fs.type = DataType.BYTEARRAY;
                            } else {
                                // Reset the type to what determinedSchema says it is
                                fs.type = determinedSchema.getField(i).type;
                            }
                        }
                }
            }

            // Build a foreach to insert after the load, giving it a cast for each
            // position that has a type other than byte array.
            LOForEach foreach = new LOForEach(mPlan,
                OperatorKey.genOpKey(scope), genPlans, flattens);
            foreach.setAlias(lo.getAlias());
            // Insert the foreach into the plan and patch up the plan.
            insertAfter(lo, foreach, null);

            rebuildSchemas();
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

            // return false
            if (successors == null || successors.size() == 0 || successors.size() > 1) {
                return false;
            }

            LogicalOperator successor = successors.get(0);

            List<LogicalOperator> peers = (mPlan.getPredecessors(successor) == null ? null
                    : new ArrayList<LogicalOperator>(mPlan.getPredecessors(successor)));
           
            // check if any of the foreach's peers is a foreach flatten
            // if so then this rule does not apply
            if (peers != null){
                for(LogicalOperator peer: peers) {
                    if(!peer.equals(foreach)) {
                        if(peer instanceof LOForEach) {
                            LOForEach peerForeach = (LOForEach)peer;
                            if(peerForeach.hasFlatten().first) {
                                return false;
                            }
                        }
                    }
                }
            }
           
            IndexHelper<LogicalOperator> indexHelper = new IndexHelper<LogicalOperator>(peers);
            Integer foreachPosition = indexHelper.getIndex(foreach);
           
            // Check if flattened fields is required by successor, if so, don't optimize
            List<RequiredFields> requiredFieldsList = ((RelationalOperator)successor).getRequiredFields();
            RequiredFields requiredFields = requiredFieldsList.get(foreachPosition.intValue());
           
            MultiMap<Integer, Column> foreachMappedFields = foreachProjectionMap.getMappedFields();
           
            if (requiredFields.getFields()!=null) {
                for (Pair<Integer, Integer> pair : requiredFields.getFields()) {
                    Collection<Column> columns = foreachMappedFields.get(pair.second);
                    if (columns!=null) {
                        for (Column column : columns) {
                            Pair<Integer, Integer> foreachInputColumn = column.getInputColumn();
                            if (foreach.isInputFlattened(foreachInputColumn.second))
                                return false;
                        }
                    }
                }
            }
           
            // the foreach with flatten can be swapped with an order by
            // as the order by will have lesser number of records to sort
            // also the sort does not alter the records that are processed
           
            // the foreach with flatten can be pushed down a cross or a join
            // for the same reason. In this case the foreach has to be first
            // unflattened and then a new foreach has to be inserted after
            // the cross or join. In both cross and foreach the actual columns
            // from the foreach are not altered but positions might be changed
           
            // in the case of union the column is transformed and as a result
            // the foreach flatten cannot be pushed down
           
            // for distinct the output before flattening and the output
            // after flattening might be different. For example, consider
            // {(1), (1)}. Distinct of this bag is still {(1), (1)}.
            // distinct(flatten({(1), (1)})) is (1). However,
            // flatten(distinct({(1), (1)})) is (1), (1)
           
            // in both cases correctness is not affected
            if(successor instanceof LOSort) {
                LOSort sort = (LOSort) successor;
                RequiredFields sortRequiredField = sort.getRequiredFields().get(0);
               
                if(sortRequiredField.getNeedAllFields()) {
                    return false;
                }
               
                List<Pair<Integer, Integer>> sortInputs = sortRequiredField.getFields();
                Set<Integer> requiredInputs = new HashSet<Integer>();
                for(Pair<Integer, Integer> pair: sortInputs) {
                    requiredInputs.add(pair.second);
                }
               
                requiredInputs.retainAll(flattenedColumnSet);
                // the intersection of the sort's required inputs
                // and the flattened columns in the foreach should
                // be null, i.e., the size of required inputs == 0
                if(requiredInputs.size() != 0) {
                    return false;
                }
               
                mSwap = true;
                return true;
            } else if (successor instanceof LOCross
                    || successor instanceof LOJoin) {
               
                List<LogicalOperator> children = mPlan.getSuccessors(successor);
               
                if(children == null || children.size() > 1) {
                    return false;
                }
               
                ProjectionMap succProjectionMap = successor.getProjectionMap();
               
                if(succProjectionMap == null) {
                    return false;
                }
               
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        LogicalOperator lo = nodes.get(0);
        if (lo == null || !(lo instanceof LOForEach)) {
            // we should never be called with any other operator class name
            int errCode = 2005;
            String msg = "Expected " + LOForEach.class.getSimpleName()
                    + ", got "
                    + (lo == null ? lo : lo.getClass().getSimpleName());
            throw new OptimizerException(msg, errCode, PigException.INPUT);
        } else {
            return lo;
        }
View Full Code Here

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator

    @Override
    public void transform(List<LogicalOperator> nodes)
            throws OptimizerException {
        try {
            LOForEach foreach = (LOForEach) getOperator(nodes);
            LogicalOperator successor = mPlan.getSuccessors(foreach).get(0);
            if (mSwap) {
                mPlan.swap(successor, foreach);
            } else if (mInsertBetween) {
                // mark the flattened columns as not flattened in the foreach
                // create a new foreach operator that projects each column of the
                // successor. Mark the remapped flattened columns as flattened
                // in the new foreach operator
               
                if(mFlattenedColumnReMap == null) {
                    int errCode = 2153;
                    String msg = "Internal error. The mapping for the flattened columns is empty";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
                }
               
                // set flatten to false for all columns in the mapping
               
                ArrayList<Boolean> flattenList = (ArrayList<Boolean>)foreach.getFlatten();               
                for(Integer key: mFlattenedColumnReMap.keySet()) {
                    flattenList.set(key, false);
                }
               
                // rebuild schemas of the foreach and the successor after the foreach modification
                foreach.regenerateSchema();
                successor.regenerateSchema();
               
                Schema successorSchema = successor.getSchema();
               
                if(successorSchema == null) {
                    int errCode = 2154;
                    String msg = "Internal error. Schema of successor cannot be null for pushing down foreach with flatten.";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.