
Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad
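POLoad is the physical operator that wraps a LoadFunc and feeds input tuples into a map plan. The snippets below, drawn from Pig's MapReduce compiler and join implementations, all build it the same way. A minimal sketch of that recurring pattern (assuming scope, a NodeIdGenerator nig, and pigContext are in scope, as they are in MRCompiler; the input path is hypothetical):

                POLoad ld = new POLoad(new OperatorKey(scope, nig.getNextNodeId(scope)), true);
                ld.setPc(pigContext);                        // the operator needs a PigContext to instantiate its loader
                ld.setLFile(new FileSpec("/tmp/some/input",  // hypothetical input location
                        new FuncSpec(PigStorage.class.getName() + "()")));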


                FileSpec fSpec = getTempFileSpec();
                ((POStore)mpLeaf).setSFile(fSpec);
                ((POStore)mpLeaf).setIsTmpStore(true);
                mr.setReduceDone(true);
                MapReduceOper limitAdjustMROp = getMROp();
                POLoad ld = getLoad();
                ld.setLFile(fSpec);
                limitAdjustMROp.mapPlan.add(ld);
                POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
                pLimit.setLimit(mr.limit);
                limitAdjustMROp.mapPlan.addAsLeaf(pLimit);
                if (mr.isGlobalSort())
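
The snippet above shows the temp-file handoff the compiler uses whenever one MapReduce job must consume another's output: the producer's leaf becomes a POStore on a temporary FileSpec, and the consumer's map plan starts with a POLoad on the same FileSpec. Condensed to just the handoff (store and load stand for the two operators; helper and setter names as in the snippet):

                FileSpec tmp = getTempFileSpec();  // one shared temp location
                store.setSFile(tmp);               // producer job writes here...
                store.setIsTmpStore(true);         // ...flagged as temporary output
                load.setLFile(tmp);                // consumer job reads it back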


    private MapReduceOper getMROp(){
        return new MapReduceOper(new OperatorKey(scope,nig.getNextNodeId(scope)));
    }
   
    private POLoad getLoad(){
        POLoad ld = new POLoad(new OperatorKey(scope,nig.getNextNodeId(scope)), true);
        ld.setPc(pigContext);
        return ld;
    }
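
These two factory helpers recur at every call site on this page; getLoad() leaves the FileSpec unset so each caller can bind its own (usually temporary) file afterwards:

        POLoad ld = getLoad();      // created with the PigContext already set
        ld.setLFile(fileSpec);      // each call site binds its own file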

            leaf = leaves.get(0);

        for (MapReduceOper mmro : mergedPlans) {
            mmro.setReduceDone(true);
            FileSpec fileSpec = getTempFileSpec();
            POLoad ld = getLoad();
            ld.setLFile(fileSpec);
            POStore str = getStore();
            str.setSFile(fileSpec);
            mmro.reducePlan.addAsLeaf(str);
            mro.mapPlan.add(ld);
            if(leaf!=null)

     * @return
     * @throws IOException
     * @throws PlanException
     */
    private MapReduceOper startNew(FileSpec fSpec, MapReduceOper old) throws PlanException{
        POLoad ld = getLoad();
        ld.setLFile(fSpec);
        MapReduceOper ret = getMROp();
        ret.mapPlan.add(ld);
        MRPlan.add(ret);
        MRPlan.connect(old, ret);
        return ret;
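
startNew() is the compiler's idiom for continuing a pipeline across a job boundary: the new operator's map plan begins with a POLoad on fSpec, and the MRPlan.connect(old, ret) edge keeps the producing job scheduled first. A usage sketch (it assumes, as the callers above do, that a POStore on the same FileSpec has already been attached as the leaf of old's plan):

        FileSpec tmp = getTempFileSpec();
        // ... add a POStore on tmp as the leaf of old's map or reduce plan ...
        MapReduceOper next = startNew(tmp, old);  // next's map plan now loads tmp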

            }
           
            joinOp.setupRightPipeline(rightPipelinePlan);
                       
            // At this point, we must be operating on the map plan of the right input, and it should contain nothing other than a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
            joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());

            // Replace the POLoad with the indexer.
            String[] indexerArgs = new String[3];
            indexerArgs[0] = rightLoader.getLFile().getFuncSpec().toString();
            if (!(PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof SamplableLoader)) {
                int errCode = 1104;
                String errMsg = "Right input of merge-join must implement SamplableLoader interface. The specified loader " + indexerArgs[0] + " doesn't implement it";
                throw new MRCompilerException(errMsg, errCode);
            }
            List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
            indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
            indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
            FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
            rightLoader.setLFile(lFile);

            // The loader of mro will return a tuple of the form (key1, key2, .., filename, offset).
            // Now set up a POLocalRearrange which has "all" as the key and the tuple fetched
            // by the loader as the "value" of the POLocalRearrange.
            // Sorting of the index could instead be done by Hadoop's sort between map and reduce rather than by Pig. If that is so,
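
The key move above: the right input's POLoad keeps its file name, but its FuncSpec is swapped for MergeJoinIndexer, which wraps the original loader and emits index entries instead of raw tuples. The three serialized indexer arguments are the wrapper's whole configuration: the original loader spec, the join-key plans, and the right-side pipeline. Condensed to just the swap (indexerArgs as built in the snippet):

            FuncSpec indexer = new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs);
            rightLoader.setLFile(new FileSpec(rightLoader.getLFile().getFileName(), indexer));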
           
            joinOp.setupRightPipeline(rightPipelinePlan);
            rightMROpr.requestedParallelism = 1; // we need exactly one reducer for the indexing job.
           
            // At this point, we must be operating on the map plan of the right input, and it should contain nothing other than a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
            LoadFunc rightLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(rightLoader.getLFile().getFuncSpec());
            joinOp.setSignature(rightLoader.getSignature());
            if(rightLoadFunc instanceof IndexableLoadFunc) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
               
                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                }
                rightMROpr = null;
               
                // Validate that the join keys in the merge join are only
                // simple column projections or '*', and not expressions.
                // Expressions cannot be handled when the index is built by the
                // storage layer on the sorted data as it is written, so merge
                // join is restricted to not have expressions as join keys.
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
                // Replace the POLoad with the indexer.
                String[] indexerArgs = new String[3];
                FileSpec origRightLoaderFileSpec = rightLoader.getLFile();
                indexerArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                if (!(PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof SamplableLoader)) {
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement SamplableLoader interface. The specified loader " + indexerArgs[0] + " doesn't implement it";
                    throw new MRCompilerException(errMsg, errCode);
                }
                List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
                indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
                indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
                FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
                rightLoader.setLFile(lFile);
   
                // The loader of mro will return a tuple of the form (key1, key2, .., filename, offset).
                // Now set up a POLocalRearrange which has "all" as the key and the tuple fetched
                // by the loader as the "value" of the POLocalRearrange.
                // Sorting of the index could instead be done by Hadoop's sort between map and reduce rather than by Pig. If that is so,
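
This later revision of the same compiler code adds a fast path: a loader that implements IndexableLoadFunc manages its own index, so the right-side MapReduce job is dropped from the plan entirely, while any other (samplable) loader falls back to the indexer rewrite, run as a single-reducer indexing job. The branch in condensed form:

            if (rightLoadFunc instanceof IndexableLoadFunc) {
                MRPlan.remove(rightMROpr);            // no indexing job needed
            } else {
                rightMROpr.requestedParallelism = 1;  // one reducer builds the index
                // ... swap the POLoad's FuncSpec for MergeJoinIndexer, as above ...
            }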

           
        }
       
        private void setUpHashTable() throws IOException {
            FileSpec replFile = new FileSpec(repl,new FuncSpec(PigStorage.class.getName()+"()"));
            POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile, false);
            PigContext pc = new PigContext(ExecType.MAPREDUCE,ConfigurationUtil.toProperties(PigMapReduce.sJobConf));
            try {
                pc.connect();
           
                ld.setPc(pc);
                Tuple dummyTuple = null;
                for (Result res = ld.getNext(dummyTuple); res.returnStatus != POStatus.STATUS_EOP; res = ld.getNext(dummyTuple)) {
                    Tuple tup = (Tuple) res.result;
                    LoadFunc lf = ((LoadFunc)pc.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));
                    String key = lf.bytesToCharArray(((DataByteArray)tup.get(keyField)).get());
                    Tuple csttup = TupleFactory.getInstance().newTuple(2);
                    csttup.set(0, key);
                    csttup.set(1, lf.bytesToInteger(((DataByteArray)tup.get(1)).get()));
                    DataBag vals = null;
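
Unlike the compiler snippets, this one drives a POLoad directly, outside any map task: the replicated input is pumped by calling getNext() until it reports STATUS_EOP. The drain idiom in isolation (ld and dummyTuple as above):

            for (Result res = ld.getNext(dummyTuple); res.returnStatus != POStatus.STATUS_EOP; res = ld.getNext(dummyTuple)) {
                Tuple tup = (Tuple) res.result;  // one input tuple per successful call
                // ... convert fields and populate the in-memory hash table ...
            }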
