Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad


            leaf = leaves.get(0);

        for (MapReduceOper mmro : mergedPlans) {
            mmro.setReduceDone(true);
            FileSpec fileSpec = getTempFileSpec();
            POLoad ld = getLoad();
            ld.setLFile(fileSpec);
            POStore str = getStore();
            str.setSFile(fileSpec);
            mmro.reducePlan.addAsLeaf(str);
            mro.mapPlan.add(ld);
            if(leaf!=null)
View Full Code Here


     * @return
     * @throws IOException
     * @throws PlanException
     */
    private MapReduceOper startNew(FileSpec fSpec, MapReduceOper old) throws PlanException{
        POLoad ld = getLoad();
        ld.setLFile(fSpec);
        MapReduceOper ret = getMROp();
        ret.mapPlan.add(ld);
        MRPlan.add(ret);
        MRPlan.connect(old, ret);
        return ret;
View Full Code Here

        int numFiles = 0;
        boolean ret = false;
        try {
            for (PhysicalOperator root : roots) {
                POLoad ld = (POLoad) root;
                String fileName = ld.getLFile().getFileName();

                if(UriUtil.isHDFSFile(fileName)){
                    // Only if the input is an hdfs file, this optimization is
                    // useful (to reduce load on namenode)

                    //separate out locations separated by comma
                    String [] locations = LoadFunc.getPathStrings(fileName);
                    for(String location : locations){
                        if(!UriUtil.isHDFSFile(location))
                            continue;
                        Path path = new Path(location);
                        FileSystem fs = path.getFileSystem(conf);
                        if (fs.exists(path)) {
                            LoadFunc loader = (LoadFunc) PigContext
                            .instantiateFuncFromSpec(ld.getLFile()
                                    .getFuncSpec());
                            Job job = new Job(conf);
                            loader.setUDFContextSignature(ld.getSignature());
                            loader.setLocation(location, job);
                            InputFormat inf = loader.getInputFormat();
                            List<InputSplit> splits = inf.getSplits(HadoopShims.cloneJobContext(job));
                            List<List<InputSplit>> results = MapRedUtil
                            .getCombinePigSplits(splits,
View Full Code Here

                    int errCode = 2172;
                    String errMsg = "Expected physical operator at root to be POLoad. Found : "+rootPOOp.getClass().getCanonicalName();
                    throw new MRCompilerException(errMsg,errCode);
                }

                POLoad sideLoader = (POLoad)rootPOOp;
                FileSpec loadFileSpec = sideLoader.getLFile();
                FuncSpec funcSpec = loadFileSpec.getFuncSpec();
                LoadFunc loadfunc = sideLoader.getLoadFunc();
                if(i == 0){

                    if(!(CollectableLoadFunc.class.isAssignableFrom(loadfunc.getClass()))){
                        int errCode = 2252;
                        throw new MRCompilerException("Base loader in Cogroup must implement CollectableLoadFunc.", errCode);
                    }

                    ((CollectableLoadFunc)loadfunc).ensureAllKeyInstancesInSameSplit();
                    continue;
                }
                if(!(IndexableLoadFunc.class.isAssignableFrom(loadfunc.getClass()))){
                    int errCode = 2253;
                    throw new MRCompilerException("Side loaders in cogroup must implement IndexableLoadFunc.", errCode);
                }

                funcSpecs.add(funcSpec);
                fileSpecs.add(loadFileSpec.getFileName());
                loaderSigns.add(sideLoader.getSignature());
                MRPlan.remove(mrOper);
            }

            poCoGrp.setSideLoadFuncs(funcSpecs);
            poCoGrp.setSideFileSpecs(fileSpecs);
View Full Code Here

            final MapReduceOper baseMROp, final List<PhysicalPlan> mapperLRInnerPlans)
        throws MRCompilerException, PlanException, ExecException, IOException, CloneNotSupportedException {

        // First replace loader with  MergeJoinIndexer.
        PhysicalPlan baseMapPlan = baseMROp.mapPlan;
        POLoad baseLoader = (POLoad)baseMapPlan.getRoots().get(0);
        FileSpec origLoaderFileSpec = baseLoader.getLFile();
        FuncSpec funcSpec = origLoaderFileSpec.getFuncSpec();
        LoadFunc loadFunc = baseLoader.getLoadFunc();

        if (! (OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
            int errCode = 1104;
            String errMsg = "Base relation of merge-coGroup must implement " +
            "OrderedLoadFunc interface. The specified loader "
            + funcSpec + " doesn't implement it";
            throw new MRCompilerException(errMsg,errCode);
        }

        String[] indexerArgs = new String[6];
        indexerArgs[0] = funcSpec.toString();
        indexerArgs[1] = ObjectSerializer.serialize((Serializable)mapperLRInnerPlans);
        indexerArgs[3] = baseLoader.getSignature();
        indexerArgs[4] = baseLoader.getOperatorKey().scope;
        indexerArgs[5] = Boolean.toString(false); // we care for nulls.

        PhysicalPlan phyPlan;
        if (baseMapPlan.getSuccessors(baseLoader) == null
                || baseMapPlan.getSuccessors(baseLoader).isEmpty()){
         // Load-Load-Cogroup case.
            phyPlan = null;
        }

        else{ // We got something. Yank it and set it as inner plan.
            phyPlan = baseMapPlan.clone();
            PhysicalOperator root = phyPlan.getRoots().get(0);
            phyPlan.disconnect(root, phyPlan.getSuccessors(root).get(0));
            phyPlan.remove(root);

        }
        indexerArgs[2] = ObjectSerializer.serialize(phyPlan);

        POLoad idxJobLoader = getLoad();
        idxJobLoader.setLFile(new FileSpec(origLoaderFileSpec.getFileName(),
                new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs)));
        indexerMROp.mapPlan.add(idxJobLoader);
        indexerMROp.UDFs.add(baseLoader.getLFile().getFuncSpec().toString());

        // Loader of mro will return a tuple of form -
View Full Code Here

            joinOp.setupRightPipeline(rightPipelinePlan);
            rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job.

            // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
            joinOp.setSignature(rightLoader.getSignature());
            LoadFunc rightLoadFunc = rightLoader.getLoadFunc();
            List<String> udfs = new ArrayList<String>();
            if(IndexableLoadFunc.class.isAssignableFrom(rightLoadFunc.getClass())) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
                udfs.add(rightLoader.getLFile().getFuncSpec().toString());

                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                }
                rightMROpr = null;

                // validate that the join keys in merge join are only
                // simple column projections or '*' and not expression - expressions
                // cannot be handled when the index is built by the storage layer on the sorted
                // data when the sorted data (and corresponding index) is written.
                // So merge join will be restricted not have expressions as
                // join keys
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
                LoadFunc loadFunc = rightLoader.getLoadFunc();
                //Replacing POLoad with indexer is disabled for 'merge-sparse' joins.  While
                //this feature would be useful, the current implementation of DefaultIndexableLoader
                //is not designed to handle multiple calls to seekNear.  Specifically, it rereads the entire index
                //for each call.  Some refactoring of this class is required - and then the check below could be removed.
                if (joinOp.getJoinType() == LOJoin.JOINTYPE.MERGESPARSE) {
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement IndexableLoadFunc. " +
                    "The specified loader " + loadFunc + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
                }

                // Replace POLoad with  indexer.

                if (! (OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement " +
                    "OrderedLoadFunc interface. The specified loader "
                    + loadFunc + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
                }

                String[] indexerArgs = new String[6];
                List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
                FileSpec origRightLoaderFileSpec = rightLoader.getLFile();

                indexerArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
                indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
                indexerArgs[3] = rightLoader.getSignature();
                indexerArgs[4] = rightLoader.getOperatorKey().scope;
                indexerArgs[5] = Boolean.toString(true);

                FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
                rightLoader.setLFile(lFile);

                // Loader of mro will return a tuple of form -
                // (keyFirst1, keyFirst2, .. , position, splitIndex) See MergeJoinIndexer

                MRUtil.simpleConnectMapToReduce(rightMROpr, scope, nig);
View Full Code Here

       
        //Process the POLoads
        List<PhysicalOperator> lds = getRoots(mro.mapPlan);
        if(lds!=null && lds.size()>0){
            for (PhysicalOperator operator : lds) {
                POLoad ld = (POLoad)operator;
               
                Pair<FileSpec, Boolean> p = new Pair<FileSpec, Boolean>(ld.getLFile(), ld.isSplittable());
                //Store the inp filespecs
                inp.add(p);
               
                //Store the target operators for tuples read
                //from this input
View Full Code Here

        for (MapReduceOper succ : succs) {
            List<PhysicalOperator> roots = succ.mapPlan.getRoots();
            ArrayList<PhysicalOperator> rootsCopy =
                new ArrayList<PhysicalOperator>(roots);
            for (PhysicalOperator op : rootsCopy) {
                POLoad load = (POLoad)op;
                String ifile = load.getLFile().getFileName();
                if (ofile.compareTo(ifile) != 0) {
                    continue;
                }
                PhysicalOperator opSucc = succ.mapPlan.getSuccessors(op).get(0);
                PhysicalPlan clone = null;
View Full Code Here

                        
        MapReduceOper mrOper = getMROper();

        MapReduceOper splittee = mergeList.get(0);
        PhysicalPlan pl = splittee.mapPlan;
        POLoad load = (POLoad)pl.getRoots().get(0);
       
        mrOper.mapPlan.add(load);
      
        // add a dummy store operator, it'll be replaced by the split operator later.
        try {
View Full Code Here

            POStore storeOp = (POStore) mpLeaf;
            storeOp.setSFile(fSpec);
            storeOp.setIsTmpStore(true);
            mr.setReduceDone(true);
            MapReduceOper limitAdjustMROp = new MapReduceOper(new OperatorKey(scope,nig.getNextNodeId(scope)));
            POLoad ld = new POLoad(new OperatorKey(scope,nig.getNextNodeId(scope)));
            ld.setPc(pigContext);
            ld.setLFile(fSpec);
            ld.setIsTmpLoad(true);
            limitAdjustMROp.mapPlan.add(ld);
            if (mr.isGlobalSort()) {
                connectMapToReduceLimitedSort(limitAdjustMROp, mr);
            } else {
                MRUtil.simpleConnectMapToReduce(limitAdjustMROp, scope, nig);
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.