Package com.tdunning.plume.local.lazy.op

Examples of com.tdunning.plume.local.lazy.op.DeferredOp


        visited.add(current);
        if(current.isMaterialized()) { // condition for being a materialized input. This may change.
          inputs.add(current);
          continue;
        }
        DeferredOp op = current.getDeferredOp();
        if(op instanceof MultipleParallelDo) { // second condition for being an input
          MultipleParallelDo<?> mPDo = (MultipleParallelDo)current.getDeferredOp();
          if(((LazyCollection<?>)mPDo.getOrigin()).isMaterialized()) {
            inputs.add(mPDo.getOrigin()); // will be done in Mapper
          } else if(op instanceof ParallelDo) {
            inputs.add(current); // will be done in Reducer
          } else {
            inputs.add(mPDo.getOrigin()); // will be done in Mapper
          }
          // Check for bypass channels & output channels with no group-by
          for(Map.Entry entry: mPDo.getDests().entrySet()) {
            LazyCollection coll = (LazyCollection)entry.getKey();
            if(coll.getDownOps() == null || coll.getDownOps().size() == 0) {
              bypassChannels.add(coll); // leaf node
            } else if(coll.getDownOps().get(0) instanceof MultipleParallelDo) {
              bypassChannels.add(coll);
            /*
             * Case of an output channel that Flattens with no Group By
             */
            } else if(coll.getDownOps().get(0) instanceof Flatten) {
              Flatten<?> thisFlatten = (Flatten<?>)coll.getDownOps().get(0);
              LazyCollection ldest = (LazyCollection)thisFlatten.getDest();
              if(ldest.getDownOps() == null || ldest.getDownOps().size() == 0 ||
                  ldest.getDownOps().get(0) instanceof MultipleParallelDo) {
                unGroupedOutputChannels.add(thisFlatten);
                // Add the rest of this flatten's origins to the stack in order to possibly discover more output channels
                for(PCollection<?> col: thisFlatten.getOrigins()) {
                  if(!visited.contains(col)) {
                    toVisit.push((LazyCollection<?>)col);
                  }
                }
              }
            }
          }
          continue;
        }
        if(op instanceof GroupByKey) { // third condition for being an input - rare case when one GBK follows another
          inputs.add(current);
          continue;
        }
        if(op instanceof Flatten) {
          Flatten<?> flatten = (Flatten<?>)op;
          for(PCollection<?> input: flatten.getOrigins()) {
            LazyCollection<?> in = (LazyCollection<?>)input;
            if(!visited.contains(in)) {
              toVisit.push(in);
            }
          }
          continue;
        }
        if(op instanceof OneToOneOp) {
          LazyCollection<?> input = (LazyCollection<?>)((OneToOneOp<?, ?>)op).getOrigin();
          if(!visited.contains(input)) {
            toVisit.push(input);
          }
          continue;
        }
      }
      MSCR mscrToAdd = null;
      // Check if there is already an MSCR with at least one of these inputs
      for(MSCR mscr: mscrs) {
        for(PCollection<?> input: inputs) {
          if(mscr.hasInput(input)) {
            mscrToAdd = mscr;
            break;
          }
        }
      }
      if(mscrToAdd == null) { // otherwise create new MSCR
        mscrToAdd = new MSCR(mscrId);
        mscrId++;
      }
      // Add all missing input channels to current MSCR
      for(PCollection<?> input: inputs) {
        if(!mscrToAdd.hasInput(input)) {
          mscrToAdd.addInput(input);
        }
      }
      // Add all missing bypass outputs to current MSCR
      for(PCollection<?> col: bypassChannels) {
        if(!mscrToAdd.hasOutputChannel(col)) {
          // Create new by-pass channel
          MSCR.OutputChannel oC = new MSCR.OutputChannel(col);
          mscrToAdd.addOutputChannel(oC);
        }
      }
      // Add all missing flatten-with-no-groupby outputs to current MSCR
      for(Flatten flatten: unGroupedOutputChannels) {
        if(!mscrToAdd.hasOutputChannel(flatten.getDest())) {
          // Create new channel with flatten and nothing else
          MSCR.OutputChannel oC = new MSCR.OutputChannel(flatten.getDest());
          oC.output = flatten.getDest();
          oC.flatten = flatten;
          mscrToAdd.addOutputChannel(oC);
        }
      }
      // Add all missing output channels to current MSCR
      for(GroupByKey groupByKey: outputChannels) {
        if(!mscrToAdd.hasOutputChannel(groupByKey.getOrigin())) {
          // Create new channel with group by key. It might have combiner and reducer as well.
          MSCR.OutputChannel oC = new MSCR.OutputChannel(groupByKey);
          oC.output = groupByKey.getDest();
          if(groupByKey.getOrigin().getDeferredOp() instanceof Flatten) {
            oC.flatten = (Flatten)groupByKey.getOrigin().getDeferredOp();
          }
          if(groupByKey.getDest().getDownOps() != null && groupByKey.getDest().getDownOps().size() == 1) {
            DeferredOp op = (DeferredOp)groupByKey.getDest().getDownOps().get(0);
            if(op instanceof CombineValues) {
              oC.combiner = (CombineValues)op;
              oC.output = oC.combiner.getDest();
              LazyCollection dest = (LazyCollection)oC.combiner.getDest();
              if(dest.getDownOps() != null && dest.getDownOps().size() == 1) {
View Full Code Here


      LazyCollection<?> current = toVisit.pop();
      visited.add(current);
      if(current.isMaterialized()) {
        continue;
      }
      DeferredOp op = current.getDeferredOp();
      if(op.getClass().equals(getClass)) {
        // Found
        if(!retOps.contains(op)) {
          retOps.add(op);
        }
      }
View Full Code Here

  @SuppressWarnings({ "unchecked", "rawtypes" })
  public <T> Iterable<T> execute(LazyCollection<T> output) {
    if (output.isMaterialized()) {
      return output.getData(); // nothing else to execute
    } else {
      DeferredOp op = output.getDeferredOp();
      final List<T> result = Lists.newArrayList();
      // Flatten op
      if(op instanceof Flatten) {
        Flatten<T> flatten = (Flatten<T>)op;
        for(PCollection<T> col: flatten.getOrigins()) {
View Full Code Here

      if(op instanceof MultipleParallelDo) {
        MultipleParallelDo mPDo = ((MultipleParallelDo)op);
        for(Object entry: mPDo.getDests().entrySet()) {
          Map.Entry<PCollection, DoFn> en = (Map.Entry<PCollection, DoFn>)entry;
          LazyCollection<?> lCol = (LazyCollection<?>)en.getKey();
          DeferredOp childOp = null;
          if(lCol.getDownOps() != null && lCol.getDownOps().size() > 0) {
            childOp = lCol.getDownOps().get(0);
          }
          final Integer channel;
          if(childOp != null && childOp instanceof Flatten) {
View Full Code Here

  <T> void sinkFlattens(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>)arg;
    if(output.isMaterialized()) { // stop condition for recursive algorithm
      return;
    }
    DeferredOp dOp = output.getDeferredOp();
    if(!(dOp instanceof Flatten)) {
      if(dOp instanceof OneToOneOp) {
        // Recursively apply this function to parent
        sinkFlattens(((OneToOneOp)dOp).getOrigin());
        return;
      } else if(dOp instanceof ParallelDo) {
        // Recursively apply this function to parent
        sinkFlattens(((ParallelDo)dOp).getOrigin());
        return;       
      }
    }
    if(output.getDownOps() == null || output.getDownOps().size() != 1) {
      // Recursively apply this function to parent
      for(Object col: ((Flatten)dOp).getOrigins()) {
        sinkFlattens((PCollection)col);
      }
      return;     
    }
    DeferredOp downOp = output.getDownOps().get(0);
    if(!(downOp instanceof ParallelDo)) {
      return;   
    }
    ParallelDo<T, ?> op = (ParallelDo<T, ?>)downOp; // PDo below current node
    Flatten<T> flatten = (Flatten<T>)dOp; // Flatten above current node
View Full Code Here

  <T> void fuseSiblingParallelDos(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>)arg;
    if(output.isMaterialized()) { // stop condition for recursive algorithm
      return;
    }
    DeferredOp dOp = output.getDeferredOp();
    if(!(dOp instanceof ParallelDo)) { // not a ParallelDo
      if(dOp instanceof OneToOneOp) {
        // Recursively apply this function to parent
        fuseSiblingParallelDos(((OneToOneOp)dOp).getOrigin());
        return;
View Full Code Here

  <T> void fuseParallelDos(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>)arg;
    if(output.isMaterialized()) { // stop condition for recursive algorithm
      return;
    }
    DeferredOp dOp = output.getDeferredOp();
    if(!(dOp instanceof ParallelDo)) { // not a ParallelDo
      if(dOp instanceof OneToOneOp) {
        // Recursively apply this function to parent
        fuseParallelDos(((OneToOneOp)dOp).getOrigin());
        return;
View Full Code Here

    // Get an Optimizer
    Optimizer optimizer = new Optimizer();
    optimizer.fuseSiblingParallelDos(output1); // one output is enough to fuse both because they share the parent
    // Check that the input's child ops have shrunk to 1
    assertEquals(lInput.downOps.size(), 1);
    DeferredOp op = lInput.downOps.get(0);
    // Check that there is only one op pointing to both outputs
    assertEquals(op, lOutput1.deferredOp);
    assertEquals(op, lOutput2.deferredOp);
    assertTrue(op instanceof MultipleParallelDo);
    MultipleParallelDo<Integer> mPDo = (MultipleParallelDo<Integer>)op;
View Full Code Here

TOP

Related Classes of com.tdunning.plume.local.lazy.op.DeferredOp

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.