Package org.infinispan.distexec.mapreduce.spi

Examples of org.infinispan.distexec.mapreduce.spi.MapReduceTaskLifecycleService


      // final reduce
      //TODO parallelize across cluster
      Map<KOut, VOut> result = new HashMap<KOut, VOut>();
     
      //hook into lifecycle
      MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
      try {
         taskLifecycleService.onPreExecute(reducer);
         for (Entry<KOut, List<VOut>> entry : reduceMap.entrySet()) {
            VOut reduced = reducer.reduce(entry.getKey(), (entry.getValue()).iterator());
            result.put(entry.getKey(), reduced);
         }
      } finally {
         taskLifecycleService.onPostExecute(reducer);
      }
      return result;
   }
View Full Code Here


         throw new IllegalStateException("Reduce phase of MapReduceTask " + taskId + " on node " + cdl.getAddress()
               + " executed with empty input keys");
      } else {
         final Reducer<KOut, VOut> reducer = reduceCommand.getReducer();
         final boolean sharedTmpCacheUsed = reduceCommand.isUseIntermediateSharedCache();
         MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
         log.tracef("For m/r task %s invoking %s at %s", taskId, reduceCommand, cdl.getAddress());
         long start = log.isTraceEnabled() ? timeService.time() : 0;
         try {
            Cache<IntermediateKey<KOut>, List<VOut>> cache = cacheManager.getCache(reduceCommand.getCacheName());
            taskLifecycleService.onPreExecute(reducer, cache);
            KeyFilter<IntermediateKey<KOut>> filter = new IntermediateKeyFilter<KOut>(taskId, !sharedTmpCacheUsed);
            //iterate all tmp cache entries in memory, do it in parallel
            DataContainer<IntermediateKey<KOut>, List<VOut>> dc = cache.getAdvancedCache().getDataContainer();
            dc.executeTask(filter, new DataContainerTask<IntermediateKey<KOut>, List<VOut>>() {
               @Override
               public void apply(IntermediateKey<KOut> k, InternalCacheEntry<IntermediateKey<KOut>, List<VOut>> v) {
                  KOut key = k.getKey();
                  //resolve Iterable<VOut> for iterated key stored in tmp cache
                  Iterable<VOut> value = getValue(v);
                  if (value == null) {
                     throw new IllegalStateException("Found invalid value in intermediate cache, for key " + key
                           + " during reduce phase execution on " + cacheManager.getAddress() + " for M/R task "
                           + taskId);
                  }
                  // and reduce it
                  VOut reduced = reducer.reduce(key, value.iterator());
                  result.put(key, reduced);
                  log.tracef("For m/r task %s reduced %s to %s at %s ", taskId, key, reduced, cdl.getAddress());
               }
            });

         } finally {
            if (log.isTraceEnabled()) {
               log.tracef("Reduce for task %s took %s milliseconds", reduceCommand.getTaskId(),
                     timeService.timeDuration(start, TimeUnit.MILLISECONDS));
            }
            taskLifecycleService.onPostExecute(reducer);
         }
      }
   }
View Full Code Here

      int maxCSize = mcc.getMaxCollectorSize();
      final Mapper<KIn, VIn, KOut, VOut> mapper = mcc.getMapper();
      final boolean inputKeysSpecified = keys != null && !keys.isEmpty();

      // hook map function into lifecycle and execute it
      MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
      final CollectableCollector<KOut, VOut> collector = new SynchronizedCollector<KOut, VOut>(
            new DefaultCollector<KIn, VIn, KOut, VOut>(mcc, maxCSize));
      DataContainer<KIn, VIn> dc = cache.getAdvancedCache().getDataContainer();
      log.tracef("For m/r task %s invoking %s with input keys %s",  mcc.getTaskId(), mcc, keys);
      long start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
         taskLifecycleService.onPreExecute(mapper, cache);
         //User specified input taks keys, most likely a short list of input keys (<10^3), iterate serially
         if (inputKeysSpecified) {
            for (KIn key : keys) {
               VIn value = cache.get(key);
               if (value != null) {
                  mapper.map(key, value, collector);
               }
            }
         } else {
            // here we have to iterate all entries in memory, do it in parallel
            dc.executeTask(new PrimaryOwnerFilter<KIn>(cdl), new DataContainerTask<KIn, VIn>() {
               @Override
               public void apply(KIn key , InternalCacheEntry<KIn, VIn> v) {
                  VIn value = getValue(v);
                  if (value != null) {
                     mapper.map(key, value, collector);
                  }
               }
            });
         }
         // in case we have stores, we have to process key/values from there as well
         if (persistenceManager != null && !inputKeysSpecified) {
               KeyFilter<?> keyFilter = new CompositeKeyFilter<KIn>(new PrimaryOwnerFilter<KIn>(cdl), new CollectionKeyFilter<KIn>(dc.keySet()));
               persistenceManager.processOnAllStores(keyFilter, new MapReduceCacheLoaderTask<KIn, VIn, KOut, VOut>(mapper, collector),
                     true, false);
         }
      } finally {
         if (log.isTraceEnabled()) {
            log.tracef("Map phase for task %s took %s milliseconds",
                       mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS));
         }
         taskLifecycleService.onPostExecute(mapper);
      }
      return collector;
   }
View Full Code Here

      Set<KIn> keys = mcc.getKeys();
      int maxCSize = mcc.getMaxCollectorSize();
      final Mapper<KIn, VIn, KOut, VOut> mapper = mcc.getMapper();
      final boolean inputKeysSpecified = keys != null && !keys.isEmpty();
      // hook map function into lifecycle and execute it
      MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
      DataContainer<KIn, VIn>  dc = cache.getAdvancedCache().getDataContainer();
      log.tracef("For m/r task %s invoking %s with input keys %s", mcc.getTaskId(), mcc, mcc.getKeys());
      long start = log.isTraceEnabled() ? timeService.time() : 0;
      final Set<KOut> intermediateKeys = new HashSet<KOut>();
      try {
         taskLifecycleService.onPreExecute(mapper, cache);
         if (inputKeysSpecified) {
            DefaultCollector<KIn, VIn, KOut, VOut> c = new DefaultCollector<KIn, VIn, KOut, VOut>(mcc, maxCSize);
            for (KIn key : keys) {
               VIn value = cache.get(key);
               if (value != null) {
                  mapper.map(key, value, c);
               }
            }
            combine(mcc, c);
            Set<KOut> s = migrateIntermediateKeysAndValues(mcc, c.collectedValues());
            intermediateKeys.addAll(s);
         } else {
            MapCombineTask<KIn, VIn, KOut, VOut> task = new MapCombineTask<KIn, VIn, KOut, VOut>(mcc, maxCSize);
            dc.executeTask(new PrimaryOwnerFilter<KIn>(cdl), task);
            intermediateKeys.addAll(task.getMigratedIntermediateKeys());
            //the last chunk of remaining keys/values to migrate
            Map<KOut, List<VOut>> combinedValues = task.collectedValues();
            Set<KOut> lastOne = migrateIntermediateKeysAndValues(mcc, combinedValues);
            intermediateKeys.addAll(lastOne);
         }

         // in case we have stores, we have to process key/values from there as well
         if (persistenceManager != null && !inputKeysSpecified) {
            KeyFilter<KIn> keyFilter = new CompositeKeyFilter<KIn>(new PrimaryOwnerFilter<KIn>(cdl),
                  new CollectionKeyFilter<KIn>(dc.keySet()));

            MapCombineTask<KIn, VIn, KOut, VOut> task = new MapCombineTask<KIn, VIn, KOut, VOut>(mcc, maxCSize);
            persistenceManager.processOnAllStores(keyFilter, task, true, false);
            intermediateKeys.addAll(task.getMigratedIntermediateKeys());
            //the last chunk of remaining keys/values to migrate
            Map<KOut, List<VOut>> combinedValues =  task.collectedValues();
            Set<KOut> lastOne = migrateIntermediateKeysAndValues(mcc, combinedValues);
            intermediateKeys.addAll(lastOne);
         }
      } finally {
         if (log.isTraceEnabled()) {
            log.tracef("Map phase for task %s took %s milliseconds", mcc.getTaskId(),
                  timeService.timeDuration(start, TimeUnit.MILLISECONDS));
         }
         taskLifecycleService.onPostExecute(mapper);
      }
      return intermediateKeys;
   }
View Full Code Here

         CollectableCollector<KOut, VOut> c) {
      if (mcc.hasCombiner()) {
         Reducer<KOut, VOut> combiner = mcc.getCombiner();
         Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
         log.tracef("For m/r task %s invoking combiner %s at %s", mcc.getTaskId(), mcc, cdl.getAddress());
         MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
         long start = log.isTraceEnabled() ? timeService.time() : 0;
         try {
            taskLifecycleService.onPreExecute(combiner, cache);
            for (Entry<KOut, List<VOut>> e : c.collectedValues().entrySet()) {
               List<VOut> mapped = e.getValue();
               if (mapped.size() > 1) {
                   VOut reduced = combiner.reduce(e.getKey(), mapped.iterator());
                   c.emitReduced(e.getKey(), reduced);
               }
            }
         } finally {
            if (log.isTraceEnabled()) {
               log.tracef("Combine for task %s took %s milliseconds", mcc.getTaskId(),
                     timeService.timeDuration(start, TimeUnit.MILLISECONDS));
            }
            taskLifecycleService.onPostExecute(combiner);
         }
      }
   }
View Full Code Here

            keys = new HashSet<Object>();

         keys.addAll(selectedKeys);
      }
      //hook into lifecycle
      MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
      log.tracef("For %s at %s invoking mapper on keys %s", this, localAddress, keys);
      DefaultCollector<Object, Object> collector = new DefaultCollector<Object, Object>();
      try {
         taskLifecycleService.onPreExecute(mapper);        
         for (Object key : keys) {
            GetKeyValueCommand command = commandsFactory.buildGetKeyValueCommand(key,
                     ctx.getFlags());
            command.setReturnCacheEntry(false);
            Object value = invoker.invoke(ctx, command);
            mapper.map(key, value, collector);
         }
      } finally {
         taskLifecycleService.onPostExecute(mapper);
      }
     
      Map<Object, Object> reducedMap = new HashMap<Object, Object>();
      try {
         taskLifecycleService.onPreExecute(reducer);
         Map<Object, List<Object>> collectedValues = collector.collectedValues();        
         for (Entry<Object, List<Object>> e : collectedValues.entrySet()) {
            List<Object> list = e.getValue();
            if (list.size() > 1) {
               Object reduced = reducer.reduce(e.getKey(), list.iterator());
               reducedMap.put(e.getKey(), reduced);
            } else {
               reducedMap.put(e.getKey(), list.get(0));
            }
         }
      } finally {
         taskLifecycleService.onPostExecute(reducer);
      }
      log.tracef("%s executed at %s was reduced to %s", this, localAddress, reducedMap);
      return reducedMap;
   }
View Full Code Here

         //illegal state, raise exception
         throw new IllegalStateException("Reduce phase of MapReduceTask " + taskId + " on node "
                  + cdl.getAddress() + " executed with empty input keys");
      } else{
         //first hook into lifecycle
         MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
         log.tracef("For m/r task %s invoking %s at %s",  taskId, reduceCommand, cdl.getAddress());
         int interruptCount = 0;
         long start = log.isTraceEnabled() ? timeService.time() : 0;
         try {
            taskLifecycleService.onPreExecute(reducer, cache);
            for (KOut key : keys) {
               interruptCount++;
               if (checkInterrupt(interruptCount++) && Thread.currentThread().isInterrupted())
                  throw new InterruptedException();
               //load result value from map phase
               List<VOut> value;
               if(useIntermediateKeys) {
                  value = tmpCache.get(new IntermediateCompositeKey<KOut>(taskId, key));
               } else {
                  value = tmpCache.get(key);
               }
               // and reduce it
               VOut reduced = reducer.reduce(key, value.iterator());
               result.put(key, reduced);
               log.tracef("For m/r task %s reduced %s to %s at %s ", taskId, key, reduced, cdl.getAddress());
            }
         } finally {
            if (log.isTraceEnabled()) {
               log.tracef("Reduce for task %s took %s milliseconds", reduceCommand.getTaskId(),
                          timeService.timeDuration(start, TimeUnit.MILLISECONDS));
            }
            taskLifecycleService.onPostExecute(reducer);
         }
      }
      return result;
   }
View Full Code Here

         inputKeys = filterLocalPrimaryOwner(cache.getAdvancedCache().withFlags(Flag.SKIP_CACHE_LOAD).keySet(), dm);
      } else {
         inputKeysCopy = new HashSet<KIn>(keys);
      }
      // hook map function into lifecycle and execute it
      MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
      DefaultCollector<KOut, VOut> collector = new DefaultCollector<KOut, VOut>();
      log.tracef("For m/r task %s invoking %s with input keys %s",  mcc.getTaskId(), mcc, inputKeys);
      int interruptCount = 0;
      long start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
         taskLifecycleService.onPreExecute(mapper, cache);
         for (KIn key : inputKeys) {
            if (checkInterrupt(interruptCount++) && Thread.currentThread().isInterrupted())
               throw new InterruptedException();

            VIn value = cache.get(key);
            mapper.map(key, value, collector);
            if (inputKeysSpecified) {
               inputKeysCopy.remove(key);
            }
         }

         if (persistenceManager != null) {
            AdvancedCacheLoader.KeyFilter keyFilter;
            if (inputKeysSpecified) {
               keyFilter = new CollectionKeyFilter(filterLocalPrimaryOwner(inputKeysCopy, dm), true);
            } else {
               keyFilter = new CompositeFilter(new PrimaryOwnerFilter(cdl), new CollectionKeyFilter(inputKeys));
            }
            persistenceManager.processOnAllStores(keyFilter, new MapReduceCacheLoaderTask(mapper, collector),
                                                  new WithinThreadExecutor(), true, false);
         }
      } finally {
         if (log.isTraceEnabled()) {
            log.tracef("Map phase for task %s took %s milliseconds",
                       mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS));
         }
         taskLifecycleService.onPostExecute(mapper);
      }
      return collector;
   }
View Full Code Here

      DistributionManager dm = tmpCache.getAdvancedCache().getDistributionManager();

      if (combiner != null) {
         Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
         log.tracef("For m/r task %s invoking combiner %s at %s",  taskId, mcc, cdl.getAddress());
         MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
         Map<KOut, VOut> combinedMap = new ConcurrentHashMap<KOut, VOut>();
         long start = log.isTraceEnabled() ? timeService.time() : 0;
         try {
            taskLifecycleService.onPreExecute(combiner, cache);
            Map<KOut, List<VOut>> collectedValues = collector.collectedValues();
            for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) {
               List<VOut> list = e.getValue();
               VOut combined;
               if (list.size() > 1) {
                  combined = combiner.reduce(e.getKey(), list.iterator());
                  combinedMap.put(e.getKey(), combined);
               } else {
                  combined = list.get(0);
                  combinedMap.put(e.getKey(), combined);
               }
               log.tracef("For m/r task %s combined %s to %s at %s" , taskId, e.getKey(), combined, cdl.getAddress());
            }
         } finally {
            if (log.isTraceEnabled()) {
               log.tracef("Combine for task %s took %s milliseconds", mcc.getTaskId(),
                          timeService.timeDuration(start, TimeUnit.MILLISECONDS));
            }
            taskLifecycleService.onPostExecute(combiner);
         }
         Map<Address, List<KOut>> keysToNodes = mapKeysToNodes(dm, taskId, combinedMap.keySet(),
                  emitCompositeIntermediateKeys);

         start = log.isTraceEnabled() ? timeService.time() : 0;
View Full Code Here

      Map<KOut, List<VOut>> result;

      if (combiner != null) {
         result = new HashMap<KOut, List<VOut>>();
         log.tracef("For m/r task %s invoking combiner %s at %s",  taskId, mcc, cdl.getAddress());
         MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance();
         long start = log.isTraceEnabled() ? timeService.time() : 0;
         try {
            Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
            taskLifecycleService.onPreExecute(combiner, cache);
            Map<KOut, List<VOut>> collectedValues = collector.collectedValues();
            for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) {
               VOut combined;
               List<VOut> list = e.getValue();
               List<VOut> l = new LinkedList<VOut>();
               if (list.size() > 1) {
                  combined = combiner.reduce(e.getKey(), list.iterator());
               } else {
                  combined = list.get(0);
               }
               l.add(combined);
               result.put(e.getKey(), l);
               log.tracef("For m/r task %s combined %s to %s at %s" , taskId, e.getKey(), combined, cdl.getAddress());
            }
         } finally {
            if (log.isTraceEnabled()) {
               log.tracef("Combine for task %s took %s milliseconds", mcc.getTaskId(),
                          timeService.timeDuration(start, TimeUnit.MILLISECONDS));
            }
            taskLifecycleService.onPostExecute(combiner);
         }
      } else {
         // Combiner not specified
         result = collector.collectedValues();
      }
View Full Code Here

TOP

Related Classes of org.infinispan.distexec.mapreduce.spi.MapReduceTaskLifecycleService

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.