Package org.apache.mahout.h2obindings.ops

Source Code of org.apache.mahout.h2obindings.ops.MRTaskBMF

/*
*  Licensed to the Apache Software Foundation (ASF) under one or more
*  contributor license agreements.  See the NOTICE file distributed with
*  this work for additional information regarding copyright ownership.
*  The ASF licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.apache.mahout.h2obindings.ops;

import org.apache.mahout.math.Matrix;
import org.apache.mahout.h2obindings.H2OBlockMatrix;
import org.apache.mahout.h2obindings.drm.H2ODrm;

import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;
import water.fvec.NewChunk;

import java.io.Serializable;
import java.util.Arrays;

import scala.reflect.ClassTag;

/**
* MapBlock operator.
*/
public class MapBlock {
  /**
   * Execute a BlockMapFunction on DRM partitions to create a new DRM.
   *
   * @param drmA DRM representing matrix A.
   * @param ncol Number of columns output by BMF.
   * @param bmf BlockMapFunction which maps input DRM partition to output.
   * @param isRstr flag indicating if key type of output DRM is a String.
   * @param k ClassTag of intput DRM key type.
   * @param r ClassTag of output DRM key type.
   * @return new DRM constructed from mapped blocks of drmA through bmf.
   */
  public static <K,R> H2ODrm exec(H2ODrm drmA, int ncol, Object bmf, final boolean isRstr,
                                  final ClassTag<K> k, final ClassTag<R> r) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;

    /**
     * MRTask to execute bmf on partitions. Partitions are
     * made accessible to bmf in the form of H2OBlockMatrix.
     */
    class MRTaskBMF extends MRTask<MRTaskBMF> {
      Serializable bmf;
      Vec labels;
      MRTaskBMF(Object _bmf, Vec _labels) {
        // BlockMapFun does not implement Serializable,
        // but Scala closures are _always_ Serializable.
        //
        // So receive the object as a plain Object (else
        // compilation fails) and typcast it with conviction,
        // that Scala always tags the actually generated
        // closure functions with Serializable.
        bmf = (Serializable)_bmf;
        labels = _labels;
      }

      /** Create H2OBlockMatrix from the partition */
      private Matrix blockify(Chunk chks[]) {
        return new H2OBlockMatrix(chks);
      }

      /** Ingest the output of bmf into the output partition */
      private void deblockify(Matrix out, NewChunk ncs[]) {
        // assert (out.colSize() == ncs.length)
        for (int c = 0; c < out.columnSize(); c++) {
          for (int r = 0; r < out.rowSize(); r++) {
            ncs[c].addNum(out.getQuick(r, c));
          }
        }
      }

      // Input:
      // chks.length == A.numCols()
      //
      // Output:
      // ncs.length == (A.numCols() + 1) if String keyed
      //             (A.numCols() + 0) if Int or Long keyed
      //
      // First A.numCols() ncs[] elements are fed back the output
      // of bmf() output's _2 in deblockify()
      //
      // If String keyed, then MapBlockHelper.exec() would have
      // filled in the Strings into ncs[ncol] already
      //
      public void map(Chunk chks[], NewChunk ncs[]) {
        long start = chks[0].start();
        NewChunk nclabel = isRstr ? ncs[ncs.length - 1] : null;
        deblockify(MapBlockHelper.exec(bmf, blockify(chks), start, labels, nclabel, k, r), ncs);
        // assert chks[i]._len == ncs[j]._len
      }
    }

    int ncolRes = ncol + (isRstr ? 1 : 0);
    Frame fmap = new MRTaskBMF(bmf, keys).doAll(ncolRes, A).outputFrame(null, null);
    Vec vmap = null;
    if (isRstr) {
      // If output was String keyed, then the last Vec in fmap is the String vec.
      // If so, peel it out into a separate Vec (vmap) and set fmap to be the
      // Frame with just the first ncol Vecs
      vmap = fmap.vecs()[ncol];
      fmap = new Frame(Arrays.copyOfRange(fmap.vecs(), 0, ncol));
    }
    return new H2ODrm(fmap, vmap);
  }
}
TOP

Related Classes of org.apache.mahout.h2obindings.ops.MRTaskBMF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.