Source Code of org.encog.engine.opencl.kernels.KernelNetworkTrain

/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *   
 * For more information on Heaton Research copyrights, licenses 
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */


package org.encog.engine.opencl.kernels;


import java.util.Map;


import org.encog.engine.data.BasicEngineData;
import org.encog.engine.data.EngineData;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.network.activation.ActivationFunction;
import org.encog.engine.network.flat.FlatNetwork;
import org.encog.engine.network.train.prop.OpenCLTrainingProfile;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.EncogCLQueue;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.exceptions.OutOfOpenCLResources;
import org.encog.engine.util.EngineArray;
import org.encog.engine.util.ResourceLoader;
import org.jocl.CLException;
import org.jocl.cl_mem;


/**
 * An OpenCL kernel that is designed to calculate gradients and help train a
 * neural network.
 */
public class KernelNetworkTrain extends EncogKernel {


  /**
   * The input count.
   */
  public static final int PARRAY_INPUT_COUNT = 0;


  /**
   * The output count.
   */
  public static final int PARRAY_OUTPUT_COUNT = 1;


  /**
   * The layer count.
   */
  public static final int PARRAY_LAYER_COUNT = 2;


  /**
   * Are we learning? 0=no, 1 =yes.
   */
  public static final int PARRAY_LEARN = 3;


  /**
   * What is the starting index to train at.
   */
  public static final int PARRAY_START = 4;


  /**
   * Items to train per call.
   */
  public static final int PARRAY_ITEMS_PER = 5;


  /**
   * Items to train per call.
   */
  public static final int PARRAY_ITERATIONS = 6;


  /**
   * A buffer to communicate weights to the kernel.
   */
  private cl_mem weightInArrayBuffer;


  /**
   * A buffer to communicate weights from the kernel.
   */
  private cl_mem weightOutArrayBuffer;


  /**
   * A buffer to hold the layer index.
   */
  private cl_mem layerIndexBuffer;


  /**
   * A buffer to hold the layer counts.
   */
  private cl_mem layerCountBuffer;


  /**
   * A buffer to hold the layer feed counts.
   */
  private cl_mem layerFeedCountBuffer;


  /**
   * A buffer to hold the weight indexes.
   */
  private cl_mem weightIndexBuffer;


  /**
   * A buffer to hold the activations for each of the layers.
   */
  private cl_mem activationTypeBuffer;


  /**
   * The temp data in buffer. Temp data that is used while training.
   */
  private cl_mem tempDataInBuffer;


  /**
   * The temp data out buffer. Temp data that is used while training.
   */
  private cl_mem tempDataOutBuffer;


  /**
   * The weight and bias array for the network.
   */
  private final float[] weightInArray;


  /**
   * The weight output array.
   */
  private final float[] weightOutArray;


  /**
   * The temp data array. Temp data that is used while training.
   */
  private float[] tempDataArray;


  /**
   * The size of all layer deltas.
   */
  private int layerDeltaSize;


  /**
   * An array to hold the input to the neural network.
   */
  private final float[] inputArray;


  /**
   * An array to hold the ideal values expected from the network.
   */
  private final float[] idealArray;


  /**
   * The input buffer.
   */
  private cl_mem inputBuffer;


  /**
   * The ideal buffer.
   */
  private cl_mem idealBuffer;


  /**
   * Holds parameters passed to the kernel.
   */
  private final int[] paramArray;


  /**
   * A buffer to hold the parameters.
   */
  private cl_mem paramBuffer;


  /**
   * A buffer to hold the errors.
   */
  private cl_mem errorBuffer;


  /**
   * A buffer to hold the gradients.
   */
  private cl_mem gradientOutBuffer;


  /**
   * The gradient input buffer.
   */
  private cl_mem gradientInBuffer;


  /**
   * The network to train.
   */
  private final FlatNetwork flat;


  /**
   * The training errors for this workload.
   */
  private float[] errors;


  /**
   * The gradients.
   */
  private final float[] gradients;


  /**
   * The training data to use.
   */
  private final EngineIndexableSet training;


  /**
   * The device to train with.
   */
  private final EncogCLDevice device;


  /**
   * The length of the training data.
   */
  private final int trainingLength;


  /**
   * Construct a kernel to train the network.
   * 
   * @param device
   *            The OpenCL device to use.
   * @param flat
   *            The network to train.
   * @param training
   *            The training data.
   * @param tempDataSize
   *            How much temp data.
   */
  public KernelNetworkTrain(final EncogCLDevice device,
      final FlatNetwork flat, final EngineIndexableSet training,
      final int tempDataSize) {
    super(device, "org/encog/engine/resources/KernelNetTrain.txt",
        "NetworkTrain");


    this.training = training;
    this.trainingLength = (int) this.training.getRecordCount();
    this.device = device;
    this.flat = flat;
    this.weightInArray = new float[flat.getWeights().length];
    this.weightOutArray = new float[flat.getWeights().length];
    this.tempDataArray = new float[tempDataSize];
    this.gradients = new float[flat.getWeights().length];


    this.layerDeltaSize = 0;
    for (int i = 0; i < flat.getLayerCounts().length; i++) {
      this.layerDeltaSize += flat.getLayerCounts()[i];
    }


    final int inputSize = flat.getInputCount();
    final int idealSize = flat.getOutputCount();


    this.inputArray = new float[inputSize * this.trainingLength];
    this.idealArray = new float[idealSize * this.trainingLength];
    this.paramArray = new int[10];


    final EngineData pair = BasicEngineData.createPair(
        flat.getInputCount(), flat.getOutputCount());


    int inputIndex = 0;
    int idealIndex = 0;


    for (int i = 0; i < this.trainingLength; i++) {
      training.getRecord(i, pair);
      for (int col = 0; col < flat.getInputCount(); col++) {
        this.inputArray[inputIndex++] = (float) pair.getInputArray()[col];
      }


      for (int col = 0; col < flat.getOutputCount(); col++) {
        this.idealArray[idealIndex++] = (float) pair.getIdealArray()[col];
      }
    }


  }


  /**
   * Assign the workgroup sizes based on the training set size.
   * 
   * @param trainingSize
   *            The training set size.
   * @param requestedGlobalSize
   *            The requested global size.
   */
  public void assignWorkgroupSizes(final int trainingSize,
      final int requestedGlobalSize) {
    // Calculate the work-item dimensions
    final int threads = Math.min(trainingSize, requestedGlobalSize);
    setLocalWork(Math.min(getMaxWorkGroupSize(), threads));
    setGlobalWork(threads);
  }


  /**
   * Calculate one iteration over the specified range.
   * 
   * @param start
   *            The starting position to calculate for.
   * @param size
   *            The ending position to calculate for.
   * @param iterations
   *            The number of iterations to execute.
   * @param learn
   *            True, if we should learn.
   */
  public void calculate(final int start, final int size, final boolean learn,
      final int iterations) {
    prepareKernel();


    this.paramArray[KernelNetworkTrain.PARRAY_LEARN] = learn ? 1 : 0;
    this.paramArray[KernelNetworkTrain.PARRAY_START] = start;
    this.paramArray[KernelNetworkTrain.PARRAY_ITEMS_PER] = size;
    this.paramArray[KernelNetworkTrain.PARRAY_ITERATIONS] = iterations;


    EngineArray.arrayCopy(this.flat.getWeights(), this.weightInArray);


    setArg(0, this.paramBuffer);
    setArg(1, this.errorBuffer);
    setArg(2, this.layerIndexBuffer);
    setArg(3, this.layerCountBuffer);
    setArg(4, this.layerFeedCountBuffer);
    setArg(5, this.weightIndexBuffer);
    setArg(6, this.inputBuffer);
    setArg(7, this.idealBuffer);
    setArg(8, this.weightInArrayBuffer);
    setArg(9, this.weightOutArrayBuffer);
    setArg(10, this.gradientOutBuffer);
    setArg(11, this.activationTypeBuffer);
    setArg(12, this.tempDataInBuffer);
    setArg(13, this.tempDataOutBuffer);
    setArg(14, this.gradientInBuffer);


    try {
      final EncogCLQueue queue = this.device.getQueue();


      EngineArray.fill(this.gradients, 0);


      if (learn) {
        this.paramArray[3] = 1;
      } else {
        this.paramArray[3] = 0;
      }


      this.paramArray[4] = start;


      queue.array2Buffer(this.weightInArray, this.weightInArrayBuffer);
      queue.array2Buffer(this.tempDataArray, this.tempDataInBuffer);
      queue.array2Buffer(this.gradients, this.gradientInBuffer);
      queue.array2Buffer(this.paramArray, this.paramBuffer);


      // Execute the kernel
      queue.execute(this);
      queue.waitFinish();


      // Read the results
      queue.buffer2Array(this.errorBuffer, this.errors);
      queue.buffer2Array(this.weightOutArrayBuffer, this.weightOutArray);
      queue.buffer2Array(this.tempDataOutBuffer, this.tempDataArray);
      queue.buffer2Array(this.gradientOutBuffer, this.gradients);


    } catch (final CLException e) {
      if (e.getMessage().equals("CL_OUT_OF_RESOURCES")) {
        throw new OutOfOpenCLResources(e);
      } else {
        throw new OpenCLError(e);
      }
    } catch (final Exception e) {
      throw new OpenCLError(e);
    }
  }


  /**
   * Compile the kernel.
   * 
   * @param options
   *            The options.
   * @param profile
   *            The OpenCL training profile.
   * @param network
   *            The network to compile for.
   */
  public void compile(final Map<String, String> options,
      final OpenCLTrainingProfile profile, final FlatNetwork network) {


    final ActivationFunction activation = network.getActivationFunctions()[0];
    final StringBuilder source = new StringBuilder();


    source.append("#define ACTIVATION(x,slope)");
    source.append(activation.getOpenCLExpression(false));
    source.append("\r\n");


    source.append("#define DERIVATIVE(x,slope)");
    source.append(activation.getOpenCLExpression(true));
    source.append("\r\n");


    source.append(ResourceLoader.loadString(getSourceName()));
    setCLSource(source.toString());


    compile(options);
    profile.calculateKernelParams(this, this.training);
    // setup
    init(profile);
  }


  /**
   * @return the errors
   */
  public float[] getErrors() {
    return this.errors;
  }


  /**
   * @return the tempDataArray
   */
  public float[] getTempDataArray() {
    return this.tempDataArray;
  }


  /**
   * @return the weightOutArray
   */
  public float[] getWeightOutArray() {
    return this.weightOutArray;
  }


  /**
   * Setup the kernel.
   * @param profile The OpenCL training profile.
   */
  public void init(final OpenCLTrainingProfile profile) {
    final int errorSize = profile.getKernelGlobalWorkgroup();
    final int gradientSize = profile.getKernelGlobalWorkgroup()
        * this.flat.getWeights().length;


    this.errors = new float[errorSize];


    this.paramArray[0] = this.flat.getInputCount();
    this.paramArray[1] = this.flat.getOutputCount();
    this.paramArray[2] = this.flat.getLayerCounts().length;


    // create the buffers
    this.inputBuffer = createArrayReadOnly(this.inputArray);
    this.idealBuffer = createArrayReadOnly(this.idealArray);
    this.errorBuffer = createFloatArrayWriteOnly(errorSize);
    this.gradientOutBuffer = createFloatArrayWriteOnly(gradientSize);
    this.gradientInBuffer = createArrayReadOnly(this.gradients);
    this.paramBuffer = createArrayReadOnly(this.paramArray);
    this.layerIndexBuffer = createArrayReadOnly(this.flat.getLayerIndex());
    this.layerCountBuffer = createArrayReadOnly(this.flat.getLayerCounts());
    this.layerFeedCountBuffer = createArrayReadOnly(this.flat
        .getLayerFeedCounts());
    this.weightInArrayBuffer = createArrayReadOnly(this.weightInArray);
    this.weightOutArrayBuffer = createFloatArrayWriteOnly(this.weightInArray.length);
    this.weightIndexBuffer = createArrayReadOnly(this.flat.getWeightIndex());
    this.activationTypeBuffer = createArrayReadOnly(this.flat
        .getLayerCounts());
    this.tempDataInBuffer = createArrayReadOnly(this.tempDataArray);
    this.tempDataOutBuffer = createFloatArrayWriteOnly(this.tempDataArray.length);
  }


  /**
   * Release the kernel and all buffers.
   */
  @Override
  public void release() {
    super.release();
    releaseBuffer(this.activationTypeBuffer);
    releaseBuffer(this.errorBuffer);
    releaseBuffer(this.gradientOutBuffer);
    releaseBuffer(this.gradientInBuffer);
    releaseBuffer(this.idealBuffer);
    releaseBuffer(this.inputBuffer);
    releaseBuffer(this.layerCountBuffer);
    releaseBuffer(this.layerFeedCountBuffer);
    releaseBuffer(this.layerIndexBuffer);
    releaseBuffer(this.paramBuffer);
    releaseBuffer(this.tempDataInBuffer);
    releaseBuffer(this.tempDataOutBuffer);
    releaseBuffer(this.weightInArrayBuffer);
    releaseBuffer(this.weightIndexBuffer);
    releaseBuffer(this.weightOutArrayBuffer);
  }


  /**
   * @param tempDataArray
   *            the tempDataArray to set
   */
  public void setTempDataArray(final float[] tempDataArray) {
    this.tempDataArray = tempDataArray;
  }
}
Source Code of org.encog.engine.opencl.kernels.KernelNetworkTrain

Related Classes of org.encog.engine.opencl.kernels.KernelNetworkTrain