/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.engine.opencl.kernels;
import java.util.Map;
import org.encog.engine.data.BasicEngineData;
import org.encog.engine.data.EngineData;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.network.activation.ActivationFunction;
import org.encog.engine.network.flat.FlatNetwork;
import org.encog.engine.network.train.prop.OpenCLTrainingProfile;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.EncogCLQueue;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.exceptions.OutOfOpenCLResources;
import org.encog.engine.util.EngineArray;
import org.encog.engine.util.ResourceLoader;
import org.jocl.CLException;
import org.jocl.cl_mem;
/**
* An OpenCL kernel that is designed to calculate gradients and help train a
* neural network.
*/
public class KernelNetworkTrain extends EncogKernel {
/**
* The input count.
*/
public static final int PARRAY_INPUT_COUNT = 0;
/**
* The output count.
*/
public static final int PARRAY_OUTPUT_COUNT = 1;
/**
* The layer count.
*/
public static final int PARRAY_LAYER_COUNT = 2;
/**
* Are we learning? 0=no, 1 =yes.
*/
public static final int PARRAY_LEARN = 3;
/**
* What is the starting index to train at.
*/
public static final int PARRAY_START = 4;
/**
* Items to train per call.
*/
public static final int PARRAY_ITEMS_PER = 5;
/**
* Items to train per call.
*/
public static final int PARRAY_ITERATIONS = 6;
/**
* A buffer to communicate weights to the kernel.
*/
private cl_mem weightInArrayBuffer;
/**
* A buffer to communicate weights from the kernel.
*/
private cl_mem weightOutArrayBuffer;
/**
* A buffer to hold the layer index.
*/
private cl_mem layerIndexBuffer;
/**
* A buffer to hold the layer counts.
*/
private cl_mem layerCountBuffer;
/**
* A buffer to hold the layer feed counts.
*/
private cl_mem layerFeedCountBuffer;
/**
* A buffer to hold the weight indexes.
*/
private cl_mem weightIndexBuffer;
/**
* A buffer to hold the activations for each of the layers.
*/
private cl_mem activationTypeBuffer;
/**
* The temp data in buffer. Temp data that is used while training.
*/
private cl_mem tempDataInBuffer;
/**
* The temp data out buffer. Temp data that is used while training.
*/
private cl_mem tempDataOutBuffer;
/**
* The weight and bias array for the network.
*/
private final float[] weightInArray;
/**
* The weight output array.
*/
private final float[] weightOutArray;
/**
* The temp data array. Temp data that is used while training.
*/
private float[] tempDataArray;
/**
* The size of all layer deltas.
*/
private int layerDeltaSize;
/**
* An array to hold the input to the neural network.
*/
private final float[] inputArray;
/**
* An array to hold the ideal values expected from the network.
*/
private final float[] idealArray;
/**
* The input buffer.
*/
private cl_mem inputBuffer;
/**
* The ideal buffer.
*/
private cl_mem idealBuffer;
/**
* Holds parameters passed to the kernel.
*/
private final int[] paramArray;
/**
* A buffer to hold the parameters.
*/
private cl_mem paramBuffer;
/**
* A buffer to hold the errors.
*/
private cl_mem errorBuffer;
/**
* A buffer to hold the gradients.
*/
private cl_mem gradientOutBuffer;
/**
* The gradient input buffer.
*/
private cl_mem gradientInBuffer;
/**
* The network to train.
*/
private final FlatNetwork flat;
/**
* The training errors for this workload.
*/
private float[] errors;
/**
* The gradients.
*/
private final float[] gradients;
/**
* The training data to use.
*/
private final EngineIndexableSet training;
/**
* The device to train with.
*/
private final EncogCLDevice device;
/**
* The length of the training data.
*/
private final int trainingLength;
/**
* Construct a kernel to train the network.
*
* @param device
* The OpenCL device to use.
* @param flat
* The network to train.
* @param training
* The training data.
* @param tempDataSize
* How much temp data.
*/
public KernelNetworkTrain(final EncogCLDevice device,
final FlatNetwork flat, final EngineIndexableSet training,
final int tempDataSize) {
super(device, "org/encog/engine/resources/KernelNetTrain.txt",
"NetworkTrain");
this.training = training;
this.trainingLength = (int) this.training.getRecordCount();
this.device = device;
this.flat = flat;
this.weightInArray = new float[flat.getWeights().length];
this.weightOutArray = new float[flat.getWeights().length];
this.tempDataArray = new float[tempDataSize];
this.gradients = new float[flat.getWeights().length];
this.layerDeltaSize = 0;
for (int i = 0; i < flat.getLayerCounts().length; i++) {
this.layerDeltaSize += flat.getLayerCounts()[i];
}
final int inputSize = flat.getInputCount();
final int idealSize = flat.getOutputCount();
this.inputArray = new float[inputSize * this.trainingLength];
this.idealArray = new float[idealSize * this.trainingLength];
this.paramArray = new int[10];
final EngineData pair = BasicEngineData.createPair(
flat.getInputCount(), flat.getOutputCount());
int inputIndex = 0;
int idealIndex = 0;
for (int i = 0; i < this.trainingLength; i++) {
training.getRecord(i, pair);
for (int col = 0; col < flat.getInputCount(); col++) {
this.inputArray[inputIndex++] = (float) pair.getInputArray()[col];
}
for (int col = 0; col < flat.getOutputCount(); col++) {
this.idealArray[idealIndex++] = (float) pair.getIdealArray()[col];
}
}
}
/**
* Assign the workgroup sizes based on the training set size.
*
* @param trainingSize
* The training set size.
* @param requestedGlobalSize
* The requested global size.
*/
public void assignWorkgroupSizes(final int trainingSize,
final int requestedGlobalSize) {
// Calculate the work-item dimensions
final int threads = Math.min(trainingSize, requestedGlobalSize);
setLocalWork(Math.min(getMaxWorkGroupSize(), threads));
setGlobalWork(threads);
}
/**
* Calculate one iteration over the specified range.
*
* @param start
* The starting position to calculate for.
* @param size
* The ending position to calculate for.
* @param iterations
* The number of iterations to execute.
* @param learn
* True, if we should learn.
*/
public void calculate(final int start, final int size, final boolean learn,
final int iterations) {
prepareKernel();
this.paramArray[KernelNetworkTrain.PARRAY_LEARN] = learn ? 1 : 0;
this.paramArray[KernelNetworkTrain.PARRAY_START] = start;
this.paramArray[KernelNetworkTrain.PARRAY_ITEMS_PER] = size;
this.paramArray[KernelNetworkTrain.PARRAY_ITERATIONS] = iterations;
EngineArray.arrayCopy(this.flat.getWeights(), this.weightInArray);
setArg(0, this.paramBuffer);
setArg(1, this.errorBuffer);
setArg(2, this.layerIndexBuffer);
setArg(3, this.layerCountBuffer);
setArg(4, this.layerFeedCountBuffer);
setArg(5, this.weightIndexBuffer);
setArg(6, this.inputBuffer);
setArg(7, this.idealBuffer);
setArg(8, this.weightInArrayBuffer);
setArg(9, this.weightOutArrayBuffer);
setArg(10, this.gradientOutBuffer);
setArg(11, this.activationTypeBuffer);
setArg(12, this.tempDataInBuffer);
setArg(13, this.tempDataOutBuffer);
setArg(14, this.gradientInBuffer);
try {
final EncogCLQueue queue = this.device.getQueue();
EngineArray.fill(this.gradients, 0);
if (learn) {
this.paramArray[3] = 1;
} else {
this.paramArray[3] = 0;
}
this.paramArray[4] = start;
queue.array2Buffer(this.weightInArray, this.weightInArrayBuffer);
queue.array2Buffer(this.tempDataArray, this.tempDataInBuffer);
queue.array2Buffer(this.gradients, this.gradientInBuffer);
queue.array2Buffer(this.paramArray, this.paramBuffer);
// Execute the kernel
queue.execute(this);
queue.waitFinish();
// Read the results
queue.buffer2Array(this.errorBuffer, this.errors);
queue.buffer2Array(this.weightOutArrayBuffer, this.weightOutArray);
queue.buffer2Array(this.tempDataOutBuffer, this.tempDataArray);
queue.buffer2Array(this.gradientOutBuffer, this.gradients);
} catch (final CLException e) {
if (e.getMessage().equals("CL_OUT_OF_RESOURCES")) {
throw new OutOfOpenCLResources(e);
} else {
throw new OpenCLError(e);
}
} catch (final Exception e) {
throw new OpenCLError(e);
}
}
/**
* Compile the kernel.
*
* @param options
* The options.
* @param profile
* The OpenCL training profile.
* @param network
* The network to compile for.
*/
public void compile(final Map<String, String> options,
final OpenCLTrainingProfile profile, final FlatNetwork network) {
final ActivationFunction activation = network.getActivationFunctions()[0];
final StringBuilder source = new StringBuilder();
source.append("#define ACTIVATION(x,slope)");
source.append(activation.getOpenCLExpression(false));
source.append("\r\n");
source.append("#define DERIVATIVE(x,slope)");
source.append(activation.getOpenCLExpression(true));
source.append("\r\n");
source.append(ResourceLoader.loadString(getSourceName()));
setCLSource(source.toString());
compile(options);
profile.calculateKernelParams(this, this.training);
// setup
init(profile);
}
/**
* @return the errors
*/
public float[] getErrors() {
return this.errors;
}
/**
* @return the tempDataArray
*/
public float[] getTempDataArray() {
return this.tempDataArray;
}
/**
* @return the weightOutArray
*/
public float[] getWeightOutArray() {
return this.weightOutArray;
}
/**
* Setup the kernel.
* @param profile The OpenCL training profile.
*/
public void init(final OpenCLTrainingProfile profile) {
final int errorSize = profile.getKernelGlobalWorkgroup();
final int gradientSize = profile.getKernelGlobalWorkgroup()
* this.flat.getWeights().length;
this.errors = new float[errorSize];
this.paramArray[0] = this.flat.getInputCount();
this.paramArray[1] = this.flat.getOutputCount();
this.paramArray[2] = this.flat.getLayerCounts().length;
// create the buffers
this.inputBuffer = createArrayReadOnly(this.inputArray);
this.idealBuffer = createArrayReadOnly(this.idealArray);
this.errorBuffer = createFloatArrayWriteOnly(errorSize);
this.gradientOutBuffer = createFloatArrayWriteOnly(gradientSize);
this.gradientInBuffer = createArrayReadOnly(this.gradients);
this.paramBuffer = createArrayReadOnly(this.paramArray);
this.layerIndexBuffer = createArrayReadOnly(this.flat.getLayerIndex());
this.layerCountBuffer = createArrayReadOnly(this.flat.getLayerCounts());
this.layerFeedCountBuffer = createArrayReadOnly(this.flat
.getLayerFeedCounts());
this.weightInArrayBuffer = createArrayReadOnly(this.weightInArray);
this.weightOutArrayBuffer = createFloatArrayWriteOnly(this.weightInArray.length);
this.weightIndexBuffer = createArrayReadOnly(this.flat.getWeightIndex());
this.activationTypeBuffer = createArrayReadOnly(this.flat
.getLayerCounts());
this.tempDataInBuffer = createArrayReadOnly(this.tempDataArray);
this.tempDataOutBuffer = createFloatArrayWriteOnly(this.tempDataArray.length);
}
/**
* Release the kernel and all buffers.
*/
@Override
public void release() {
super.release();
releaseBuffer(this.activationTypeBuffer);
releaseBuffer(this.errorBuffer);
releaseBuffer(this.gradientOutBuffer);
releaseBuffer(this.gradientInBuffer);
releaseBuffer(this.idealBuffer);
releaseBuffer(this.inputBuffer);
releaseBuffer(this.layerCountBuffer);
releaseBuffer(this.layerFeedCountBuffer);
releaseBuffer(this.layerIndexBuffer);
releaseBuffer(this.paramBuffer);
releaseBuffer(this.tempDataInBuffer);
releaseBuffer(this.tempDataOutBuffer);
releaseBuffer(this.weightInArrayBuffer);
releaseBuffer(this.weightIndexBuffer);
releaseBuffer(this.weightOutArrayBuffer);
}
/**
* @param tempDataArray
* the tempDataArray to set
*/
public void setTempDataArray(final float[] tempDataArray) {
this.tempDataArray = tempDataArray;
}
}