// Source Code of org.encog.engine.network.train.prop.OpenCLTrainingProfile

/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *   
 * For more information on Heaton Research copyrights, licenses 
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.engine.network.train.prop;


import org.encog.engine.EncogEngine;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.kernels.EncogKernel;


/**
 * Specifies a training profile for an OpenCL training session. Includes the
 * following information.
 * 
 * device The device to use.
 * 
 * local ratio: The local workgroup is a OpenCL concept where the global work
 * group is broken into several local work groups. The bigger the local work
 * group the faster things will run. However, your OpenCL device will impose a
 * maximum local work group size. This ratio allows you to use a smaller local
 * work group, for example 0.5 would be half of the max size of the local work
 * group. You will almost always want to leave this value at the max 1.0. It is
 * rare that you might need to decrease it because of the GPU being overtaxed.
 * 
 * 
 * global ratio: The global work group must be a multiple of the local work
 * group. The default value is 1, which means local and global workgroups the
 * same size. Do not set this value lower than 1.0. Values higher than 1.0 can
 * result in higher performance. Should be set to an integer value. For example,
 * 2 would specify a global work workgroup twice the size of the local. Higher
 * values will increase resource load on the GPU and may crash.
 * 
 * segmentation ratio: The main purpose of this ratio is to allow you to scale
 * back on how long the kernels take to execute. For maximum performance leave
 * this value at the default 1.0 value. However, if your GPU is crashing,
 * setting it to a value lower can help. If your are running Encog on the same
 * GPU as your display uses, you may run into timeout issues if your kernel
 * takes too long to execute. Setting this ratio lower can help.
 * 
 */
public class OpenCLTrainingProfile {


  /**
   * The OpenCL device to use.
   */
  private EncogCLDevice device;


  /**
   * The local ratio
   */
  private final double localRatio;


  /**
   * The global ratio.
   */
  private final int globalRatio;


  /**
   * The segmentation ratio.
   */
  private final double segmentationRatio;


  /**
   * The calculated size of the global workgroup.
   */
  private int kernelGlobalWorkgroup;


  /**
   * The calculated size of the local workgroup.
   */
  private int kernelLocalWorkgroup;


  /**
   * The number of training items processed per call.
   */
  private int kernelWorkPerCall;


  /**
   * The number of calls to the kernel that will be made. The number of
   * segments.
   */
  private int kernelNumberOfCalls;


  /**
   * The number of items in the remainder.
   */
  private int kernelRemainder;


  /**
   * The size of the global and local workgroups for the remainder.
   */
  private int kernelRemainderGlobal;


  /**
   * The number of training items processed per call in the remainder.
   */
  private int kernelRemainderPer;


  /**
   * Construct a training profile with the specified device and the value of
   * one for all ratios.
   * 
   * @param device
   *            The device to use.
   */
  public OpenCLTrainingProfile(final EncogCLDevice device) {
    this(device, 1.0, 1, 1.0);
  }


  /**
   * Construct a training profile.
   * 
   * @param device
   *            The device to use.
   * @param localRatio
   *            The local ratio.
   * @param globalRatio
   *            The global ratio.
   * @param segmentationRatio
   *            The segmentation ratio.
   */
  public OpenCLTrainingProfile(final EncogCLDevice device,
      final double localRatio, final int globalRatio,
      final double segmentationRatio) {
    super();
    this.device = device;


    if ((localRatio < 0) || (globalRatio < 0) || (segmentationRatio < 0)) {
      throw new OpenCLError("None of the ratios can be below zero.");
    }


    if (localRatio > 1.0) {
      throw new OpenCLError(
          "The local ratio cannot be greater than 1.0.  That would cause the OpenCL device to have more local items than it can handle.");
    }


    if (globalRatio < 1.0) {
      throw new OpenCLError(
          "The global ratio cannot be less than 1.0.  That would cause the global work area to be less than a local work area.");
    }


    if (segmentationRatio > 1.0) {
      throw new OpenCLError(
          "The segmentation ratio cannot be greater than 1.0.  That would cause the trainer to require more training elements per iteration than exist.");
    }


    this.localRatio = localRatio;
    this.globalRatio = globalRatio;
    this.segmentationRatio = segmentationRatio;
  }


  /**
   * Calculate the kernel values.
   * 
   * @param kernel
   *            The kernel to calculate for.
   * @param training
   *            The training params to use.
   */
  public void calculateKernelParams(final EncogKernel kernel,
      final EngineIndexableSet training) {
    boolean globalValuesAssigned = false;
    int workPerIteration;


    // there are two special cases


    // first, if the ratio is 1.0
    if (Math.abs(this.segmentationRatio - 1.0) < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
      // if the segmentation ratio is 1, then we want NO SEGMENTATION
      // we will have to find a workgroup size that is even
      int trialLocalSize = (int) Math.min(kernel.getMaxWorkGroupSize(),
          training.getRecordCount());


      trialLocalSize++;// falsely add one so the loop can decrease it
                // with
      // no effect.


      // loop and try to find a local size small enough to be even.
      do {
        trialLocalSize--;
        this.kernelLocalWorkgroup = (int) (trialLocalSize * this.localRatio);
        this.kernelGlobalWorkgroup = (this.kernelLocalWorkgroup * this.globalRatio);
        this.kernelWorkPerCall = (int) ((training.getRecordCount() / this.kernelGlobalWorkgroup) * this.segmentationRatio);
        workPerIteration = this.kernelGlobalWorkgroup
            * this.kernelWorkPerCall;
      } while ((workPerIteration != training.getRecordCount())
          && (trialLocalSize > 1));


      if (trialLocalSize > 0) {
        globalValuesAssigned = true;
      }
    }


    // if we either wanted to segment, or the attempt to find an even group
    // size above failed
    if (!globalValuesAssigned) {
      // otherwise divide into segments
      final int maxLocalSize = (int) Math.min(kernel
          .getMaxWorkGroupSize(), training.getRecordCount());
      this.kernelLocalWorkgroup = (int) (maxLocalSize * this.localRatio);
      this.kernelGlobalWorkgroup = (this.kernelLocalWorkgroup * this.globalRatio);


      // second special case, if the segmentation ratio is zero, then just
      // do one item per OpenCL call
      if (this.segmentationRatio < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
        this.kernelWorkPerCall = 1;
      } else {
        this.kernelWorkPerCall = (int) ((training.getRecordCount() / this.kernelGlobalWorkgroup) * this.segmentationRatio);
        if( this.kernelWorkPerCall==0 ) {
          this.kernelWorkPerCall= 1;
        }
      }
    }


    workPerIteration = this.kernelGlobalWorkgroup * this.kernelWorkPerCall;


    this.kernelNumberOfCalls = (int) (training.getRecordCount() / workPerIteration);
    this.kernelRemainder = (int) (training.getRecordCount() % workPerIteration);


    this.kernelRemainderGlobal = this.kernelGlobalWorkgroup;


    // if there is no "final training set", because it lined up evenly,
    // still create one.
    // the final training set is where learning happens.
    if (this.kernelRemainder == 0) {
      this.kernelRemainder = this.kernelGlobalWorkgroup;
      this.kernelRemainderPer = this.kernelWorkPerCall;
      this.kernelNumberOfCalls--;
    } else {
      this.kernelRemainderPer = this.kernelRemainder
          / this.kernelGlobalWorkgroup;
    }


    // does the remainder not have enough to fill the global tasks global?
    if (this.kernelRemainderPer == 0) {
      this.kernelRemainderPer = 1;
      this.kernelRemainderGlobal = this.kernelRemainder;
    }
  }


  /**
   * @return The device to use.
   */
  public EncogCLDevice getDevice() {
    return this.device;
  }


  /**
   * @return The global ratio.
   */
  public int getGlobalRatio() {
    return this.globalRatio;
  }


  /**
   * @return The calculated size of the global workgroup.
   */
  public int getKernelGlobalWorkgroup() {
    return this.kernelGlobalWorkgroup;
  }


  /**
   * @return The calculated size of the local workgroup.
   */
  public int getKernelLocalWorkgroup() {
    return this.kernelLocalWorkgroup;
  }


  /**
   * @return The number of calls to the kernel that will be made. The number
   *         of segments.
   */
  public int getKernelNumberOfCalls() {
    return this.kernelNumberOfCalls;
  }


  /**
   * @return The number of items in the remainder.
   */
  public int getKernelRemainder() {
    return this.kernelRemainder;
  }


  /**
   * @return The size of the global and local workgroups for the remainder.
   */
  public int getKernelRemainderGlobal() {
    return this.kernelRemainderGlobal;
  }


  /**
   * @return The number of training items processed per call in the remainder.
   */
  public int getKernelRemainderPer() {
    return this.kernelRemainderPer;
  }


  /**
   * @return The number of training items processed per call.
   */
  public int getKernelWorkPerCall() {
    return this.kernelWorkPerCall;
  }


  /**
   * @return The local ratio.
   */
  public double getLocalRatio() {
    return this.localRatio;
  }


  /**
   * @return The segmentation ratio.
   */
  public double getSegmentationRatio() {
    return this.segmentationRatio;
  }


  /**
   * Set the device to use.
   * 
   * @param device
   *            The device to use.
   */
  public void setDevice(final EncogCLDevice device) {
    this.device = device;
  }


  /**
   * @return All internal values as a string.
   */
  @Override
  public String toString() {
    final StringBuilder result = new StringBuilder();
    result.append("OpenCL Profile:\n");
    result.append("Local Ratio: ");
    result.append(this.localRatio);
    result.append("\n");
    result.append("Number of global work items: ");
    result.append(this.globalRatio);
    result.append("\n");
    result.append("Segmentation Ratio: ");
    result.append(this.segmentationRatio);
    result.append("\n");
    result.append("Device: ");
    result.append(this.device.toString());
    result.append("\n");


    result.append("kernelGlobalWorkgroup: ");
    result.append(this.kernelGlobalWorkgroup);
    result.append("\n");


    result.append("kernelLocalWorkgroup: ");
    result.append(this.kernelLocalWorkgroup);
    result.append("\n");


    result.append("kernelWorkPerCall: ");
    result.append(this.kernelWorkPerCall);
    result.append("\n");


    result.append("kernelNumberOfCalls: ");
    result.append(this.kernelNumberOfCalls);
    result.append("\n");


    result.append("kernelRemainder: ");
    result.append(this.kernelRemainder);
    result.append("\n");


    result.append("kernelRemainderGlobal: ");
    result.append(this.kernelRemainderGlobal);
    result.append("\n");


    result.append("kernelRemainderPer: ");
    result.append(this.kernelRemainderPer);
    result.append("\n");


    return result.toString();
  }


}
// Source Code of org.encog.engine.network.train.prop.OpenCLTrainingProfile

// Related Classes of org.encog.engine.network.train.prop.OpenCLTrainingProfile