Package org.encog.engine.network.train.prop

Source Code of org.encog.engine.network.train.prop.OpenCLTrainingProfile

/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*  
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.engine.network.train.prop;

import org.encog.engine.EncogEngine;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.kernels.EncogKernel;

/**
* Specifies a training profile for an OpenCL training session. Includes the
* following information.
*
* device The device to use.
*
* local ratio: The local workgroup is an OpenCL concept where the global work
* group is broken into several local work groups. The bigger the local work
* group the faster things will run. However, your OpenCL device will impose a
* maximum local work group size. This ratio allows you to use a smaller local
* work group, for example 0.5 would be half of the max size of the local work
* group. You will almost always want to leave this value at the max 1.0. It is
* rare that you might need to decrease it because of the GPU being overtaxed.
*
*
* global ratio: The global work group must be a multiple of the local work
* group. The default value is 1, which means the local and global workgroups
* are the same size. Do not set this value lower than 1. Values higher than 1
* can result in higher performance. Should be set to an integer value. For
* example, 2 would specify a global workgroup twice the size of the local.
* Higher values will increase resource load on the GPU and may crash.
*
* segmentation ratio: The main purpose of this ratio is to allow you to scale
* back on how long the kernels take to execute. For maximum performance leave
* this value at the default 1.0 value. However, if your GPU is crashing,
* setting it to a lower value can help. If you are running Encog on the same
* GPU as your display uses, you may run into timeout issues if your kernel
* takes too long to execute. Setting this ratio lower can help.
*
*/
public class OpenCLTrainingProfile {

  /**
   * The OpenCL device to use.
   */
  private EncogCLDevice device;

  /**
   * The local ratio
   */
  private final double localRatio;

  /**
   * The global ratio.
   */
  private final int globalRatio;

  /**
   * The segmentation ratio.
   */
  private final double segmentationRatio;

  /**
   * The calculated size of the global workgroup.
   */
  private int kernelGlobalWorkgroup;

  /**
   * The calculated size of the local workgroup.
   */
  private int kernelLocalWorkgroup;

  /**
   * The number of training items processed per call.
   */
  private int kernelWorkPerCall;

  /**
   * The number of calls to the kernel that will be made. The number of
   * segments.
   */
  private int kernelNumberOfCalls;

  /**
   * The number of items in the remainder.
   */
  private int kernelRemainder;

  /**
   * The size of the global and local workgroups for the remainder.
   */
  private int kernelRemainderGlobal;

  /**
   * The number of training items processed per call in the remainder.
   */
  private int kernelRemainderPer;

  /**
   * Construct a training profile with the specified device and the value of
   * one for all ratios.
   *
   * @param device
   *            The device to use.
   */
  public OpenCLTrainingProfile(final EncogCLDevice device) {
    this(device, 1.0, 1, 1.0);
  }

  /**
   * Construct a training profile.
   *
   * @param device
   *            The device to use.
   * @param localRatio
   *            The local ratio.
   * @param globalRatio
   *            The global ratio.
   * @param segmentationRatio
   *            The segmentation ratio.
   */
  public OpenCLTrainingProfile(final EncogCLDevice device,
      final double localRatio, final int globalRatio,
      final double segmentationRatio) {
    super();
    this.device = device;

    if ((localRatio < 0) || (globalRatio < 0) || (segmentationRatio < 0)) {
      throw new OpenCLError("None of the ratios can be below zero.");
    }

    if (localRatio > 1.0) {
      throw new OpenCLError(
          "The local ratio cannot be greater than 1.0.  That would cause the OpenCL device to have more local items than it can handle.");
    }

    if (globalRatio < 1.0) {
      throw new OpenCLError(
          "The global ratio cannot be less than 1.0.  That would cause the global work area to be less than a local work area.");
    }

    if (segmentationRatio > 1.0) {
      throw new OpenCLError(
          "The segmentation ratio cannot be greater than 1.0.  That would cause the trainer to require more training elements per iteration than exist.");
    }

    this.localRatio = localRatio;
    this.globalRatio = globalRatio;
    this.segmentationRatio = segmentationRatio;
  }

  /**
   * Calculate the kernel values.
   *
   * @param kernel
   *            The kernel to calculate for.
   * @param training
   *            The training params to use.
   */
  public void calculateKernelParams(final EncogKernel kernel,
      final EngineIndexableSet training) {
    boolean globalValuesAssigned = false;
    int workPerIteration;

    // there are two special cases

    // first, if the ratio is 1.0
    if (Math.abs(this.segmentationRatio - 1.0) < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
      // if the segmentation ratio is 1, then we want NO SEGMENTATION
      // we will have to find a workgroup size that is even
      int trialLocalSize = (int) Math.min(kernel.getMaxWorkGroupSize(),
          training.getRecordCount());

      trialLocalSize++;// falsely add one so the loop can decrease it
                // with
      // no effect.

      // loop and try to find a local size small enough to be even.
      do {
        trialLocalSize--;
        this.kernelLocalWorkgroup = (int) (trialLocalSize * this.localRatio);
        this.kernelGlobalWorkgroup = (this.kernelLocalWorkgroup * this.globalRatio);
        this.kernelWorkPerCall = (int) ((training.getRecordCount() / this.kernelGlobalWorkgroup) * this.segmentationRatio);
        workPerIteration = this.kernelGlobalWorkgroup
            * this.kernelWorkPerCall;
      } while ((workPerIteration != training.getRecordCount())
          && (trialLocalSize > 1));

      if (trialLocalSize > 0) {
        globalValuesAssigned = true;
      }
    }

    // if we either wanted to segment, or the attempt to find an even group
    // size above failed
    if (!globalValuesAssigned) {
      // otherwise divide into segments
      final int maxLocalSize = (int) Math.min(kernel
          .getMaxWorkGroupSize(), training.getRecordCount());
      this.kernelLocalWorkgroup = (int) (maxLocalSize * this.localRatio);
      this.kernelGlobalWorkgroup = (this.kernelLocalWorkgroup * this.globalRatio);

      // second special case, if the segmentation ratio is zero, then just
      // do one item per OpenCL call
      if (this.segmentationRatio < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
        this.kernelWorkPerCall = 1;
      } else {
        this.kernelWorkPerCall = (int) ((training.getRecordCount() / this.kernelGlobalWorkgroup) * this.segmentationRatio);
        if( this.kernelWorkPerCall==0 ) {
          this.kernelWorkPerCall= 1;
        }
      }
    }

    workPerIteration = this.kernelGlobalWorkgroup * this.kernelWorkPerCall;

    this.kernelNumberOfCalls = (int) (training.getRecordCount() / workPerIteration);
    this.kernelRemainder = (int) (training.getRecordCount() % workPerIteration);

    this.kernelRemainderGlobal = this.kernelGlobalWorkgroup;

    // if there is no "final training set", because it lined up evenly,
    // still create one.
    // the final training set is where learning happens.
    if (this.kernelRemainder == 0) {
      this.kernelRemainder = this.kernelGlobalWorkgroup;
      this.kernelRemainderPer = this.kernelWorkPerCall;
      this.kernelNumberOfCalls--;
    } else {
      this.kernelRemainderPer = this.kernelRemainder
          / this.kernelGlobalWorkgroup;
    }

    // does the remainder not have enough to fill the global tasks global?
    if (this.kernelRemainderPer == 0) {
      this.kernelRemainderPer = 1;
      this.kernelRemainderGlobal = this.kernelRemainder;
    }
  }

  /**
   * @return The device to use.
   */
  public EncogCLDevice getDevice() {
    return this.device;
  }

  /**
   * @return The global ratio.
   */
  public int getGlobalRatio() {
    return this.globalRatio;
  }

  /**
   * @return The calculated size of the global workgroup.
   */
  public int getKernelGlobalWorkgroup() {
    return this.kernelGlobalWorkgroup;
  }

  /**
   * @return The calculated size of the local workgroup.
   */
  public int getKernelLocalWorkgroup() {
    return this.kernelLocalWorkgroup;
  }

  /**
   * @return The number of calls to the kernel that will be made. The number
   *         of segments.
   */
  public int getKernelNumberOfCalls() {
    return this.kernelNumberOfCalls;
  }

  /**
   * @return The number of items in the remainder.
   */
  public int getKernelRemainder() {
    return this.kernelRemainder;
  }

  /**
   * @return The size of the global and local workgroups for the remainder.
   */
  public int getKernelRemainderGlobal() {
    return this.kernelRemainderGlobal;
  }

  /**
   * @return The number of training items processed per call in the remainder.
   */
  public int getKernelRemainderPer() {
    return this.kernelRemainderPer;
  }

  /**
   * @return The number of training items processed per call.
   */
  public int getKernelWorkPerCall() {
    return this.kernelWorkPerCall;
  }

  /**
   * @return The local ratio.
   */
  public double getLocalRatio() {
    return this.localRatio;
  }

  /**
   * @return The segmentation ratio.
   */
  public double getSegmentationRatio() {
    return this.segmentationRatio;
  }

  /**
   * Set the device to use.
   *
   * @param device
   *            The device to use.
   */
  public void setDevice(final EncogCLDevice device) {
    this.device = device;
  }

  /**
   * @return All internal values as a string.
   */
  @Override
  public String toString() {
    final StringBuilder result = new StringBuilder();
    result.append("OpenCL Profile:\n");
    result.append("Local Ratio: ");
    result.append(this.localRatio);
    result.append("\n");
    result.append("Number of global work items: ");
    result.append(this.globalRatio);
    result.append("\n");
    result.append("Segmentation Ratio: ");
    result.append(this.segmentationRatio);
    result.append("\n");
    result.append("Device: ");
    result.append(this.device.toString());
    result.append("\n");

    result.append("kernelGlobalWorkgroup: ");
    result.append(this.kernelGlobalWorkgroup);
    result.append("\n");

    result.append("kernelLocalWorkgroup: ");
    result.append(this.kernelLocalWorkgroup);
    result.append("\n");

    result.append("kernelWorkPerCall: ");
    result.append(this.kernelWorkPerCall);
    result.append("\n");

    result.append("kernelNumberOfCalls: ");
    result.append(this.kernelNumberOfCalls);
    result.append("\n");

    result.append("kernelRemainder: ");
    result.append(this.kernelRemainder);
    result.append("\n");

    result.append("kernelRemainderGlobal: ");
    result.append(this.kernelRemainderGlobal);
    result.append("\n");

    result.append("kernelRemainderPer: ");
    result.append(this.kernelRemainderPer);
    result.append("\n");

    return result.toString();
  }

}
TOP

Related Classes of org.encog.engine.network.train.prop.OpenCLTrainingProfile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.