/*
* Copyright 2009 Keith Stevens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package edu.ucla.sspace.index;
import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.DenseVector;
import edu.ucla.sspace.vector.TernaryVector;
import edu.ucla.sspace.vector.VectorMath;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
/**
* A class that generates {@link DoubleVector}s that are orthogonal to each
* other based on configurable properties using the Gram-Schmidt
* orthogonalization process. This class supports three properties:
*
* <dl style="margin-left: 1em">
*
* <dt> <i>Property:</i> <code><b>{@value #VECTOR_MEAN_PROPERTY}
* </b></code> <br>
* <i>Default:</i> {@value #DEFAULT_VECTOR_MEAN}
*
* <dd style="padding-top: .5em">This variable sets the Gaussian mean for
* generating values in random vectors.
*
* <dt> <i>Property:</i> <code><b>{@value #VECTOR_STANDARD_DEVIATION_PROPERTY}
* </b></code> <br>
* <i>Default:</i> {@value #DEFAULT_VECTOR_STANDARD_DEVIATION}
*
* <dd style="padding-top: .5em">This variable sets the standard deviation
* used when generating Gaussian values for random vectors.
*
* <dt> <i>Property:</i> <code><b>{@value #VECTOR_LENGTH_PROPERTY}
* </b></code> <br>
* <i>Default:</i> {@value #DEFAULT_VECTOR_LENGTH}
*
* <dd style="padding-top: .5em">This variable sets the length of vectors to
* create. Note that vector lengths passed to {@code generateRandomVector}
* are ignored.
*
* </dl>
*/
public class RandomOrthogonalVectorGenerator
        implements DoubleVectorGenerator<DoubleVector>, Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * A random number generator that can be accessed by other classes which
     * will rely on the same source of random values.
     */
    public static final Random RANDOM = new Random();

    /**
     * The prefix for naming public properties.
     */
    private static final String PROPERTY_PREFIX =
        "edu.ucla.sspace.index.RandomOrthogonalVectorGenerator";

    /**
     * The property to specify the mean of the Gaussian distribution from
     * which the components of each random vector are drawn.
     */
    public static final String VECTOR_MEAN_PROPERTY =
        PROPERTY_PREFIX + ".mean";

    /**
     * The property to specify the standard deviation of the Gaussian
     * distribution from which the components of each random vector are drawn.
     */
    public static final String VECTOR_STANDARD_DEVIATION_PROPERTY =
        PROPERTY_PREFIX + ".std";

    /**
     * The property name for the vector length.  The constructors of this
     * class take the length as an explicit argument and do not read this
     * property.
     */
    public static final String VECTOR_LENGTH_PROPERTY =
        PROPERTY_PREFIX + ".length";

    /**
     * The default mean used when {@link #VECTOR_MEAN_PROPERTY} is not set.
     */
    public static final int DEFAULT_VECTOR_MEAN = 0;

    /**
     * The default number of dimensions for each generated vector.
     */
    public static final int DEFAULT_VECTOR_LENGTH = 1000;

    /**
     * The default standard deviation used when {@link
     * #VECTOR_STANDARD_DEVIATION_PROPERTY} is not set.
     */
    public static final int DEFAULT_VECTOR_STANDARD_DEVIATION = 1;

    /**
     * The mean of the random values to generate.
     */
    private final double mean;

    /**
     * The standard deviation of the random values to generate.
     */
    private final double std;

    /**
     * The length for each vector to generate.  This also limits the number of
     * vectors that can be generated.
     */
    private final int vectorLength;

    /**
     * The list of orthogonal vectors already generated by this generator,
     * including the initial vector.
     */
    private final List<DoubleVector> generatedVectors;

    /**
     * Constructs this instance using the system properties and a randomly
     * generated initial vector.
     *
     * @param vectorLength the number of dimensions of every generated vector
     */
    public RandomOrthogonalVectorGenerator(int vectorLength) {
        this(vectorLength, System.getProperties(), null);
    }

    /**
     * Constructs this instance using the system properties and the provided
     * initial vector.
     *
     * @param vectorLength the number of dimensions of every generated vector
     * @param originalVector the first vector that all generated vectors will
     *        be orthogonal to, or {@code null} to generate one randomly
     */
    public RandomOrthogonalVectorGenerator(int vectorLength,
                                           DoubleVector originalVector) {
        this(vectorLength, System.getProperties(), originalVector);
    }

    /**
     * Constructs this instance using the provided properties and the
     * provided initial vector.
     *
     * @param vectorLength the number of dimensions of every generated vector
     * @param properties the properties from which {@link
     *        #VECTOR_MEAN_PROPERTY} and {@link
     *        #VECTOR_STANDARD_DEVIATION_PROPERTY} are read; unset values fall
     *        back to the corresponding defaults
     * @param originalVector the first vector that all generated vectors will
     *        be orthogonal to, or {@code null} to generate one randomly.  The
     *        instance is stored as-is, not copied.
     *
     * @throws NumberFormatException if a property is set but cannot be parsed
     *         as a {@code double}
     */
    public RandomOrthogonalVectorGenerator(int vectorLength,
                                           Properties properties,
                                           DoubleVector originalVector) {
        String meanProp = properties.getProperty(VECTOR_MEAN_PROPERTY);
        mean = (meanProp != null)
            ? Double.parseDouble(meanProp)
            : DEFAULT_VECTOR_MEAN;

        String stdProp =
            properties.getProperty(VECTOR_STANDARD_DEVIATION_PROPERTY);
        std = (stdProp != null)
            ? Double.parseDouble(stdProp)
            : DEFAULT_VECTOR_STANDARD_DEVIATION;

        this.vectorLength = vectorLength;

        generatedVectors = new ArrayList<DoubleVector>();
        if (originalVector == null) {
            originalVector = generateInitialVector(vectorLength, mean, std);
        }
        generatedVectors.add(originalVector);
    }

    /**
     * Generates a dense vector whose components are independent Gaussian
     * samples drawn from {@link #RANDOM}, scaled by {@code std} and shifted
     * by {@code mean}.
     *
     * @param length the number of dimensions of the vector
     * @param mean the mean of each component
     * @param std the standard deviation of each component
     *
     * @return a new random vector
     */
    private static DoubleVector generateInitialVector(int length,
                                                      double mean,
                                                      double std) {
        DoubleVector vector = new DenseVector(length);
        for (int i = 0; i < length; ++i) {
            double v = RANDOM.nextGaussian();
            v = std * v + mean;
            vector.set(i, v);
        }
        return vector;
    }

    /**
     * Computes the dot product between two vectors, iterating over the
     * dimensions of {@code u}.
     *
     * @param u the first vector; its length bounds the iteration
     * @param v the second vector
     *
     * @return the sum of the component-wise products of {@code u} and
     *         {@code v}
     */
    private static double dotProduct(DoubleVector u,
                                     DoubleVector v) {
        double dot = 0;
        for (int i = 0; i < u.length(); ++i) {
            dot += u.get(i) * v.get(i);
        }
        return dot;
    }

    /**
     * Generates a new random vector and removes from it its projection onto
     * every vector previously produced by this generator (Gram-Schmidt), so
     * that the result is orthogonal to all of them.  The returned instance is
     * also retained internally and used when orthogonalizing later vectors.
     *
     * @return A random vector that is orthogonal to all previously created
     *         vectors
     *
     * @throws IllegalArgumentException if as many vectors as there are
     *         dimensions have already been generated, since no further
     *         mutually orthogonal vector exists
     */
    public DoubleVector generate() {
        // Use >= rather than == so that a generator that already holds more
        // vectors than dimensions (e.g. a vector length of 0, where the
        // constructor still stores one vector) is also rejected.
        if (generatedVectors.size() >= vectorLength) {
            throw new IllegalArgumentException(
                "Too many vectors have been generated");
        }

        DoubleVector vector =
            generateInitialVector(vectorLength, mean, std);

        for (DoubleVector otherVector : generatedVectors) {
            double uDotU = dotProduct(otherVector, otherVector);

            // A vector with zero (or NaN) squared norm has no direction to
            // project onto; dividing by it would turn every component of the
            // new vector into NaN.  Every vector is already orthogonal to the
            // zero vector, so it is safe to skip.
            if (!(uDotU > 0d)) {
                continue;
            }

            // Computed against the partially orthogonalized vector, not the
            // original random sample.
            double uDotV = dotProduct(otherVector, vector);
            for (int i = 0; i < vectorLength; ++i) {
                double projection = otherVector.get(i) * uDotV / uDotU;
                vector.set(i, vector.get(i) - projection);
            }
        }

        generatedVectors.add(vector);
        return vector;
    }
}