Package edu.ucla.sspace.clustering

Source Code of edu.ucla.sspace.clustering.CKVWSpectralClustering06$SuperSpectralGenerator

/*
* Copyright 2011 Keith Stevens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.clustering;

import edu.ucla.sspace.common.Statistics;

import edu.ucla.sspace.matrix.Matrix;

import edu.ucla.sspace.util.Generator;

import edu.ucla.sspace.vector.DenseVector;
import edu.ucla.sspace.vector.DoubleVector;

import java.util.Properties;


/**
* A spectral clustering implementation based on the following paper:
*
* <p style="font-family:Garamond, Georgia, serif"> David Cheng ,  Ravi Kannan ,
* Santosh Vempala ,  Grant Wang (2003) A Divid-and-Merge Methodology for
* Clustering. Available <a
* href="http://people.csail.mit.edu/gjw/papers/divmerge.pdf">here</a>.
*
* </p>  This implementation implements a subclass of the {@link
* BaseSpectralCut} and simply computes the second eigen vector for a data set.
*
* @see BaseSpectralCut
* @see SpectralClustering
*
  * @author Keith Stevens
*/
public class CKVWSpectralClustering06 implements Clustering {

    /**
     * The proper prefix.
     */
    public static final String PROPERTY_PREFIX =
        "edu.ucla.sspace.clustering.CKVWSpectralClustering06";

    /**
     * The property used to use K-Means as the objective function.
     */
    public static final String USE_KMEANS =
        PROPERTY_PREFIX + ".useKMeans";

    /**
     * {@inheritDoc}
     */
    public Assignments cluster(Matrix matrix, Properties props) {
        SpectralClustering cluster = new SpectralClustering(
                .2, new SuperSpectralGenerator());
        return cluster.cluster(matrix);
    }

    /**
     * {@inheritDoc}
     */
    public Assignments cluster(Matrix matrix,
                                int numClusters,
                                Properties props) {
        SpectralClustering cluster = new SpectralClustering(
                .2, new SuperSpectralGenerator());
        return cluster.cluster(
                matrix, numClusters, props.getProperty(USE_KMEANS) != null);
    }

    /**
     * An internal spectral cut implementation that is based on the referred to
     * paper.  See paper for details.
     */
    public class SuperSpectralCut extends BaseSpectralCut {

        /**
         * {@inheritDoc}
         */
        protected DoubleVector computeSecondEigenVector(Matrix matrix,
                                                        int vectorLength) {
           // Compute pi, and D.  Pi is the normalized form of rho.  D a
           // diagonal matrix with sqrt(pi) as the values along the diagonal.
           // Also compute pi * D^-1.
            DoubleVector pi = new DenseVector(vectorLength);
            DoubleVector D = new DenseVector(vectorLength);
            DoubleVector piDInverse = new DenseVector(vectorLength);
            for (int i = 0; i < vectorLength; ++i) {
                double piValue = rho.get(i)/pSum;
                pi.set(i, piValue);
                if (piValue > 0d) {
                    D.set(i, Math.sqrt(piValue));
                    piDInverse.set(i, piValue / D.get(i));
                }
            }

            // Create the second largest eigenvector of the a scaled form of the
            // row normalized affinity matrix.  The computation is using the
            // power method such that the affinity matrix is never explicitly
            // computed.
            // piDInverse serves as a vector which is similar to the first eigen
            // vector.  The second eigen vector is assumed to be orthogonal to
            // piDInverse.  This algorithm makes O(log(matrix.rows())) passes
            // through the data matrix.
        
            // Step 1, generate a random vector, v,  that is orthogonal to
            // pi*D-Inverse.
            DoubleVector v = new DenseVector(vectorLength);
            for (int i = 0; i < v.length(); ++i)
                v.set(i, Math.random());

            // Make log(matrix.rows()) passes.
            int log = (int) Statistics.log2(vectorLength);
            for (int k = 0; k < log; ++k) {
                // start the orthonormalizing the eigen vector.
                v = orthonormalize(v, piDInverse);

                // Step 2, repeated, (a) normalize v (b) set v = Q*v, where Q =
                // D * R-Inverse * matrix * matrix-Transpose * D-Inverse.

                // v = Q*v is broken into 4 sub steps that allow for sparse
                // multiplications.
                // Step 2b-1) v = D-Inverse*v.
                for (int i = 0; i < vectorLength; ++ i)
                    if (D.get(i) != 0d)
                        v.set(i, v.get(i) / D.get(i));

                // Step 2b-2) v = matrix-Transpose * v.
                DoubleVector newV = computeMatrixTransposeV(matrix, v);

                // Step 2b-3) v = matrix * v.
                computeMatrixDotV(matrix, newV, v);

                // Step 2b-4) v = D*R-Inverse * v. Note that R is a diagonal
                // matrix with rho as the values along the diagonal.
                for (int i = 0; i < vectorLength; ++i) {
                    double oldValue = v.get(i);
                    double newValue = oldValue * D.get(i) / rho.get(i);
                    v.set(i, newValue);
                }
            }

            return v;
        }
    }

    public String toString() {
        return "CKVWSpectralClustering06";
    }

    /**
     * A simple generator for creating instances of the {@link SpectralCut}
     * class.
     */
    public class SuperSpectralGenerator implements Generator<EigenCut> {

        /**
         * {@inheritDoc}
         */
        public EigenCut generate() {
            return new SuperSpectralCut();
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.clustering.CKVWSpectralClustering06$SuperSpectralGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.