Package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial

Source Code of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.CTLuGLSBackwardSearchAlgorithm$Parameterizer

package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

/**
* GLS-Backward Search is a statistical approach to detecting spatial outliers.
*
* <p>
* F. Chen and C.-T. Lu and A. P. Boedihardjo: <br>
* GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier
* Detection <br>
* In Proc. 16th ACM SIGKDD international conference on Knowledge discovery and
* data mining, 2010
* </p>
*
* Implementation note: this is just the most basic version of this algorithm.
* The spatial relation must be two dimensional, the set of spatial basis
* functions is hard-coded (but trivial to enhance) to {1,x,y,x*x,y*y,x*y}, and
* we assume the neighborhood is large enough for the simpler formulas to work
* that make the optimization problem convex.
*
* @author Ahmed Hettab
*
* @param <V> Vector type to use for distances
* @param <D> Distance function to use
*/
@Title("GLS-Backward Search")
@Reference(authors = "F. Chen and C.-T. Lu and A. P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939")
public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class);

  /**
   * Parameter Alpha - significance niveau
   */
  private double alpha;

  /**
   * Parameter k - neighborhood size
   */
  private int k;

  /**
   * Constructor.
   *
   * @param distanceFunction Distance function
   * @param k number of nearest neighbors to use
   * @param alpha Significance niveau
   */
  public CTLuGLSBackwardSearchAlgorithm(DistanceFunction<V, D> distanceFunction, int k, double alpha) {
    super(distanceFunction);
    this.alpha = alpha;
    this.k = k;
  }

  /**
   * Run the algorithm
   *
   * @param relationx Spatial relation
   * @param relationy Attribute relation
   * @return Algorithm result
   */
  public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);

    // Outlier detection loop
    {
      ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
      ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);

      double phialpha = NormalDistribution.standardNormalProbit(1.0 - alpha / 2);
      // Detect outliers while significant.
      while(true) {
        Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
        if(candidate.second < phialpha) {
          break;
        }
        scores.putDouble(candidate.first, candidate.second);
        if (!Double.isNaN(candidate.second)) {
          mm.put(candidate.second);
        }
        idview.remove(candidate.first);
      }

      // Remaining objects are inliers
      for(DBID id : idview) {
        scores.putDouble(id, 0.0);
      }
    }

    Relation<Double> scoreResult = new MaterializedRelation<Double>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0);
    return new OutlierResult(scoreMeta, scoreResult);
  }

  /**
   * Run a single iteration of the GLS-SOD modeling step
   *
   * @param relationx Geo relation
   * @param relationy Attribute relation
   * @return Top outlier and associated score
   */
  private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
    final int dim = DatabaseUtil.dimensionality(relationx);
    final int dimy = DatabaseUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);

    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();

    // init F,X,Z
    Matrix X = new Matrix(ids.size(), 6);
    Matrix F = new Matrix(ids.size(), ids.size());
    Matrix Y = new Matrix(ids.size(), dimy);
    for(int i = 0; i < ids.size(); i++) {
      DBID id = ids.get(i);

      // Fill the data matrix
      {
        V vec = relationx.get(id);
        double la = vec.doubleValue(1);
        double lo = vec.doubleValue(2);
        X.set(i, 0, 1.0);
        X.set(i, 1, la);
        X.set(i, 2, lo);
        X.set(i, 3, la * lo);
        X.set(i, 4, la * la);
        X.set(i, 5, lo * lo);
      }

      {
        for(int d = 0; d < dimy; d++) {
          double idy = relationy.get(id).doubleValue(d + 1);
          Y.set(i, d, idy);
        }
      }

      // Fill the neighborhood matrix F:
      {
        KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
        for(DistanceResultPair<D> dpair : neighbors) {
          if(id.equals(dpair.getDBID())) {
            continue;
          }
          neighborhood.add(dpair.getDBID());
        }
        // Weight object itself positively.
        F.set(i, i, 1.0);
        final int nweight = -1 / neighborhood.size();
        // We need to find the index positions of the neighbors, unfortunately.
        for(DBID nid : neighborhood) {
          int pos = ids.binarySearch(nid);
          assert (pos >= 0);
          F.set(pos, i, nweight);
        }
      }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    Matrix common = X.transposeTimesTranspose(F).times(F);
    Matrix b = common.times(X).inverse().times(common.times(Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    Matrix sigmaMat = F.times(X.times(b).minus(F.times(Y)));
    final double sigma_sum_square = sigmaMat.normF() / (relationx.size() - 6 - 1);
    final double norm = 1 / Math.sqrt(sigma_sum_square);

    // calculate the absolute values of standard residuals
    Matrix E = F.times(Y.minus(X.times(b))).timesEquals(norm);

    DBID worstid = null;
    double worstscore = Double.NEGATIVE_INFINITY;
    for(int i = 0; i < ids.size(); i++) {
      DBID id = ids.get(i);
      double err = E.getRow(i).euclideanLength();
      // double err = Math.abs(E.get(i, 0));
      if(err > worstscore) {
        worstscore = err;
        worstid = id;
      }
    }

    return new Pair<DBID, Double>(worstid, worstscore);
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  /**
   * Parameterization class
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   *
   * @param <V> Input vector type
   * @param <D> Distance type
   */
  public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
    /**
     * Holds the alpha value - significance niveau
     */
    public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("glsbs.alpha", "Significance niveau");

    /**
     * Parameter to specify the k nearest neighbors
     */
    public static final OptionID K_ID = OptionID.getOrCreateOptionID("glsbs.k", "k nearest neighbors to use");

    /**
     * Parameter Alpha - significance niveau
     */
    private double alpha;

    /**
     * Parameter k - neighborhood size
     */
    private int k;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      getParameterAlpha(config);
      getParameterK(config);
    }

    @Override
    protected CTLuGLSBackwardSearchAlgorithm<V, D> makeInstance() {
      return new CTLuGLSBackwardSearchAlgorithm<V, D>(distanceFunction, k, alpha);
    }

    /**
     * Get the alpha parameter
     *
     * @param config Parameterization
     */
    protected void getParameterAlpha(Parameterization config) {
      final DoubleParameter param = new DoubleParameter(ALPHA_ID);
      if(config.grab(param)) {
        alpha = param.getValue();
      }
    }

    /**
     * Get the k parameter
     *
     * @param config Parameterization
     */
    protected void getParameterK(Parameterization config) {
      final IntParameter param = new IntParameter(K_ID);
      if(config.grab(param)) {
        k = param.getValue();
      }
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.CTLuGLSBackwardSearchAlgorithm$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.