Package de.lmu.ifi.dbs.elki.logging.progress

Examples of de.lmu.ifi.dbs.elki.logging.progress.StepProgress
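
All examples below follow the same idiom: the StepProgress is only allocated when verbose logging is enabled (otherwise the reference stays null and every use is guarded by a null check), each major phase is announced with beginStep(step, description, logger), and the progress is finished with setCompleted(logger) or ensureCompleted(logger). A minimal sketch of the idiom, assuming a hypothetical two-phase algorithm with a Logging field named logger:

  // Allocate only when verbose; null means "no progress reporting".
  StepProgress stepprog = logger.isVerbose() ? new StepProgress("My algorithm", 2) : null;
  if(stepprog != null) {
    stepprog.beginStep(1, "First phase.", logger);
  }
  // ... work for phase 1 ...
  if(stepprog != null) {
    stepprog.beginStep(2, "Second phase.", logger);
  }
  // ... work for phase 2 ...
  if(stepprog != null) {
    stepprog.setCompleted(logger);
  }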


  @Override
  public HistogramResult<DoubleVector> run(Database database) throws IllegalStateException {
    final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
    final DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());

    final StepProgress stepprog = logger.isVerbose() ? new StepProgress("Distance statistics", 2) : null;

    // determine binning ranges.
    DoubleMinMax gminmax = new DoubleMinMax();

    // Cluster by labels
    Collection<Cluster<Model>> split = (new ByLabelClustering()).run(database).getAllClusters();

    // global in-cluster min/max
    DoubleMinMax giminmax = new DoubleMinMax();
    // global other-cluster min/max
    DoubleMinMax gominmax = new DoubleMinMax();
    // in-cluster distances
    MeanVariance mimin = new MeanVariance();
    MeanVariance mimax = new MeanVariance();
    MeanVariance midif = new MeanVariance();
    // other-cluster distances
    MeanVariance momin = new MeanVariance();
    MeanVariance momax = new MeanVariance();
    MeanVariance modif = new MeanVariance();
    // Histogram
    final AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>> histogram;
    if(stepprog != null) {
      stepprog.beginStep(1, "Prepare histogram.", logger);
    }
    if(exact) {
      gminmax = exactMinMax(relation, distFunc);
      histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
    }
    else if(sampling) {
      gminmax = sampleMinMax(relation, distFunc);
      histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
    }
    else {
      histogram = FlexiHistogram.LongSumLongSumHistogram(numbin);
    }

    if(stepprog != null) {
      stepprog.beginStep(2, "Build histogram.", logger);
    }
    final FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), logger) : null;
    // iterate per cluster
    final Pair<Long, Long> incFirst = new Pair<Long, Long>(1L, 0L);
    final Pair<Long, Long> incSecond = new Pair<Long, Long>(0L, 1L);
    for(Cluster<?> c1 : split) {
      for(DBID id1 : c1.getIDs()) {
        // in-cluster distances
        DoubleMinMax iminmax = new DoubleMinMax();
        for(DBID id2 : c1.getIDs()) {
          // skip the point itself.
          if(id1.equals(id2)) {
            continue;
          }
          double d = distFunc.distance(id1, id2).doubleValue();

          histogram.aggregate(d, incFirst);

          iminmax.put(d);
        }
        // aggregate
        mimin.put(iminmax.getMin());
        mimax.put(iminmax.getMax());
        midif.put(iminmax.getDiff());
        // min/max
        giminmax.put(iminmax.getMin());
        giminmax.put(iminmax.getMax());

        // other-cluster distances
        DoubleMinMax ominmax = new DoubleMinMax();
        for(Cluster<?> c2 : split) {
          if(c2 == c1) {
            continue;
          }
          for(DBID id2 : c2.getIDs()) {
            // skip the point itself (shouldn't happen though)
            if(id1.equals(id2)) {
              continue;
            }
            double d = distFunc.distance(id1, id2).doubleValue();

            histogram.aggregate(d, incSecond);

            ominmax.put(d);
          }
        }
        // aggregate
        momin.put(ominmax.getMin());
        momax.put(ominmax.getMax());
        modif.put(ominmax.getDiff());
        // min/max
        gominmax.put(ominmax.getMin());
        gominmax.put(ominmax.getMax());
        if(progress != null) {
          progress.incrementProcessed(logger);
        }
      }
    }
    if(progress != null) {
      progress.ensureCompleted(logger);
    }
    // Update values (only needed for sampling case).
    gminmax.setFirst(Math.min(giminmax.getMin(), gominmax.getMin()));
    gminmax.setSecond(Math.max(giminmax.getMax(), gominmax.getMax()));

    if(stepprog != null) {
      stepprog.setCompleted(logger);
    }

    // count the number of samples we have in the data
    long inum = 0;
    long onum = 0;
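Within step 2 of the example above, per-object progress is additionally tracked by nesting a FiniteProgress inside the StepProgress step; the same null-when-quiet idiom applies. A condensed sketch of that nesting, where the loop body stands in for the actual distance computations:

  if(stepprog != null) {
    stepprog.beginStep(2, "Build histogram.", logger);
  }
  FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), logger) : null;
  for(DBID id : relation.iterDBIDs()) {
    // ... process one object ...
    if(progress != null) {
      progress.incrementProcessed(logger);
    }
  }
  if(progress != null) {
    progress.ensureCompleted(logger);
  }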


   * Performs the LoOP algorithm on the given database.
   */
  public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
    final double sqrt2 = Math.sqrt(2.0);

    StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;

    Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
    KNNQuery<O, D> knnComp = pair.getFirst();
    KNNQuery<O, D> knnReach = pair.getSecond();

    // Assert we got something
    if(knnComp == null) {
      throw new AbortException("No kNN queries supported by database for comparison distance function.");
    }
    if(knnReach == null) {
      throw new AbortException("No kNN queries supported by database for density estimation distance function.");
    }

    // Probabilistic distances
    WritableDataStore<Double> pdists = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
    {// computing PRDs
      if(stepprog != null) {
        stepprog.beginStep(3, "Computing pdists", logger);
      }
      FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        List<DistanceResultPair<D>> neighbors = knnReach.getKNNForDBID(id, kreach);
        double sqsum = 0.0;
        // use first kref neighbors as reference set
        int ks = 0;
        for(DistanceResultPair<D> neighbor : neighbors) {
          if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
            double d = neighbor.getDistance().doubleValue();
            sqsum += d * d;
            ks++;
            if(ks >= kreach) {
              break;
            }
          }
        }
        double pdist = lambda * Math.sqrt(sqsum / ks);
        pdists.put(id, pdist);
        if(prdsProgress != null) {
          prdsProgress.incrementProcessed(logger);
        }
      }
    }
    // Compute PLOF values.
    WritableDataStore<Double> plofs = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
    MeanVariance mvplof = new MeanVariance();
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(4, "Computing PLOF", logger);
      }

      FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        List<DistanceResultPair<D>> neighbors = knnComp.getKNNForDBID(id, kcomp);
        MeanVariance mv = new MeanVariance();
        // use first kref neighbors as comparison set.
        int ks = 0;
        for(DistanceResultPair<D> neighbor1 : neighbors) {
          if(objectIsInKNN || !neighbor1.getDBID().equals(id)) {
            mv.put(pdists.get(neighbor1.getDBID()));
            ks++;
            if(ks >= kcomp) {
              break;
            }
          }
        }
        double plof = Math.max(pdists.get(id) / mv.getMean(), 1.0);
        if(Double.isNaN(plof) || Double.isInfinite(plof)) {
          plof = 1.0;
        }
        plofs.put(id, plof);
        mvplof.put((plof - 1.0) * (plof - 1.0));

        if(progressPLOFs != null) {
          progressPLOFs.incrementProcessed(logger);
        }
      }
    }

    double nplof = lambda * Math.sqrt(mvplof.getMean());
    if(logger.isDebugging()) {
      logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
    }

    // Compute final LoOP values.
    WritableDataStore<Double> loops = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(5, "Computing LoOP scores", logger);
      }

      FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        loops.put(id, MathUtil.erf((plofs.get(id) - 1) / (nplof * sqrt2)));

        if(progressLOOPs != null) {
          progressLOOPs.incrementProcessed(logger);
        }
      }
    }

    if(stepprog != null) {
      stepprog.setCompleted(logger);
    }

    // Build result representation.
    Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
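For reference, the score written in the last step above is the LoOP formula: with the normalization factor nPLOF = lambda * sqrt(E[(PLOF - 1)^2]) computed from mvplof, each object o receives

  LoOP(o) = erf( (PLOF(o) - 1) / (nPLOF * sqrt(2)) )

Because PLOF is clamped to at least 1.0, the argument of erf is non-negative, so the score lies in [0, 1) as expected by the ProbabilisticOutlierScore metadata.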


    }
  }

  @Override
  protected void objectsInserted(DBIDs ids) {
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;

    ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
    // materialize the new kNNs and RkNNs
    if(stepprog != null) {
      stepprog.beginStep(1, "New insertions occurred, materialize their new kNNs and RkNNs.", getLogger());
    }
    materializeKNNAndRKNNs(aids, null);

    // update the old kNNs and RkNNs
    if(stepprog != null) {
      stepprog.beginStep(2, "New insertions occurred, update the affected kNNs and RkNNs.", getLogger());
    }
    ArrayDBIDs rkNN_ids = updateKNNsAndRkNNs(ids);

    // inform listener
    if(stepprog != null) {
      stepprog.beginStep(3, "New insertions occurred, inform listeners.", getLogger());
    }
    fireKNNsInserted(ids, rkNN_ids);

    if(stepprog != null) {
      stepprog.ensureCompleted(getLogger());
    }
  }

    return rkNN_ids;
  }

  @Override
  protected void objectsRemoved(DBIDs ids) {
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;

    ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
    // delete the materialized (old) kNNs and RkNNs
    if(stepprog != null) {
      stepprog.beginStep(1, "New deletions occurred, remove their materialized kNNs and RkNNs.", getLogger());
    }
    List<KNNResult<D>> kNNs = new ArrayList<KNNResult<D>>(ids.size());
    List<List<DistanceResultPair<D>>> rkNNs = new ArrayList<List<DistanceResultPair<D>>>(ids.size());
    for(DBID id : aids) {
      kNNs.add(storage.get(id));
      storage.delete(id);
      rkNNs.add(new ArrayList<DistanceResultPair<D>>(materialized_RkNN.get(id)));
      materialized_RkNN.delete(id);
    }
    ArrayDBIDs kNN_ids = extractAndRemoveIDs(kNNs, aids);
    ArrayDBIDs rkNN_ids = extractAndRemoveIDs(rkNNs, aids);

    // update the affected kNNs and RkNNs
    if(stepprog != null) {
      stepprog.beginStep(2, "New deletions occurred, update the affected kNNs and RkNNs.", getLogger());
    }
    // update the kNNs of the RkNNs
    List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
    for(int i = 0; i < rkNN_ids.size(); i++) {
      DBID id = rkNN_ids.get(i);
      storage.put(id, kNNList.get(i));
      for(DistanceResultPair<D> kNN : kNNList.get(i)) {
        Set<DistanceResultPair<D>> rknns = materialized_RkNN.get(kNN.getDBID());
        rknns.add(new GenericDistanceResultPair<D>(kNN.getDistance(), id));
      }
    }
    // update the RkNNs of the kNNs
    SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
    for(int i = 0; i < kNN_ids.size(); i++) {
      DBID id = kNN_ids.get(i);
      SortedSet<DistanceResultPair<D>> rkNN = materialized_RkNN.get(id);
      for(Iterator<DistanceResultPair<D>> it = rkNN.iterator(); it.hasNext();) {
        DistanceResultPair<D> drp = it.next();
        if(idsSet.contains(drp.getDBID())) {
          it.remove();
        }
      }
    }

    // inform listener
    if(stepprog != null) {
      stepprog.beginStep(3, "New deletions occurred, inform listeners.", getLogger());
    }
    fireKNNsRemoved(ids, rkNN_ids);

    if(stepprog != null) {
      stepprog.ensureCompleted(getLogger());
    }
  }
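Both finishing calls occur in these index-maintenance callbacks; judging from their use here, setCompleted simply marks the step progress finished, while ensureCompleted is the defensive form that finishes it even when not every step was explicitly completed:

  stepprog.setCompleted(getLogger());    // plain finish, as in the insertion variant below
  stepprog.ensureCompleted(getLogger()); // defensive finish, as in the callbacks above

The two simpler variants of the same callbacks follow, keeping the identical three-step structure: materialize, update, notify listeners.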

   * neighborhood.
   *
   * @param ids the ids of the newly inserted objects
   */
  protected void objectsInserted(DBIDs ids) {
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;

    ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
    // materialize the new kNNs
    if(stepprog != null) {
      stepprog.beginStep(1, "New insertions occurred, materialize their new kNNs.", getLogger());
    }
    List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k);
    for(int i = 0; i < aids.size(); i++) {
      DBID id = aids.get(i);
      storage.put(id, kNNList.get(i));
    }

    // update the affected kNNs
    if(stepprog != null) {
      stepprog.beginStep(2, "New insertions occurred, update the affected kNNs.", getLogger());
    }
    ArrayDBIDs rkNN_ids = updateKNNsAfterInsertion(ids);

    // inform listener
    if(stepprog != null) {
      stepprog.beginStep(3, "New insertions occurred, inform listeners.", getLogger());
    }
    fireKNNsInserted(ids, rkNN_ids);

    if(stepprog != null) {
      stepprog.setCompleted(getLogger());
    }
  }

   * neighborhood.
   *
   * @param ids the ids of the removed objects
   */
  protected void objectsRemoved(DBIDs ids) {
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;

    // delete the materialized (old) kNNs
    if(stepprog != null) {
      stepprog.beginStep(1, "New deletions occurred, remove their materialized kNNs.", getLogger());
    }
    for(DBID id : ids) {
      storage.delete(id);
    }

    // update the affected kNNs
    if(stepprog != null) {
      stepprog.beginStep(2, "New deletions occurred, update the affected kNNs.", getLogger());
    }
    ArrayDBIDs rkNN_ids = updateKNNsAfterDeletion(ids);

    // inform listener
    if(stepprog != null) {
      stepprog.beginStep(3, "New deletions occurred, inform listeners.", getLogger());
    }
    fireKNNsRemoved(ids, rkNN_ids);

    if(stepprog != null) {
      stepprog.ensureCompleted(getLogger());
    }
  }

   * @return Clustering result
   */
  public Clustering<SubspaceModel<V>> run(Relation<V> relation) throws IllegalStateException {
    final int dimensionality = DatabaseUtil.dimensionality(relation);

    StepProgress stepprog = logger.isVerbose() ? new StepProgress(dimensionality) : null;

    // Generate all 1-dimensional clusters
    if(stepprog != null) {
      stepprog.beginStep(1, "Generate all 1-dimensional clusters.", logger);
    }

    // mapping of dimensionality to set of subspaces
    HashMap<Integer, List<Subspace<V>>> subspaceMap = new HashMap<Integer, List<Subspace<V>>>();

    // list of 1-dimensional subspaces containing clusters
    List<Subspace<V>> s_1 = new ArrayList<Subspace<V>>();
    subspaceMap.put(0, s_1);

    // mapping of subspaces to list of clusters
    TreeMap<Subspace<V>, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace<V>, List<Cluster<Model>>>(new Subspace.DimensionComparator());

    for(int d = 0; d < dimensionality; d++) {
      Subspace<V> currentSubspace = new Subspace<V>(d);
      List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);

      if(logger.isDebuggingFiner()) {
        StringBuffer msg = new StringBuffer();
        msg.append("\n").append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
        for(Cluster<Model> cluster : clusters) {
          msg.append("      " + cluster.getIDs() + "\n");
        }
        logger.debugFiner(msg.toString());
      }

      if(!clusters.isEmpty()) {
        s_1.add(currentSubspace);
        clusterMap.put(currentSubspace, clusters);
      }
    }

    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for(int d = 0; d < dimensionality - 1; d++) {
      if(stepprog != null) {
        stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", logger);
      }

      List<Subspace<V>> subspaces = subspaceMap.get(d);
      if(subspaces == null || subspaces.isEmpty()) {
        if(stepprog != null) {
          for(int dim = d + 1; dim < dimensionality - 1; dim++) {
            stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 1) + "-dimensional subspaces found.", logger);
          }
        }
        break;
      }

      List<Subspace<V>> candidates = generateSubspaceCandidates(subspaces);
      List<Subspace<V>> s_d = new ArrayList<Subspace<V>>();

      for(Subspace<V> candidate : candidates) {
        Subspace<V> bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
        if(logger.isDebuggingFine()) {
          logger.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
        }

        List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
        List<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
        for(Cluster<Model> cluster : bestSubspaceClusters) {
          List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
          if(!candidateClusters.isEmpty()) {
            clusters.addAll(candidateClusters);
          }
        }

        if(logger.isDebuggingFine()) {
          StringBuffer msg = new StringBuffer();
          msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
          for(Cluster<Model> c : clusters) {
            msg.append("      " + c.getIDs() + "\n");
          }
          logger.debugFine(msg.toString());
        }

        if(!clusters.isEmpty()) {
          s_d.add(candidate);
          clusterMap.put(candidate, clusters);
        }
      }

      if(!s_d.isEmpty()) {
        subspaceMap.put(d + 1, s_d);
      }
    }

    // build result
    int numClusters = 1;
    result = new Clustering<SubspaceModel<V>>("SUBCLU clustering", "subclu-clustering");
    for(Subspace<V> subspace : clusterMap.descendingKeySet()) {
      List<Cluster<Model>> clusters = clusterMap.get(subspace);
      for(Cluster<Model> cluster : clusters) {
        Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(cluster.getIDs());
        newCluster.setModel(new SubspaceModel<V>(subspace, DatabaseUtil.centroid(relation, cluster.getIDs())));
        newCluster.setName("cluster_" + numClusters++);
        result.addCluster(newCluster);
      }
    }

    if(stepprog != null) {
      stepprog.setCompleted(logger);
    }
    return result;
  }
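SUBCLU shows that the step count need not be a constant (new StepProgress(dimensionality)) and that steps which turn out to be inapplicable are still announced through beginStep, so the step counter reaches its total instead of stalling. A condensed sketch of that skip-ahead idiom, where dimensionality and workRemains are stand-ins for the real loop bound and termination test:

  StepProgress stepprog = logger.isVerbose() ? new StepProgress(dimensionality) : null;
  for(int d = 0; d < dimensionality; d++) {
    if(stepprog != null) {
      stepprog.beginStep(d + 1, "Process dimensionality " + (d + 1) + ".", logger);
    }
    if(!workRemains) {
      if(stepprog != null) {
        // Announce the steps that will never run, so the progress completes.
        for(int rest = d + 1; rest < dimensionality; rest++) {
          stepprog.beginStep(rest + 1, "Not applicable.", logger);
        }
      }
      break;
    }
    // ... actual work for this dimensionality ...
  }
  if(stepprog != null) {
    stepprog.setCompleted(logger);
  }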

   * calling {@code #doRunInTime(Database)}.
   *
   * @param relation Data to process
   */
  public OutlierResult run(Relation<O> relation) {
    StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null;
    Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
    KNNQuery<O, D> kNNRefer = pair.getFirst();
    KNNQuery<O, D> kNNReach = pair.getSecond();
    return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
  }
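The LOF example is the most compact: the StepProgress is created in run and handed down to helper methods, which begin their own steps internally; that is also why the LoOP variant below starts at beginStep(3), its first two steps presumably being begun inside getKNNQueries. A minimal sketch of such a helper, with a hypothetical body in which only the progress handling matters:

  private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Relation<O> relation, StepProgress stepprog) {
    if(stepprog != null) {
      stepprog.beginStep(1, "Materialize neighborhood w.r.t. reference distance.", logger);
    }
    KNNQuery<O, D> knnRefer = null; // ... obtain or materialize the first kNN query ...
    if(stepprog != null) {
      stepprog.beginStep(2, "Materialize neighborhood w.r.t. reachability distance.", logger);
    }
    KNNQuery<O, D> knnReach = null; // ... obtain or materialize the second kNN query ...
    return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnRefer, knnReach);
  }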

   * Performs the LoOP algorithm on the given database.
   */
  public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
    final double sqrt2 = Math.sqrt(2.0);

    StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;

    Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
    KNNQuery<O, D> knnComp = pair.getFirst();
    KNNQuery<O, D> knnReach = pair.getSecond();

    // Assert we got something
    if(knnComp == null) {
      throw new AbortException("No kNN queries supported by database for comparison distance function.");
    }
    if(knnReach == null) {
      throw new AbortException("No kNN queries supported by database for density estimation distance function.");
    }

    // Probabilistic distances
    WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    {// computing PRDs
      if(stepprog != null) {
        stepprog.beginStep(3, "Computing pdists", logger);
      }
      FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        final KNNResult<D> neighbors = knnReach.getKNNForDBID(id, kreach);
        double sqsum = 0.0;
        // use first kref neighbors as reference set
        int ks = 0;
        for(DistanceResultPair<D> neighbor : neighbors) {
          if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
            double d = neighbor.getDistance().doubleValue();
            sqsum += d * d;
            ks++;
            if(ks >= kreach) {
              break;
            }
          }
        }
        double pdist = lambda * Math.sqrt(sqsum / ks);
        pdists.putDouble(id, pdist);
        if(prdsProgress != null) {
          prdsProgress.incrementProcessed(logger);
        }
      }
    }
    // Compute PLOF values.
    WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    MeanVariance mvplof = new MeanVariance();
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(4, "Computing PLOF", logger);
      }

      FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        final KNNResult<D> neighbors = knnComp.getKNNForDBID(id, kcomp);
        MeanVariance mv = new MeanVariance();
        // use first kref neighbors as comparison set.
        int ks = 0;
        for(DistanceResultPair<D> neighbor1 : neighbors) {
          if(objectIsInKNN || !neighbor1.getDBID().equals(id)) {
            mv.put(pdists.doubleValue(neighbor1.getDBID()));
            ks++;
            if(ks >= kcomp) {
              break;
            }
          }
        }
        double plof = Math.max(pdists.doubleValue(id) / mv.getMean(), 1.0);
        if(Double.isNaN(plof) || Double.isInfinite(plof)) {
          plof = 1.0;
        }
        plofs.putDouble(id, plof);
        mvplof.put((plof - 1.0) * (plof - 1.0));

        if(progressPLOFs != null) {
          progressPLOFs.incrementProcessed(logger);
        }
      }
    }

    double nplof = lambda * Math.sqrt(mvplof.getMean());
    if(logger.isDebugging()) {
      logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
    }

    // Compute final LoOP values.
    WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(5, "Computing LoOP scores", logger);
      }

      FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        loops.putDouble(id, NormalDistribution.erf((plofs.doubleValue(id) - 1) / (nplof * sqrt2)));

        if(progressLOOPs != null) {
          progressLOOPs.incrementProcessed(logger);
        }
      }
    }

    if(stepprog != null) {
      stepprog.setCompleted(logger);
    }

    // Build result representation.
    Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
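This second LoOP listing is a later revision of the one shown earlier: the progress handling is identical, but the boxed Double data stores are replaced by primitive double stores (avoiding boxing on every put and get), and NormalDistribution.erf is used in place of MathUtil.erf. The storage change, side by side:

  // Earlier revision: boxed storage.
  WritableDataStore<Double> pdists = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
  pdists.put(id, pdist);
  // Later revision: primitive storage.
  WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  pdists.putDouble(id, pdist);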
