Package com.facebook.LinkBench

Source Code of com.facebook.LinkBench.DistributionTestBase$UniformBucketer

/*
* Copyright 2012, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.LinkBench;

import java.util.Properties;
import java.util.Random;

import junit.framework.TestCase;

import org.junit.Test;

import com.facebook.LinkBench.distributions.ProbabilityDistribution;

/**
* This test implements generic unit tests for different implementations of
* ProbabilityDistribution.
*
* Most of these tests are either sanity tests (check that output is within
* expected range and obeys basic invariants), and consistency tests
* (check that the output of two different methods is consistent).
*
* While these tests go a long way to checking the consistency of the
* behavior of the ProbabilityDistribution, it cannot check that the
* specific correct values are generated: it is helpful to implement
* additional tests for each concrete implementation.
*
* @author tarmstrong
*/
public abstract class DistributionTestBase extends TestCase {

  protected abstract ProbabilityDistribution getDist();

  protected Properties getDistParams() {
    return new Properties();
  }

  /** Number of cdf checks to perform */
  protected int cdfChecks() {
    return 50000;
  }

  /** Number of pdf checks */
  protected int pdfChecks() {
    return 50000;
  }

  protected Bucketer getBucketer() {
    return new UniformBucketer(cdfChecks());
  }

  /** Percentage difference between cdf and choose() to tolerate */
  protected double tolerance() {
    return 0.002;
  }

  public Random initRandom(String testName) {
    long seed = System.currentTimeMillis();
    System.err.println("Choose seed " + seed + " for test " + testName);
    return new Random(seed);
  }

  /**
   * Check a few invariants cdf should adhere to
   */
  @Test
  public void testCDFSanity() {
    ProbabilityDistribution dist = getDist();
    long min = 453, max = 26546454;
    dist.init(min, max, getDistParams(), "");

    assertEquals(dist.cdf(min-1), 0.0);
    assertEquals(dist.cdf(min-234321), 0.0);
    assertEquals(dist.cdf(max), 1.0);
    assertEquals(dist.cdf(max+2343242224234L), 1.0);

    // Check cdf is monotonically increasing
    double last = 0.0;
    long step = (max - min) / cdfChecks();
    for (long id = min; id < max; id += step) {
      double p = dist.cdf(id);
      assertTrue(p >= last);
      last = p;
    }
  }

  @Test
  public void testPDFSanity() {
    ProbabilityDistribution dist = getDist();
    long min = 453, max = 26546454;
    dist.init(min, max, getDistParams(), "");

    assertEquals(0.0, dist.pdf(min-1));
    assertEquals(0.0, dist.pdf(min-234321));
    assertEquals(0.0, dist.pdf(max));
    assertEquals(0.0, dist.pdf(max+2343242224234L));

    // Check pdf is in correct range
    double total = 0.0;
    long step = (max - min) / pdfChecks();
    for (long id = min; id < max; id += step) {
      double p = dist.pdf(id);
      if ((id - min) < step * 100) {
        System.err.println("p(X=" + id + ") = " + p);
      }
      assertTrue(p >= 0.0);
      assertTrue(p <= 1.0);
      total += p;
    }
    assert(total <= 1.0);
  }

  @Test
  public void testPDFSum() {
    ProbabilityDistribution dist = getDist();
    long min = 1, max = 50;
    dist.init(min, max, getDistParams(), "");

    // Check sum of pdf over small range
    // Order of least to most probably to minimize sum error
    double total = 0.0;
    for (long id = max - 1; id >= min; id--) {
      double p = dist.pdf(id);
      assertTrue(p >= 0.0);
      assertTrue(p <= 1.0);
      System.err.println("p(X=" + id + ") = " + p);
      total += p;
    }
    System.err.println("Total = " + total);
    // Give significant tolerance due to rounding errors
    assertTrue(total <= 1.05);
    assertTrue(total >= 0.95 );
  }

  @Test
  public void testChooseSanity() {
    ProbabilityDistribution dist = getDist();
    long min = 453, max = 26546454;
    dist.init(min, max, getDistParams(), "");
    Random rng = initRandom("testChooseSanity");
    for (int i = 0; i < 100000; i++) {
      long id = dist.choose(rng);
      assertTrue(id >= min);
      assertTrue(id < max);
    }
  }

  /**
   * Check that choose() and cdf() are returning consistent results
   * (i.e. that the result of choose are distributed according to cdf)
   */
  @Test
  public void testCDFChooseConsistency() {

    long min = 100, max = 100000;
    Bucketer bucketer = getBucketer();
    int bucketCount = bucketer.getBucketCount();
    int buckets[] = new int[bucketCount];

    long n = max - min;

    Random rng = initRandom("testCDFChooseConsistency");
    ProbabilityDistribution dist = getDist();
    dist.init(min, max, getDistParams(), "");

    int trials = 1000000;
    for (int i = 0; i < trials; i++) {
      long id = dist.choose(rng);
      long off = id - min;
      int bucket = bucketer.chooseBucket(off, n);
      buckets[bucket]++;
    }

    int totalCount = 0;
    boolean fail = false;
    for (int b = 0; b < bucketCount; b++) {
      totalCount += buckets[b];
      long bucketTop = bucketer.bucketMax(b, n) + min;
      double actCDF = ((double)totalCount) / trials;
      double expCDF = dist.cdf(bucketTop);
      // 0.2% error
      if (Math.abs(expCDF - actCDF) > tolerance()) {
        System.err.println(String.format("Divergence between CDF and " +
            "choose function: P(X <= %d) act: %f exp: %f", bucketTop,
            actCDF, expCDF));
        fail = true;
      }
    }
    if (fail) {
      fail("Divergence between cdf and choose methods: see preceding output " +
          "for details");
    }
  }

  @Test
  public void testCDFPDFConsistency() {
    long min = 252352, max = 6544543;
    ProbabilityDistribution dist = getDist();
    dist.init(min, max, getDistParams(), "");

    long step = (max - min) / cdfChecks();
    for (long id = min + 1; id < max; id += step) {
      double c = dist.cdf(id);
      double c1 = dist.cdf(id - 1);
      double p = dist.pdf(id);

      double err = Math.abs((c - c1) - p);
      if (err > 0.0001) {
        fail(String.format("Error > 0.001: cdf(%d) - cdf(%d) = %f, pdf(%d) = %f",
                              id, id -1, c1 - c, id, p));
      }
    }
  }

  @Test
  public void testQuantileSanity() {
    long min = 0, max = 1000;
    ProbabilityDistribution dist = getDist();
    dist.init(min, max, getDistParams(), "");

    long last = dist.quantile(0.0);
    for (double q = 0.0; q <= 1.0; q += 0.125) {
      long id = dist.quantile(q);
      System.err.format("quantile(%f) = %d\n", q, id);
      assertTrue(id >= min);
      assertTrue(id < max);
      assertTrue(id >= last);
      last = id;
    }

    // min should be most probable, and therefore should definitely
    // be returned by quantile
    assertEquals(min, dist.quantile(0.0));
  }

  /**
   * Different distributions should be bucketed in different ways
   * to test their fit.  For example, the zipf distribution treats
   * lower keys specially so we want to have better resolution for
   * those
   */
  static interface Bucketer {
    public int getBucketCount();
    public int chooseBucket(long i, long n);
    public long bucketMax(int bucket, long n);
  }

  static class UniformBucketer implements Bucketer {
    final int bucketCount;

    public UniformBucketer(int bucketCount) {
      this.bucketCount = bucketCount;
    }

    public int getBucketCount() {
      return bucketCount;
    }

    public int chooseBucket(long i, long n) {
      return (int)((i * bucketCount) / n);
    }

    public long bucketMax(int bucket, long n) {
      return ((long)((((double)bucket+1)/bucketCount)*n))  - 1;
    }
  }
}
TOP

Related Classes of com.facebook.LinkBench.DistributionTestBase$UniformBucketer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.