Package com.mapr.synth.samplers

Examples of com.mapr.synth.samplers.SchemaSampler


    // this isn't quite a unit test.  It produces files to be visualized with R
    @Test
    public void testCompromise() throws IOException, ParseException {
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = df.parse("2014-01-01 00:00:00").getTime();
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema013.json"), Charsets.UTF_8).read());

        long exploitStart = df.parse("2014-01-20 00:00:00").getTime();
        long exploitEnd = df.parse("2014-02-20 00:00:00").getTime();
        int exploitStartDay = (int) TimeUnit.DAYS.convert(exploitStart - start, TimeUnit.MILLISECONDS);

        int[] transactionsByDay = new int[DAYS_COUNTED];
        int[] compromiseByDay = new int[DAYS_COUNTED];
        int[] fraudByDay = new int[DAYS_COUNTED];

        Multiset<Integer> fraudUserCounts = HashMultiset.create();
        Multiset<Integer> nonfraudUserCounts = HashMultiset.create();
        Multiset<Integer> allMerchantCounts = HashMultiset.create();
        int fraudAccounts = 0;
        Set<Integer> merchantHistory = Sets.newHashSet();

        // these collect the evolution of the contingency table for just merchant 0 and are indexed by time relative to exploit window.
        int exploitLength = (int) (TimeUnit.DAYS.convert(exploitEnd - exploitStart, TimeUnit.MILLISECONDS)) + 1;
//        exploitLength = 5;
        int[] atmTotal = new int[exploitLength];
        int[] atmFraud = new int[exploitLength];
        int[] atmNonFraud = new int[exploitLength];
        int[] nonAtmFraud = new int[exploitLength];
        int[] nonAtmNonFraud = new int[exploitLength];

        for (int userId = 0; userId < USER_COUNT; userId++) {
            JsonNode sample = s.sample();
            merchantHistory.clear();
            boolean userHasFraud = false;

            int[] hasFraudPerUser = new int[exploitLength];
View Full Code Here


import static org.junit.Assert.*;

public class SchemaSamplerTest {
    @Test
    public void testFieldNames() throws IOException {
        SchemaSampler s = new SchemaSampler("[{\"name\":\"id\", \"class\":\"id\"}, {\"name\":\"foo\", \"class\":\"address\"}, {\"name\":\"bar\", \"class\":\"date\", \"format\":\"yy-MM-dd\"}, {\"name\":\"baz\", \"class\":\"foreign-key\", \"size\":1000, \"skew\":1}]");
        assertEquals("[id, foo, bar, baz]", Iterables.toString(s.getFieldNames()));
        System.out.printf("%s\n", Iterables.toString(s.sample()));
        System.out.printf("%s\n", Iterables.toString(s.sample()));
        System.out.printf("%s\n", Iterables.toString(s.sample()));
        System.out.printf("%s\n", Iterables.toString(s.sample()));
        System.out.printf("%s\n", Iterables.toString(s.sample()));
    }
View Full Code Here

        System.out.printf("%s\n", Iterables.toString(s.sample()));
    }

    @Test
    public void testInt() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema001.json"), Charsets.UTF_8).read());
        Multiset<String> counts = HashMultiset.create();
        for (int i = 0; i < 10000; i++) {
            counts.add(s.sample().get("size").asText());
        }
        for (int i = 10; i < 99; i++) {
            Assert.assertTrue(counts.elementSet().contains(i + ""));
        }
        assertEquals(99 - 10, counts.elementSet().size());
View Full Code Here

        assertEquals(99 - 10, counts.elementSet().size());
    }

    @Test
    public void testString() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema002.json"), Charsets.UTF_8).read());
        Multiset<String> counts = HashMultiset.create();
        double n = 10000;
        for (int i = 0; i < n; i++) {
            counts.add(s.sample().get("foo").asText());
        }
        check(counts, 0.95 / 2, "YES");
        check(counts, 0.05 / 2, "NO");
        check(counts, 1.00 / 2, "NA");
    }
View Full Code Here

        assertEquals(p, counts.count(s) / n, Math.sqrt(n * p * (n - p)));
    }

    @Test
    public void testSeveral() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema003.json"), Charsets.UTF_8).read());
        Multiset<String> gender = HashMultiset.create();
        Pattern namePattern = Pattern.compile("[A-Z][a-z]+ [A-Z][a-z]+");
        Pattern addressPattern = Pattern.compile("[0-9]+ [A-Z][a-z]+ [A-Z][a-z]+ [A-Z][a-z]+");
        Pattern datePattern1 = Pattern.compile("[01][0-9]/[0123][0-9]/20[012][0-9]");
        Pattern datePattern2 = Pattern.compile("2014-0[12]-[0123][0-9]");
        Pattern datePattern3 = Pattern.compile("[01][0-9]/[0123][0-9]/199[5-9]");
        for (int i = 0; i < 10000; i++) {
            JsonNode record = s.sample();
            assertEquals(i, record.get("id").asInt());
            assertTrue(namePattern.matcher(record.get("name").asText()).matches());
            assertTrue(addressPattern.matcher(record.get("address").asText()).matches());
            assertTrue(datePattern1.matcher(record.get("first_visit").asText()).matches());
            assertTrue(datePattern2.matcher(record.get("second_date").asText()).matches());
View Full Code Here

        check(gender, 0.02 * (1 - 0.02), "OTHER");
    }

    @Test
    public void testMisc() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema004.json"), Charsets.UTF_8).read());
        Multiset<String> country = HashMultiset.create();
        Multiset<String> language = HashMultiset.create();
        Multiset<String> browser = HashMultiset.create();
        Multiset<String> state = HashMultiset.create();
        Multiset<String> os = HashMultiset.create();
        for (int i = 0; i < 10000; i++) {
            JsonNode record = s.sample();
            country.add(record.get("co").asText());
            browser.add(record.get("br").asText());
            language.add(record.get("la").asText());
            state.add(record.get("st").asText());
            os.add(record.get("os").asText());
View Full Code Here

        assertEquals(5876.0, os.count("win7"), 120);
    }

    @Test
    public void testSequence() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read());
        OnlineSummarizer s0 = new OnlineSummarizer();
        OnlineSummarizer s1 = new OnlineSummarizer();
        for (int i = 0; i < 10000; i++) {
            JsonNode x = s.sample();
            s0.add(Iterables.size(x.get("c")));
            s1.add(Iterables.size(x.get("d")));

            for (JsonNode n : x.get("d")) {
                int z = n.asInt();
View Full Code Here

    }


    @Test
    public void testSequenceArray() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema006.json"), Charsets.UTF_8).read());
        for (int i = 0; i < 10; i++) {
            JsonNode x = s.sample();
            Iterator<JsonNode> values = x.get("x").elements();
            assertEquals(3, values.next().asInt());
            assertEquals(6, values.next().asInt());
            assertEquals(8, values.next().asInt());
View Full Code Here

        }
    }

    @Test
    public void testMap() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema011.json"), Charsets.UTF_8).read());
        for (int i = 0; i < 100; i++) {
            JsonNode x = s.sample();
            assertEquals(i, x.get("id").asInt());
            int v = x.get("stuff").get("a").asInt();
            assertTrue(v == 3 || v == 4);
            v = x.get("stuff").get("b").asInt();
            assertTrue(v == 4 || v == 5);
View Full Code Here

    }

    @Test
    public void testSkewedInteger() throws IOException {
        // will give fields x, y, z, q with different skewness
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema007.json"), Charsets.UTF_8).read());

        SortedMultiset<Integer> x = TreeMultiset.create();
        SortedMultiset<Integer> y = TreeMultiset.create();
        SortedMultiset<Integer> z = TreeMultiset.create();
        SortedMultiset<Integer> q = TreeMultiset.create();
        for (int i = 0; i < 10000; i++) {
            JsonNode record = s.sample();
            x.add(record.get("x").asInt());
            y.add(record.get("y").asInt());
            z.add(record.get("z").asInt());
            q.add(record.get("q").asInt());
        }
View Full Code Here

TOP

Related Classes of com.mapr.synth.samplers.SchemaSampler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.