Package com.cloudera.recordbreaker.schemadict

Examples of com.cloudera.recordbreaker.schemadict.SchemaSuggest


      //
      // Now evaluate the dictionary using the "test" set.
      // Be sure to keep a lot of statistics about match failures
      //
      System.err.println("Testing schema dictionary...");
      SchemaSuggest ss = new SchemaSuggest(dictDir);
      ss.setUseAttributeLabels(false);
      TreeMap<Integer, Integer> overallSizes = new TreeMap<Integer, Integer>();
      TreeMap<Integer, Integer> failureSizes = new TreeMap<Integer, Integer>();
      List<Schema> failedSchemas = new ArrayList<Schema>();
      List<SchemaStatisticalSummary> failedSummaries = new ArrayList<SchemaStatisticalSummary>();
      double totalReciprocalRank = 0;
      int i = 0;
      int failures = 0;

      // Iterate through all files in the test dir     
      System.err.println("Examining: " + testDbDir);
      for (File f: testDbDir.listFiles()) {
        try {
          if (f.getName().endsWith(".avro")) {
            String testName = f.getName();
            SchemaStatisticalSummary testSummary = new SchemaStatisticalSummary("input");
            Schema testSchema = testSummary.createSummaryFromData(f);
            int schemaSize = testSchema.getFields().size();
            Integer sizeCount = overallSizes.get(schemaSize);
            if (sizeCount == null) {
              sizeCount = new Integer(0);
            }
            overallSizes.put(schemaSize, new Integer(sizeCount.intValue() + 1));

            System.err.println("Testing against " + testName);
            System.err.println("Schema size is " + schemaSize);

            // Go through the top-MAX_MAPPINGS related schemas, as returned by SchemaDictionary
            int rank = 1;
            long startTime = System.currentTimeMillis();
            List<DictionaryMapping> mappings = ss.inferSchemaMapping(f, MAX_MAPPINGS);
            long endTime = System.currentTimeMillis();
            System.err.println("  it took " + ((endTime - startTime) / 1000.0) + ", returned " + mappings.size() + " elts");
       
            double scores[] = new double[mappings.size()];
            boolean foundGoal = false;
View Full Code Here

TOP

Related Classes of com.cloudera.recordbreaker.schemadict.SchemaSuggest

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.