Package org.apache.mahout.classifier.bayes.model

Examples of org.apache.mahout.classifier.bayes.model.ClassifierContext


    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }

  public void testResults() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = { "aa", "ff" };
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    System.out.println("Result: " + result);
  }
View Full Code Here


    store.loadFeatureWeight("dd", "e", 50);
    store.updateVocabCount();
  }

  public void test() throws InvalidDatastoreException {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument( document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to e", "e", result.getLabel());

    document = new String[]{"ff"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());

    document = new String[]{"cc"};
    result = classifier.classifyDocument( document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
View Full Code Here

    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }

  public void testResults() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument( document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    System.out.println("Result: " + result);
  }
View Full Code Here

    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: "
          + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(),
        params.get("defaultCat"));
    final TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {

      for (File file : subdirs) {
        log.info("--------------");
        log.info("Testing: " + file);
        String correctLabel = file.getName().split(".txt")[0];
        final TimingStatistics operationStats = new TimingStatistics();

        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()),
            Charset.forName(params.get("encoding")), false)) {

          Map<String, List<String>> document = new NGrams(line, Integer
              .parseInt(params.get("gramSize"))).generateNGrams();
          for (Map.Entry<String, List<String>> stringListEntry : document
              .entrySet()) {
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(
                strings.toArray(new String[strings.size()]), params
                    .get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel,
View Full Code Here

    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: "
          + dataSource);
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class)
          .newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    }

    log.info("Converting input document to proper format");
    String[] document = BayesFileFormatter.readerToDocument(analyzer,
        new InputStreamReader(new FileInputStream(docPath), Charset
            .forName(encoding)));
    StringBuilder line = new StringBuilder();
    for (String token : document) {
      line.append(token).append(' ');
    }

    List<String> doc = new NGrams(line.toString(), gramSize)
        .generateNGramsWithoutLabel();

    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classifyDocument(doc
        .toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);

  }
View Full Code Here

      }
     
    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer(Version.LUCENE_30);
    }
   
    log.info("Converting input document to proper format");

    String[] document =
        BayesFileFormatter.readerToDocument(analyzer,Files.newReader(docPath, Charset.forName(encoding)));
    StringBuilder line = new StringBuilder();
    for (String token : document) {
      line.append(token).append(' ');
    }
   
    List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
   
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);
   
  }
View Full Code Here

   
  }

  @Test
  public void test() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to e", "e", result.getLabel());
   
    document = new String[] {"dd"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
   
    document = new String[] {"cc"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
View Full Code Here

    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }

  @Test
  public void testResults() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
  }
View Full Code Here

    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
View Full Code Here

    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.bayes.model.ClassifierContext

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.