Package edu.umd.hooka

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()


    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
    testOOV("zh", vocab, true, true, zhExpectedOOVRates);
    testOOV("zh", vocab, false, true, zhExpectedOOVRates);
View Full Code Here


  @Test
  public void testTurkishOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");
View Full Code Here

    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
View Full Code Here

    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
    float[] trStemExpectedOOVRates = {0.85714287f, 1f, 0.71428573f, 0.33333334f};
View Full Code Here

    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
    float[] trStemExpectedOOVRates = {0.85714287f, 1f, 0.71428573f, 0.33333334f};
    float[] trExpectedOOVRates = {1f, 1f, 0.85714287f, 0.6666667f};
View Full Code Here

  @Test
  public void testArabicOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/ar-test.tok.stemmed.stop");
    for (String token : sentences.get(0).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("2011");
    float[] arStopStemExpectedOOVRates = {0f, 1f, 0.8181818f, 1f};
    float[] arStopExpectedOOVRates = {0.6666667f, 1f, 0.8181818f, 1f};
    float[] arStemExpectedOOVRates = {0f, 1f, 0.85714287f, 1f};
View Full Code Here

    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/ar-test.tok.stemmed.stop");
    for (String token : sentences.get(0).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("2011");
    float[] arStopStemExpectedOOVRates = {0f, 1f, 0.8181818f, 1f};
    float[] arStopExpectedOOVRates = {0.6666667f, 1f, 0.8181818f, 1f};
    float[] arStemExpectedOOVRates = {0f, 1f, 0.85714287f, 1f};
    float[] arExpectedOOVRates = {0.6666667f, 1f, 0.85714287f, 1f};
View Full Code Here

  }

  @Test
  public void testEnglishOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    vocab.addOrGet("r.d.");
    vocab.addOrGet("craig");
    vocab.addOrGet("dictionari");
    vocab.addOrGet("polynesian");
    vocab.addOrGet("mytholog");
    vocab.addOrGet("greenwood");
View Full Code Here

  @Test
  public void testEnglishOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    vocab.addOrGet("r.d.");
    vocab.addOrGet("craig");
    vocab.addOrGet("dictionari");
    vocab.addOrGet("polynesian");
    vocab.addOrGet("mytholog");
    vocab.addOrGet("greenwood");
    vocab.addOrGet("press");
View Full Code Here

  @Test
  public void testEnglishOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    vocab.addOrGet("r.d.");
    vocab.addOrGet("craig");
    vocab.addOrGet("dictionari");
    vocab.addOrGet("polynesian");
    vocab.addOrGet("mytholog");
    vocab.addOrGet("greenwood");
    vocab.addOrGet("press");
    vocab.addOrGet("new");
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.