Package edu.umd.hooka

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()


    vocab.addOrGet("york");
    vocab.addOrGet("1989");
    vocab.addOrGet("24");
    vocab.addOrGet("26");
    vocab.addOrGet("english");
    vocab.addOrGet("tree");
    vocab.addOrGet("einbaum");

    float[] enStopStemExpectedOOVRates = {1f, 18/19f, 4/7.0f, 0f};
    float[] enStopExpectedOOVRates = {1f, 18/19f, 4/7.0f, 2/12f};
    float[] enStemExpectedOOVRates = {1f, 36/37f, 15/18.0f, 7/19f};
View Full Code Here


    vocab.addOrGet("1989");
    vocab.addOrGet("24");
    vocab.addOrGet("26");
    vocab.addOrGet("english");
    vocab.addOrGet("tree");
    vocab.addOrGet("einbaum");

    float[] enStopStemExpectedOOVRates = {1f, 18/19f, 4/7.0f, 0f};
    float[] enStopExpectedOOVRates = {1f, 18/19f, 4/7.0f, 2/12f};
    float[] enStemExpectedOOVRates = {1f, 36/37f, 15/18.0f, 7/19f};
    float[] enExpectedOOVRates = {1f, 36/37f, 15/18.0f, 9/19f};
View Full Code Here

  @Test
  public void testChineseOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
View Full Code Here

    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
    testOOV("zh", vocab, true, true, zhExpectedOOVRates);
View Full Code Here

    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
    testOOV("zh", vocab, true, true, zhExpectedOOVRates);
    testOOV("zh", vocab, false, true, zhExpectedOOVRates);
View Full Code Here

  @Test
  public void testTurkishOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");
View Full Code Here

    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
View Full Code Here

    List<String> sentences = readInput(dir + "data/tokenizer/test/tr-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
    float[] trStemExpectedOOVRates = {0.85714287f, 1f, 0.71428573f, 0.33333334f};
View Full Code Here

    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("ispanyol");
    vocab.addOrGet("isim");
    vocab.addOrGet("10");

    float[] trStopStemExpectedOOVRates = {0.85714287f, 1f, 0.6f, 0f};
    float[] trStopExpectedOOVRates = {1f, 1f, 0.8f, 0.5f};
    float[] trStemExpectedOOVRates = {0.85714287f, 1f, 0.71428573f, 0.33333334f};
    float[] trExpectedOOVRates = {1f, 1f, 0.85714287f, 0.6666667f};
View Full Code Here

  @Test
  public void testArabicOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/ar-test.tok.stemmed.stop");
    for (String token : sentences.get(0).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("2011");
    float[] arStopStemExpectedOOVRates = {0f, 1f, 0.8181818f, 1f};
    float[] arStopExpectedOOVRates = {0.6666667f, 1f, 0.8181818f, 1f};
    float[] arStemExpectedOOVRates = {0f, 1f, 0.85714287f, 1f};
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact: coftware#gmail.com.