// Seed a fresh vocabulary with every space-separated token found on the
// first line returned by readInput(), then register the extra term "2011".
VocabularyWritable vocab = new VocabularyWritable();
List<String> sentences = readInput(dir + "data/tokenizer/test/ar-test.tok.stemmed.stop");
String[] firstLineTokens = sentences.get(0).split(" ");
for (int i = 0; i < firstLineTokens.length; i++) {
  vocab.addOrGet(firstLineTokens[i]);
}
vocab.addOrGet("2011");

// Expected-rate fixtures, four values each. Judging by the names these are
// OOV rates for the plain, stemmed, stopword-filtered and stemmed+stopword
// configurations -- NOTE(review): confirm against the code that consumes them.
float[] arExpectedOOVRates = {0.6666667f, 1f, 0.85714287f, 1f};
float[] arStemExpectedOOVRates = {0f, 1f, 0.85714287f, 1f};
float[] arStopExpectedOOVRates = {0.6666667f, 1f, 0.8181818f, 1f};
float[] arStopStemExpectedOOVRates = {0f, 1f, 0.8181818f, 1f};