// Build a vocabulary from the fourth entry (index 3) of the list that readInput
// returns for the Chinese test file, splitting that entry on single spaces.
VocabularyWritable vocab = new VocabularyWritable();
List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
String[] seedTokens = sentences.get(3).split(" ");
for (int i = 0; i < seedTokens.length; i++) {
  vocab.addOrGet(seedTokens[i]);
}

// Two extra tokens are registered by hand in addition to the ones read from the file.
vocab.addOrGet("1457");
vocab.addOrGet("19");

// Expected OOV rates handed to testOOV for the "zh" case.
// Original note: "all same since no stemming or stopword removal".
// NOTE(review): the four values below are not equal to each other, and both
// boolean arguments passed to testOOV are true -- confirm what "same" refers to.
float[] zhExpectedOOVRates = new float[] {
  0.6666667f,
  0.8666667f,
  0.72727275f,
  0f
};
testOOV("zh", vocab, true, true, zhExpectedOOVRates);