Examples of addOrGet()


Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

        Pattern p = Pattern.compile("(.+)\\tentropy .+nTrans");
        Matcher m = p.matcher(line);
        if ( m.find() ) {
          cur = m.group(1);

          int gerIndex = srcVocab.addOrGet(cur)
          logger.debug("Found: "+cur+" with index: "+gerIndex);


          List<PairOfIntFloat> indexProbPairs = new ArrayList<PairOfIntFloat>();
          float sumOfProbs = 0.0f;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

          topTrans.clear();
          earlyTerminate = false;    // reset status
          skipTerm = false;
          prev = srcTerm;
          int prevIndex = curIndex;
          curIndex = srcVocab.addOrGet(srcTerm);
          if(curIndex <= prevIndex){
            // we've seen this foreign term before. probably due to tokenization or sorting error in aligner. just ignore.
            logger.debug("FLAG: "+line);
            curIndex = prevIndex;    // revert curIndex value since we're skipping this one
            skipTerm = true;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

        Pattern p = Pattern.compile("(.+)\\tentropy .+nTrans");
        Matcher m = p.matcher(line);
        if(m.find()){
          cur = m.group(1);

          int gerIndex = srcVocab.addOrGet(cur)
          logger.debug("Found: "+cur+" with index: "+gerIndex);


          List<PairOfIntFloat> indexProbPairs = new ArrayList<PairOfIntFloat>();
          float sumOfProbs = 0.0f;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

          topTrans.clear();
          earlyTerminate = false;    // reset status
          skipTerm = false;
          prev = srcTerm;
          int prevIndex = curIndex;
          curIndex = srcVocab.addOrGet(srcTerm);
          if(curIndex <= prevIndex){
            // we've seen this foreign term before. probably due to tokenization or sorting error in aligner. just ignore.
            logger.debug("FLAG: "+line);
            curIndex = prevIndex;    // revert curIndex value since we're skipping this one
            skipTerm = true;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

          topTrans.clear();
          earlyTerminate = false;    //reset status
          skipTerm = false;
          prev = srcTerm;
          int prevIndex = curIndex;
          curIndex = srcVocab.addOrGet(srcTerm);
          if(curIndex <= prevIndex){
            //we've seen this foreign term before. probably due to tokenization or sorting error in aligner. just ignore.
            curIndex = prevIndex;    //revert curIndex value since we're skipping this one
            skipTerm = true;
            continue;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

        Pattern p = Pattern.compile("(.+)\\tentropy .+nTrans");
        Matcher m = p.matcher(line);
        if(m.find()){
          cur = m.group(1);

          int gerIndex = srcVocab.addOrGet(cur)
          logger.debug("Found: "+cur+" with index: "+gerIndex);


          List<PairOfIntFloat> indexProbPairs = new ArrayList<PairOfIntFloat>();
          float sumprob = 0.0f;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

        Pattern p = Pattern.compile("(.+)\\tentropy .+nTrans");
        Matcher m = p.matcher(line);
        if(m.find()){
          cur = m.group(1);

          int gerIndex = srcVocab.addOrGet(cur)
          logger.debug("Found: "+cur+" with index: "+gerIndex);


          List<PairOfIntFloat> indexProbPairs = new ArrayList<PairOfIntFloat>();
          float sumOfProbs = 0.0f;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

          topTrans.clear();
          earlyTerminate = false;    // reset status
          skipTerm = false;
          prev = srcTerm;
          int prevIndex = curIndex;
          curIndex = srcVocab.addOrGet(srcTerm);
          if(curIndex <= prevIndex){
            // we've seen this foreign term before. probably due to tokenization or sorting error in aligner. just ignore.
            logger.debug("FLAG: "+line);
            curIndex = prevIndex;    // revert curIndex value since we're skipping this one
            skipTerm = true;
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

  @Test
  public void testChineseOOVs() {
    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
View Full Code Here

Examples of edu.umd.hooka.VocabularyWritable.addOrGet()

    VocabularyWritable vocab = new VocabularyWritable();
    List<String> sentences = readInput(dir + "data/tokenizer/test/zh-test.tok.stemmed.stop");
    for (String token : sentences.get(3).split(" ")) {
      vocab.addOrGet(token);
    }
    vocab.addOrGet("1457");
    vocab.addOrGet("19");

    float[] zhExpectedOOVRates = {0.6666667f, 0.8666667f, 0.72727275f, 0f};     // all same since no stemming or stopword removal
    testOOV("zh", vocab, true, true, zhExpectedOOVRates);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.