Package edu.umd.hooka.alignment.model1

Source Code of edu.umd.hooka.alignment.model1.Model1Base

package edu.umd.hooka.alignment.model1;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.OutputCollector;

import edu.umd.hooka.Int2FloatMap;
import edu.umd.hooka.PhrasePair;
import edu.umd.hooka.alignment.AlignmentModel;
import edu.umd.hooka.alignment.PartialCountContainer;
import edu.umd.hooka.ttables.TTable;

public abstract class Model1Base extends AlignmentModel {

  protected final int maxF = 214350; // TODO: fix
  protected boolean _includeEnglishNullWord = true;

  public Model1Base(boolean incNullWord) {
    _includeEnglishNullWord = incNullWord;
  }

  TreeMap<IntWritable, Int2FloatMap> counts =
    new TreeMap<IntWritable, Int2FloatMap>();
  IntWritable nullWord = new IntWritable(0);

  public void writePartialCounts(OutputCollector<IntWritable,PartialCountContainer> output) throws IOException
  {
    PartialCountContainer pcc = new PartialCountContainer();
    Iterator<Map.Entry<IntWritable, Int2FloatMap>> i = counts.entrySet().iterator();
    while (i.hasNext()) {
      Map.Entry<IntWritable, Int2FloatMap> p = i.next();
      pcc.setContent(p.getValue().getAsIndexedFloatArray());
      output.collect(p.getKey(), pcc);
      i.remove();
    }
  }
 
  public void addPartialTranslationCountsToTTable(TTable tcounts) {
    Iterator<Map.Entry<IntWritable, Int2FloatMap>> i = counts.entrySet().iterator();
    while (i.hasNext()) {
      Map.Entry<IntWritable, Int2FloatMap> p = i.next();
      int ei = p.getKey().get();
      for (Map.Entry<Integer, FloatWritable> f:p.getValue().entrySet()) {
        tcounts.add(ei, f.getKey(), f.getValue().get());
      }
      i.remove();
    }
  }
 
  /*tcmap is actually a 2D array projected onto a linear space*/ 
  FloatWritable[] tcmap = null;
  int width = 0;
  protected void initializeCountTableForSentencePair(PhrasePair pp) {
    int ew[] = pp.getE().getWords();
    int fw[] = pp.getF().getWords();
    width = fw.length;
    // add null word to the beginning of e sentence:
    tcmap = new FloatWritable[(ew.length+1) * fw.length];
    int c = 0;
    Int2FloatMap ecm = null;
    if (_includeEnglishNullWord) {
      ecm = counts.get(nullWord);
      if (ecm == null) {
        ecm = new Int2FloatMap();
        counts.put(nullWord, ecm);
      }
      for (int fi:fw) {
        ecm.createIfMissing(fi);
        tcmap[c] = ecm.getFloatWritable(fi);
        c++;
      }
    } else { c += fw.length; }
    for (int ei:ew) {
      IntWritable cew = new IntWritable(ei);
      ecm = counts.get(cew);
      if (ecm == null) {
        ecm = new Int2FloatMap();
        counts.put(cew, ecm);
      }
      for (int fi:fw) {
        ecm.createIfMissing(fi);
        tcmap[c] = ecm.getFloatWritable(fi);
        c++;
      }
    }
  }
 
  /**
   * Normally, i is a zero based array, but since E
   * may have a null word, in this case, i=0 refers to the
   * null word, i=1 refers to the first word.  For j,
   * j=0 refers to the first word.
   */
  protected final int getTranslationCoord(int i_plus1, int j) {
    return i_plus1 * width + j;
  }
 
  protected final void addTranslationCount(int i_plus1, int j, float v) {
    if (v == 0.0f) return;
    int coord = getTranslationCoord(i_plus1, j);
    if (tcmap[coord] == null) {
      throw new RuntimeException("isNull(" + i_plus1 + "," + j +")");
    }
    //add v to existing count
    tcmap[coord].set(tcmap[coord].get() + v);
  }

  protected final void addTranslationCount(int coord, float v) {
    if (v == 0.0f) return;
    tcmap[coord].set(tcmap[coord].get() + v);
  }
}
TOP

Related Classes of edu.umd.hooka.alignment.model1.Model1Base

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.