Package cc.mallet.fst.semi_supervised.constraints

Source Code of cc.mallet.fst.semi_supervised.constraints.SelfTransitionGEConstraint

/* Copyright (C) 2010 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.fst.semi_supervised.constraints;

import java.util.ArrayList;
import java.util.BitSet;

import cc.mallet.fst.SumLattice;
import cc.mallet.fst.semi_supervised.StateLabelMap;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;

/**
* GE Constraint on the probability of self-transitions in the FST.
*
* @author Gregory Druck
*/
public class SelfTransitionGEConstraint implements GEConstraint {

  private double selfTransProb;
  private double numTokens;
  private double expectation;
  private double weight;
 
 
  /**
   * @param selfTransProb Probability of self-transition
   * @param weight Weight of this constraint in the objective function
   */
  public SelfTransitionGEConstraint(double selfTransProb, double weight) {
    this.selfTransProb = selfTransProb;
    this.weight = weight;
    this.numTokens = 0;
    this.expectation = 0;
  }
 
  private SelfTransitionGEConstraint(double selfTransProb, double weight, double numTokens, double expectation) {
    this.selfTransProb = selfTransProb;
    this.weight = weight;
    this.numTokens = numTokens;
    this.expectation = expectation;
  }
 
  public GEConstraint copy() {
    return new SelfTransitionGEConstraint(selfTransProb, weight, numTokens, expectation);
  }
 
  public boolean isOneStateConstraint() {
    return false;
  }
 
  public void setStateLabelMap(StateLabelMap map) {}
 
  // no pre-processing possible here
  public void preProcess(FeatureVector fv) {}
 
  public BitSet preProcess(InstanceList data) {
    // count number of tokens
    BitSet bitSet = new BitSet(data.size());
    bitSet.set(0, data.size(), true);
    for (Instance instance : data) {
      FeatureVectorSequence fvs = (FeatureVectorSequence)instance.getData();
      this.numTokens += fvs.size();
    }
    return bitSet;
  }   
 
  public double getCompositeConstraintFeatureValue(FeatureVector fv, int ip, int si1, int si2) {
    if (si1 == si2) {
      return this.weight * (selfTransProb / expectation);
    }
    else {
      return this.weight * ((1-selfTransProb) / (numTokens - expectation));
    }
  }

  public double getValue() {
    double selfTransEx = this.expectation / this.numTokens;
    if (selfTransProb == 1) {
      return weight * Math.log(selfTransEx);
    }
    else if (selfTransProb == 0) {
      return weight * Math.log(1-selfTransEx);
    }
   
    return weight * (selfTransProb * (Math.log(selfTransEx) - Math.log(selfTransProb))
      + ((1-selfTransProb) * (Math.log(1-selfTransEx)-Math.log(1-selfTransProb))));
  }

  public void zeroExpectations() {
    this.expectation = 0;
  }
 
  public void computeExpectations(ArrayList<SumLattice> lattices) {
    double[][][] xis;
    for (int i = 0; i < lattices.size(); i++) {
      SumLattice lattice = lattices.get(i);
      xis = lattice.getXis();
      int numStates = xis[0].length;
      FeatureVectorSequence fvs = (FeatureVectorSequence)lattice.getInput();
      for (int ip = 0; ip < fvs.size(); ++ip) {
        for (int si = 0; si < numStates; si++) {
          this.expectation += Math.exp(xis[ip][si][si]);
        }
      }
    }
    System.err.println("Self transition expectation: " + (this.expectation/this.numTokens));
  }
}
TOP

Related Classes of cc.mallet.fst.semi_supervised.constraints.SelfTransitionGEConstraint

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.