Package edu.cmu.sphinx.linguist.language.grammar

Source Code of edu.cmu.sphinx.linguist.language.grammar.FSTGrammar

/*
* Copyright 1999-2002 Carnegie Mellon University. 
* Portions Copyright 2002 Sun Microsystems, Inc. 
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.linguist.language.grammar;

import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.ExtendedStreamTokenizer;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.*;

import java.io.IOException;
import java.util.*;

/**
* Loads a grammar from a file representing a finite-state transducer (FST) in the 'ARPA' grammar format. The ARPA FST
* format is like so (the explanation of the format is below): <br>
* <p/>
* <pre>
*  I 2
*  F 0 2.30259
*  T 0 1 &lt;unknown&gt; &lt;unknown&gt; 2.30259
*  T 0 4 wood wood 1.60951
*  T 0 5 cindy cindy 1.60951
*  T 0 6 pittsburgh pittsburgh 1.60951
*  T 0 7 jean jean 1.60951
*  F 1 2.89031
*  T 1 0 , , 0.587725
*  T 1 4 wood wood 0.58785
*  F 2 3.00808
*  T 2 0 , , 0.705491
*  T 2 1 &lt;unknown&gt; &lt;unknown&gt; 0.58785
*  F 3 2.30259
*  T 3 0
*  F 4 2.89031
*  T 4 0 , , 0.587725
*  T 4 6 pittsburgh pittsburgh 0.58785
*  F 5 2.89031
*  T 5 0 , , 0.587725
*  T 5 7 jean jean 0.58785
*  F 6 2.89031
*  T 6 0 , , 0.587725
*  T 6 5 cindy cindy 0.58785
*  F 7 1.28093
*  T 7 0 , , 0.454282
*  T 7 4 wood wood 1.28093
*   </pre>
* <p/>
* <b>Key: </b>
* <p/>
* <pre>
*  I - initial node, so &quot;I 2&quot; means node 2 is the initial node
*  F - final node, e.g., &quot;F 0 2.30259&quot; means that node 0 is a final node,
*  and the probability of finishing at node 0 is 2.30259 (in -ln)
*  T - transition, &quot;T 0 4 wood wood 1.60951&quot; means &quot;transitioning from
*  node 0 to node 4, the output is wood and the machine is now
*  in the node wood, and the probability associated with the
*  transition is 1.60951 (in -ln)&quot;. &quot;T 6 0 , , 0.587725&quot; is
*  a backoff transition, and the output is null (epsilon in
*  the picture), and the machine is now in the null node.
*   </pre>
* <p/>
* <p/>
* Probabilities read in from the FST file are in negative natural log format and are converted to the internal logMath
* log base.
* <p/>
* As the FST file is read in, a Grammar object that is structurally equivalent to the FST is created. The steps of
* converting the FST file to a Grammar object are: <ol>
* <p/>
* <li><b>Create all the Grammar nodes </b> <br> Go through the entire FST file and for each word transition, take the
* destination node ID and create a grammar node using that ID. These nodes are kept in a hashmap to make sure they
* are created once for each ID. Therefore, we get one word per grammar node.</li>
* <p/>
* <br>
* <p/>
* <li><b>Create an end node for each Grammar node </b> <br> This end node is used for backoff transitions into the
* Grammar node, so that it will not go through the word itself, but instead go directly to the end of the word.
* Moreover, we also add an <b>optional </b> silence node between the grammar node and its end node. The result of this
* step on each grammar node (shown in Figure 1 below as the circle with "word") is as follows. The end node is the empty
* circle at the far right: <br> <img src="doc-files/fst-end-node.jpg"> <br> <b>Figure 1: Addition of end node and the
* <i>optional </i> silence. </b> </li>
* <p/>
* <br>
* <p/>
* <li><b>Create the transitions </b> <br> Read through the entire FST file, and for each line indicating a transition,
* connect up the corresponding Grammar nodes. Backoff transitions and null transitions (i.e., the ones that do not
* output a word) will be linked to the end node of a grammar node.</li>
* <p/>
* </ol>
*/

public class FSTGrammar extends Grammar {

    /** The property for the location of the FST n-gram file. */
    @S4String(defaultValue = "default.arpa_gram")
    public final static String PROP_PATH = "path";

    // TODO: If this property turns out to be worthwhile, turn this
    // into a full fledged property
    private boolean addInitialSilenceNode;

    // TODO: If this property turns out to be worthwhile, turn this
    // into a full fledged property

    // ------------------------------
    // Configuration data
    // -------------------------------

    private boolean addOptionalSilence;
    private final boolean ignoreUnknownTransitions = true;
    private String path;
    private LogMath logMath;

    private final Map<String, GrammarNode> nodes = new HashMap<String, GrammarNode>();
    private final Set<GrammarNode> expandedNodes = new HashSet<GrammarNode>();


    /**
     * Create class from reference text (not implemented).
     *
     * @param bogusText dummy variable
     */
    @Override
    protected GrammarNode createGrammar(String bogusText)
            throws NoSuchMethodException {
        throw new NoSuchMethodException("Does not create "
                + "grammar with reference text");
    }


    public FSTGrammar(String path, boolean showGrammar, boolean optimizeGrammar, boolean addSilenceWords, boolean addFillerWords, Dictionary dictionary) {
        super(showGrammar,optimizeGrammar,addSilenceWords,addFillerWords,dictionary);
        this.path = path;
        logMath = LogMath.getLogMath();
    }

    public FSTGrammar() {

    }


    /*
    * (non-Javadoc)
    *
    * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
    */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
       
        path = ps.getString(PROP_PATH);
    }


    /**
     * Creates the grammar.
     *
     * @return the initial node for the grammar.
     */
    @Override
    protected GrammarNode createGrammar() throws IOException {

        GrammarNode initialNode = null;
        GrammarNode finalNode = null;

        // first pass create the FST nodes
        int maxNodeId = createNodes(path);

        // create the final node:
        finalNode = createGrammarNode(++maxNodeId, Dictionary.SILENCE_SPELLING);
        finalNode.setFinalNode(true);

        // replace each word node with a pair of nodes, which
        // consists of the word node and a new dummy end node, which is
        // for adding null or backoff transitions
        maxNodeId = expandWordNodes(maxNodeId);

        ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);

        // Second pass, add all of the arcs

        while (!tok.isEOF()) {
            String token;
            tok.skipwhite();
            token = tok.getString();

            // System.out.println(token);

            if (token == null) {
                break;

            } else if (token.equals("I")) {
                assert initialNode == null;
                int initialID = tok.getInt("initial ID");
                String nodeName = "G" + initialID;

                // TODO: FlatLinguist requires the initial grammar node
                // to contain a single silence. We'll do that for now,
                // but once the FlatLinguist is fixed, this should be
                // returned to its former method of creating an empty
                // initial grammar node
                //          initialNode = createGrammarNode(initialID, false);

                initialNode = createGrammarNode(initialID,
                        Dictionary.SILENCE_SPELLING);
                nodes.put(nodeName, initialNode);

                // optionally add a silence node
                if (addInitialSilenceNode) {
                    GrammarNode silenceNode =
                            createGrammarNode(++maxNodeId,
                                    Dictionary.SILENCE_SPELLING);
                    initialNode.add(silenceNode, LogMath.LOG_ONE);
                    silenceNode.add(initialNode, LogMath.LOG_ONE);
                }

            } else if (token.equals("T")) {
                int thisID = tok.getInt("this id");
                int nextID = tok.getInt("next id");

                GrammarNode thisNode = get(thisID);
                GrammarNode nextNode = get(nextID);

                // if the source node is an FSTGrammarNode, we want
                // to join the endNode to the destination node

                if (hasEndNode(thisNode)) {
                    thisNode = getEndNode(thisNode);
                }

                float lnProb = 0f;        // negative natural log
                String output = tok.getString();

                if (output == null || output.equals(",")) {

                    // these are epsilon (meaning backoff) transitions

                    if (output != null && output.equals(",")) {
                        tok.getString(); // skip the word
                        lnProb = tok.getFloat("probability");
                    }

                    // if the destination node has been expanded
                    // we actually want to add the backoff transition
                    // to the endNode

                    if (hasEndNode(nextNode)) {
                        nextNode = getEndNode(nextNode);
                    }

                } else {
                    String word = tok.getString();     // skip words
                    lnProb = tok.getFloat("probability");

                    if (ignoreUnknownTransitions && word.equals("<unknown>")) {
                        continue;
                    }
                    /*
                    * System.out.println(nextNode + ": " + output);
                    */
                    assert hasWord(nextNode);
                }

                thisNode.add(nextNode, convertProbability(lnProb));

            } else if (token.equals("F")) {
                int thisID = tok.getInt("this id");
                float lnProb = tok.getFloat("probability");

                GrammarNode thisNode = get(thisID);
                GrammarNode nextNode = finalNode;

                if (hasEndNode(thisNode)) {
                    thisNode = getEndNode(thisNode);
                }

                thisNode.add(nextNode, convertProbability(lnProb));
            }
        }
        tok.close();

        assert initialNode != null;

        return initialNode;
    }


    /**
     * Reads the FST file in the given path, and creates the nodes in the FST file.
     *
     * @param path the path of the FST file to read
     * @return the highest ID of all nodes
     * @throws java.io.IOException
     */
    private int createNodes(String path) throws IOException {
        ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
        int maxNodeId = 0;
        while (!tok.isEOF()) {
            tok.skipwhite();
            String token = tok.getString();
            if (token == null) {
                break;
            } else if (token.equals("T")) {
                tok.getInt("src id"); // toss source node
                int id = tok.getInt("dest id"); // dest node numb
                if (id > maxNodeId) {
                    maxNodeId = id;
                }
                String word1 = tok.getString(); // get word
                if (word1 == null) {
                    continue;
                }
                String word2 = tok.getString(); // get word
                tok.getString(); // toss probability
                String nodeName = "G" + id;
                GrammarNode node = nodes.get(nodeName);
                if (node == null) {
                    if (word2.equals(",")) {
                        node = createGrammarNode(id, false);
                    } else {
                        node = createGrammarNode(id, word2.toLowerCase());
                    }
                    nodes.put(nodeName, node);
                } else {
                    if (!word2.equals(",")) {
                        /*
                         * if (!word2.toLowerCase().equals(getWord(node))) {
                         * System.out.println(node + ": " + word2 + ' ' + getWord(node)); }
                         */
                        assert (word2.toLowerCase().equals(getWord(node)));
                    }
                }
            }
        }
        tok.close();
        return maxNodeId;
    }


    /**
     * Expand each of the word nodes into a pair of nodes, as well as adding an optional silence node between the
     * grammar node and its end node.
     *
     * @param maxNodeID the node ID to start with for the new nodes
     * @return the last (or maximum) node ID
     */
    private int expandWordNodes(int maxNodeID) {
        Collection<GrammarNode> allNodes = nodes.values();
        String[][] silence = {{Dictionary.SILENCE_SPELLING}};
        for (GrammarNode node :allNodes) {
            // if it has at least one word, then expand the node
            if (node.getNumAlternatives() > 0) {
                GrammarNode endNode = createGrammarNode(++maxNodeID, false);
                node.add(endNode, LogMath.LOG_ONE);
                // add an optional silence
                if (addOptionalSilence) {
                    GrammarNode silenceNode = createGrammarNode(++maxNodeID,
                            silence);
                    node.add(silenceNode, LogMath.LOG_ONE);
                    silenceNode.add(endNode, LogMath.LOG_ONE);
                }
                expandedNodes.add(node);
            }
        }
        return maxNodeID;
    }


    /**
     * Converts the probability from -ln to logmath
     *
     * @param lnProb the probability to convert. Probabilities in the arpa format in negative natural log format. We
     *               convert them to logmath.
     * @return the converted probability in logMath log base
     */
    private float convertProbability(float lnProb) {
        return logMath.lnToLog(-lnProb);
    }


    /**
     * Given an id returns the associated grammar node
     *
     * @param id the id of interest
     * @return the grammar node or null if none could be found with the proper id
     */
    private GrammarNode get(int id) {
        String name = "G" + id;
        GrammarNode grammarNode = nodes.get(name);
        if (grammarNode == null) {
            grammarNode = createGrammarNode(id, false);
            nodes.put(name, grammarNode);
        }
        return grammarNode;
    }


    /**
     * Determines if the node has a word
     *
     * @param node the grammar node of interest
     * @return true if the node has a word
     */
    private boolean hasWord(GrammarNode node) {
        return (node.getNumAlternatives() > 0);
    }


    /**
     * Gets the word from the given grammar ndoe
     *
     * @param node the node of interest
     * @return the word (or null if the node has no word)
     */
    private String getWord(GrammarNode node) {
        String word = null;
        if (node.getNumAlternatives() > 0) {
            Word[][] alternatives = node.getAlternatives();
            word = alternatives[0][0].getSpelling();
        }
        return word;
    }


    /**
     * Determines if the given node has an end node associated with it.
     *
     * @param node the node of interest
     * @return <code>true</code> if the given node has an end node.
     */
    private boolean hasEndNode(GrammarNode node) {
        return (expandedNodes.contains(node));
    }


    /**
     * Retrieves the end node associated with the given node
     *
     * @param node the node of interest
     * @return the ending node or null if no end node is available
     */
    private GrammarNode getEndNode(GrammarNode node) {
        GrammarArc[] arcs = node.getSuccessors();
        assert arcs != null && arcs.length > 0;
        return arcs[0].getGrammarNode();
    }
}
TOP

Related Classes of edu.cmu.sphinx.linguist.language.grammar.FSTGrammar

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.