/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.grammar.generator;
import java.util.Stack;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import net.sourceforge.chaperon.grammar.Grammar;
import net.sourceforge.chaperon.grammar.production.Production;
import net.sourceforge.chaperon.grammar.production.ProductionList;
import net.sourceforge.chaperon.grammar.production.ReduceType;
import net.sourceforge.chaperon.grammar.symbol.NonTerminalSymbol;
import net.sourceforge.chaperon.grammar.symbol.Symbol;
import net.sourceforge.chaperon.grammar.symbol.TerminalSymbol;
import net.sourceforge.chaperon.grammar.token.Associativity;
//import net.sourceforge.chaperon.grammar.token.Comment;
import net.sourceforge.chaperon.grammar.token.Token;
import net.sourceforge.chaperon.grammar.token.TokenList;
//import net.sourceforge.chaperon.grammar.token.Whitespace;
import net.sourceforge.chaperon.grammar.token.definition.Alternation;
import net.sourceforge.chaperon.grammar.token.definition.BeginOfLine;
import net.sourceforge.chaperon.grammar.token.definition.CharacterClass;
import net.sourceforge.chaperon.grammar.token.definition.CharacterClassElement;
import net.sourceforge.chaperon.grammar.token.definition.CharacterInterval;
import net.sourceforge.chaperon.grammar.token.definition.CharacterSequence;
import net.sourceforge.chaperon.grammar.token.definition.CharacterSet;
import net.sourceforge.chaperon.grammar.token.definition.Concatenation;
import net.sourceforge.chaperon.grammar.token.definition.UniversalCharacter;
import net.sourceforge.chaperon.grammar.token.definition.EndOfLine;
import net.sourceforge.chaperon.grammar.token.definition.DefinitionElement;
//import net.sourceforge.chaperon.grammar.token.definition.DefinitionList;
/**
 * SAX content handler which builds a {@link Grammar} from a stream of SAX
 * events.
 *
 * <p>The handler keeps a stack of the objects which are currently under
 * construction. Every recognized start tag pushes a new object, and the
 * matching end tag pops it again and attaches it to the object which is then
 * on top of the stack (its parent). After the <code>grammar</code> element was
 * closed, the result is available through {@link #getGrammar()}.</p>
 *
 * <p>Elements are identified by their local name. If the parser doesn't
 * report local names (namespace processing disabled), the qualified name is
 * used instead.</p>
 *
 * @author Stephan Michels
 * @version %version%
 */
public class SAXGrammarGenerator extends DefaultHandler
{
  /** The URI of the grammar DTD */
  public final static String URI = "-//Chaperon//DTD grammar 1.0//EN";

  /** Element name of the grammar (root of the description) */
  public final static String GRAMMAR_ELEMENT = "grammar";

  /** URI attribute of the grammar */
  public final static String URI_ATTRIBUTE = "uri";

  /** Element name of an alternation */
  public final static String ALTERNATION_ELEMENT = "alt";

  /** Element name of a begin of line */
  public final static String BEGINOFLINE_ELEMENT = "bol";

  /** Element name of a character class */
  public final static String CHARACTERCLASS_ELEMENT = "cc";

  /** Element name of a negated character class */
  public final static String NEGATEDCHARACTERCLASS_ELEMENT = "ncc";

  /** Element name of a character interval */
  public final static String CHARACTERINTERVAL_ELEMENT = "ci";

  /** Attribute name of the min property */
  public final static String CHARACTERINTERVAL_MIN_ATTRIBUTE = "min";

  /** Attribute name of the max property */
  public final static String CHARACTERINTERVAL_MAX_ATTRIBUTE = "max";

  /** Element name of a character sequence */
  public final static String CHARACTERSEQUENCE_ELEMENT = "string";

  /** Attribute name of the sequence property */
  public final static String CHARACTERSEQUENCE_SEQUENCE_ATTRIBUTE = "content";

  /** Element name of a character set */
  public final static String CHARACTERSET_ELEMENT = "cs";

  /** Attribute name of the characters property */
  public final static String CHARACTERSET_CHARACTERS_ATTRIBUTE = "content";

  /** Element name of a concatenation */
  public final static String CONCATENATION_ELEMENT = "concat";

  /** Element name of an universal character */
  public final static String UNIVERSALCHARACTER_ELEMENT = "dot";

  /** Element name of an end of line */
  public final static String ENDOFLINE_ELEMENT = "eol";

  /** Element name of a token */
  public final static String TOKEN_ELEMENT = "token";

  /** Attribute name of the symbol property */
  public final static String TOKEN_SYMBOL_ATTRIBUTE = "tsymbol";

  /** Attribute name of the associativity property */
  public final static String TOKEN_ASSOCIATIVE_ATTRIBUTE = "assoc";

  /** Attribute name of the minOccurs property */
  public final static String TOKENDEFINITION_MINOCCURS_ATTRIBUTE = "minOccurs";

  /** Attribute name of the maxOccurs property */
  public final static String TOKENDEFINITION_MAXOCCURS_ATTRIBUTE = "maxOccurs";

  /** Element name of the token list */
  public final static String TOKENLIST_ELEMENT = "tokens";

  /** Element name of the list of ignorable tokens */
  public final static String IGNORABLETOKENLIST_ELEMENT = "ignorabletokens";

  /** Element name of a production */
  public final static String PRODUCTION_ELEMENT = "production";

  /** Attribute name of the Symbol property */
  public final static String PRODUCTION_SYMBOL_ATTRIBUTE = "ntsymbol";

  /** Attribute name of the reducetype property */
  public final static String PRODUCTION_REDUCETYPE_ATTRIBUTE = "reducetype";

  /** Attribute name of the Precedence property */
  public final static String PRODUCTION_PRECEDENCE_ATTRIBUTE = "prec";

  /** Element name of the production list */
  public final static String PRODUCTIONLIST_ELEMENT = "productions";

  /** Element name of a nonterminal symbol */
  public final static String NONTERMINALSYMBOL_ELEMENT = "ntsymbol";

  /** Element name of a terminal symbol */
  public final static String TERMINALSYMBOL_ELEMENT = "tsymbol";

  /** Element name of the start symbol */
  public final static String STARTSYMBOL_ELEMENT = "ssymbol";

  /** Attribute name of the name property of a symbol */
  public final static String SYMBOL_NAME_ATTRIBUTE = "name";

  /** Attribute name of the symbol property of the start symbol */
  public final static String STARTSYMBOL_NAME_ATTRIBUTE = "ntsymbol";

  /** The grammar, which is generated (assigned when the grammar element starts) */
  private Grammar grammar;

  /**
   * Objects which are currently under construction, innermost element on top.
   * Initialized eagerly, so that element events can be handled even if
   * startDocument() was never delivered to this handler.
   */
  private Stack stack = new Stack();

  /**
   * Returns the generated Grammar
   *
   * @return Grammar, or <code>null</code> if no grammar element was
   *         processed so far
   */
  public Grammar getGrammar()
  {
    return grammar;
  }

  /**
   * Receive notification of the beginning of a document. Discards all
   * objects left over from a previous document.
   */
  public void startDocument()
  {
    stack = new Stack();
  }

  /**
   * Return the name which is used to identify an element. This is the local
   * name, or the qualified name if the parser didn't report a local name
   * (namespace processing is not being performed).
   *
   * @param localName Local name as reported by the parser
   * @param qName Qualified name as reported by the parser
   *
   * @return Name of the element, never <code>null</code>
   */
  private static String getElementName(String localName, String qName)
  {
    if ((localName != null) && (localName.length() > 0))
      return localName;

    return (qName != null) ? qName : "";
  }

  /**
   * Return the content of the minOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return minOccurs attribute; 1 if the attribute is missing, empty or not
   *         an integer number; negative values are raised to 0
   */
  private int getMinOccursFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(TOKENDEFINITION_MINOCCURS_ATTRIBUTE);

    if ((attribute == null) || (attribute.length() == 0))
      return 1;

    try
    {
      return Math.max(0, Integer.parseInt(attribute));
    }
    catch (NumberFormatException e)
    {
      // not an integer number, fall back to the default
      return 1;
    }
  }

  /**
   * Return the content of the maxOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return maxOccurs attribute; Integer.MAX_VALUE for "*"; 1 if the
   *         attribute is missing, empty or not an integer number; values
   *         lower than 1 are raised to 1
   */
  private int getMaxOccursFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(TOKENDEFINITION_MAXOCCURS_ATTRIBUTE);

    if ((attribute == null) || (attribute.length() == 0))
      return 1;

    // the asterisk stands for an unbounded count
    if (attribute.equals("*"))
      return Integer.MAX_VALUE;

    try
    {
      return Math.max(1, Integer.parseInt(attribute));
    }
    catch (NumberFormatException e)
    {
      // not an integer number, fall back to the default
      return 1;
    }
  }

  /**
   * Return the content of the associativity attribute
   *
   * @param atts Attributes of an element
   *
   * @return Associativity attribute, Associativity.NONASSOC if the attribute
   *         is missing or empty
   */
  private Associativity getAssociativityFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(TOKEN_ASSOCIATIVE_ATTRIBUTE);

    if ((attribute != null) && (attribute.length() > 0))
      return Associativity.valueOf(attribute);
    return Associativity.NONASSOC;
  }

  /**
   * Return the content of the reducetype attribute
   *
   * @param atts Attributes of an element
   *
   * @return Reducetype attribute, ReduceType.NORMAL if the attribute is
   *         missing or empty
   */
  private ReduceType getReduceTypeFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(PRODUCTION_REDUCETYPE_ATTRIBUTE);

    if ((attribute != null) && (attribute.length() > 0))
      return ReduceType.valueOf(attribute);
    return ReduceType.NORMAL;
  }

  /**
   * Return the first character of a mandatory attribute of a character
   * interval
   *
   * @param atts Attributes of an element
   * @param name Name of the attribute
   *
   * @return First character of the attribute value
   *
   * @throws SAXException If the attribute is missing or empty
   */
  private static char getCharacterFromAttributes(Attributes atts, String name)
    throws SAXException
  {
    String attribute = atts.getValue(name);

    if ((attribute == null) || (attribute.length() == 0))
      throw new SAXException("Attribute '" + name + "' of element '"
                             + CHARACTERINTERVAL_ELEMENT + "' is missing or empty");

    return attribute.charAt(0);
  }

  /**
   * Receive notification of the beginning of an element. Creates the object
   * which corresponds to the element and pushes it onto the stack.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the
   *                     element has no Namespace URI or if Namespace
   *                     processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if
   *                  Namespace processing is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if
   *              raw names are not available.
   * @param atts The attributes attached to the element. If there are no
   *             attributes, it shall be an empty Attributes object.
   *
   * @throws SAXException If the element could not be processed. Swallowing
   *                      the failure would leave the stack without the entry
   *                      the matching end tag expects.
   */
  public void startElement(String namespaceURI, String localName,
                           String qName, Attributes atts) throws SAXException
  {
    String name = getElementName(localName, qName);

    try
    {
      if (name.equals(GRAMMAR_ELEMENT))
      {
        grammar = new Grammar();
        grammar.setURI(atts.getValue(URI_ATTRIBUTE));
        stack.push(grammar);
      }
      else if ((name.equals(TOKENLIST_ELEMENT))
               || (name.equals(IGNORABLETOKENLIST_ELEMENT)))
      {
        // both lists are collected the same way, they differ only in the
        // place where they are stored at the end of the element
        stack.push(new TokenList());
      }
      else if (name.equals(TOKEN_ELEMENT))
      {
        Token token = new Token(TerminalSymbol.valueOf(atts.getValue(TOKEN_SYMBOL_ATTRIBUTE)));

        token.setAssociativity(getAssociativityFromAttributes(atts));
        stack.push(token);
      }
      else if (name.equals(ALTERNATION_ELEMENT))
      {
        Alternation alternation = new Alternation();

        alternation.setMinOccurs(getMinOccursFromAttributes(atts));
        alternation.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(alternation);
      }
      else if (name.equals(CONCATENATION_ELEMENT))
      {
        Concatenation concatenation = new Concatenation();

        concatenation.setMinOccurs(getMinOccursFromAttributes(atts));
        concatenation.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(concatenation);
      }
      else if (name.equals(CHARACTERSEQUENCE_ELEMENT))
      {
        CharacterSequence charactersequence = new CharacterSequence();

        charactersequence.setMinOccurs(getMinOccursFromAttributes(atts));
        charactersequence.setMaxOccurs(getMaxOccursFromAttributes(atts));
        charactersequence.setSequence(atts.getValue(CHARACTERSEQUENCE_SEQUENCE_ATTRIBUTE));
        stack.push(charactersequence);
      }
      else if (name.equals(UNIVERSALCHARACTER_ELEMENT))
      {
        UniversalCharacter uni = new UniversalCharacter();

        uni.setMinOccurs(getMinOccursFromAttributes(atts));
        uni.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(uni);
      }
      else if (name.equals(BEGINOFLINE_ELEMENT))
      {
        stack.push(new BeginOfLine());
      }
      else if (name.equals(ENDOFLINE_ELEMENT))
      {
        stack.push(new EndOfLine());
      }
      else if ((name.equals(CHARACTERCLASS_ELEMENT))
               || (name.equals(NEGATEDCHARACTERCLASS_ELEMENT)))
      {
        CharacterClass characterclass = new CharacterClass();

        // the negated form differs only in the negation flag
        if (name.equals(NEGATEDCHARACTERCLASS_ELEMENT))
          characterclass.setNegation(true);
        characterclass.setMinOccurs(getMinOccursFromAttributes(atts));
        characterclass.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(characterclass);
      }
      else if (name.equals(CHARACTERSET_ELEMENT))
      {
        CharacterSet characterset = new CharacterSet();

        characterset.setCharacters(atts.getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE));
        stack.push(characterset);
      }
      else if (name.equals(CHARACTERINTERVAL_ELEMENT))
      {
        CharacterInterval characterinterval = new CharacterInterval();

        characterinterval.setMinimum(getCharacterFromAttributes(atts,
                                                                CHARACTERINTERVAL_MIN_ATTRIBUTE));
        characterinterval.setMaximum(getCharacterFromAttributes(atts,
                                                                CHARACTERINTERVAL_MAX_ATTRIBUTE));
        stack.push(characterinterval);
      }
      else if (name.equals(PRODUCTIONLIST_ELEMENT))
      {
        stack.push(new ProductionList());
      }
      else if (name.equals(PRODUCTION_ELEMENT))
      {
        Production production = new Production(
          NonTerminalSymbol.valueOf(atts.getValue(PRODUCTION_SYMBOL_ATTRIBUTE)));

        production.setReduceType(getReduceTypeFromAttributes(atts));

        String precedencesymbol = atts.getValue(PRODUCTION_PRECEDENCE_ATTRIBUTE);

        if ((precedencesymbol != null) && (precedencesymbol.length() > 0))
          production.setPrecedence(TerminalSymbol.valueOf(precedencesymbol));
        stack.push(production);
      }
      else if (name.equals(NONTERMINALSYMBOL_ELEMENT))
      {
        stack.push(NonTerminalSymbol.valueOf(atts.getValue(SYMBOL_NAME_ATTRIBUTE)));
      }
      else if (name.equals(TERMINALSYMBOL_ELEMENT))
      {
        stack.push(TerminalSymbol.valueOf(atts.getValue(SYMBOL_NAME_ATTRIBUTE)));
      }
      else if (name.equals(STARTSYMBOL_ELEMENT))
      {
        stack.push(NonTerminalSymbol.valueOf(atts.getValue(STARTSYMBOL_NAME_ATTRIBUTE)));
      }
      else
        System.out.println(name + " would not process");
    }
    catch (SAXException e)
    {
      // already describes the problem, pass it on unchanged
      throw e;
    }
    catch (Exception e)
    {
      throw new SAXException("Could not process the start of the element '" + name + "'", e);
    }
  }

  /**
   * Receive notification of the end of an element. Pops the object which
   * corresponds to the element from the stack and attaches it to its parent,
   * which is the new top of the stack.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the
   *                     element has no Namespace URI or if Namespace
   *                     processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if
   *                  Namespace processing is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if
   *              raw names are not available.
   *
   * @throws SAXException If the element could not be processed, for instance
   *                      because it occurs within an unexpected parent
   */
  public void endElement(String namespaceURI, String localName,
                         String qName) throws SAXException
  {
    String name = getElementName(localName, qName);

    try
    {
      if (name.equals(GRAMMAR_ELEMENT))
      {
        grammar = (Grammar) stack.pop();
      }
      else if (name.equals(TOKENLIST_ELEMENT))
      {
        TokenList tokens = (TokenList) stack.pop();
        Grammar parent = (Grammar) stack.peek();

        parent.getTokenList().addToken(tokens);
      }
      else if (name.equals(IGNORABLETOKENLIST_ELEMENT))
      {
        TokenList tokens = (TokenList) stack.pop();
        Grammar parent = (Grammar) stack.peek();

        parent.getIgnorableTokenList().addToken(tokens);
      }
      else if (name.equals(TOKEN_ELEMENT))
      {
        Token token = (Token) stack.pop();
        TokenList tokens = (TokenList) stack.peek();

        tokens.addToken(token);
      }
      else if ((name.equals(ALTERNATION_ELEMENT))
               || (name.equals(CONCATENATION_ELEMENT))
               || (name.equals(CHARACTERSEQUENCE_ELEMENT))
               || (name.equals(CHARACTERCLASS_ELEMENT))
               || (name.equals(UNIVERSALCHARACTER_ELEMENT))
               || (name.equals(BEGINOFLINE_ELEMENT))
               || (name.equals(ENDOFLINE_ELEMENT))
               || (name.equals(NEGATEDCHARACTERCLASS_ELEMENT)))
      {
        DefinitionElement definitionelement = (DefinitionElement) stack.pop();
        Object parent = stack.peek();

        // a definition element is either a part of a compound element or
        // the definition of a token; within any other parent it is dropped
        if (parent instanceof Alternation)
          ((Alternation) parent).addDefinitionElement(definitionelement);
        else if (parent instanceof Concatenation)
          ((Concatenation) parent).addDefinitionElement(definitionelement);
        else if (parent instanceof Token)
          ((Token) parent).setDefinition(definitionelement);
      }
      else if ((name.equals(CHARACTERSET_ELEMENT))
               || (name.equals(CHARACTERINTERVAL_ELEMENT)))
      {
        CharacterClassElement characterclasselement = (CharacterClassElement) stack.pop();
        CharacterClass characterclass = (CharacterClass) stack.peek();

        characterclass.addCharacterClassElement(characterclasselement);
      }
      else if (name.equals(PRODUCTIONLIST_ELEMENT))
      {
        ProductionList productions = (ProductionList) stack.pop();
        Grammar parent = (Grammar) stack.peek();

        parent.getProductionList().addProduction(productions);
      }
      else if (name.equals(PRODUCTION_ELEMENT))
      {
        Production production = (Production) stack.pop();
        ProductionList productions = (ProductionList) stack.peek();

        productions.addProduction(production);
      }
      else if ((name.equals(NONTERMINALSYMBOL_ELEMENT))
               || (name.equals(TERMINALSYMBOL_ELEMENT)))
      {
        // both kinds of symbols are appended to the definition of the
        // surrounding production
        Symbol symbol = (Symbol) stack.pop();
        Production production = (Production) stack.peek();

        production.getDefinition().addSymbol(symbol);
      }
      else if (name.equals(STARTSYMBOL_ELEMENT))
      {
        NonTerminalSymbol ssymbol = (NonTerminalSymbol) stack.pop();
        Grammar parent = (Grammar) stack.peek();

        parent.setStartSymbol(ssymbol);
      }
      else
        System.out.println(name + " would not process");
    }
    catch (Exception e)
    {
      throw new SAXException("Could not process the end of the element '" + name + "'", e);
    }
  }
}