package org.maltparserx.core.syntaxgraph.writer;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.SortedMap;
import java.util.regex.PatternSyntaxException;
import org.maltparserx.core.exception.MaltChainedException;
import org.maltparserx.core.io.dataformat.ColumnDescription;
import org.maltparserx.core.io.dataformat.DataFormatException;
import org.maltparserx.core.io.dataformat.DataFormatInstance;
import org.maltparserx.core.symbol.SymbolTable;
import org.maltparserx.core.syntaxgraph.PhraseStructure;
import org.maltparserx.core.syntaxgraph.TokenStructure;
import org.maltparserx.core.syntaxgraph.node.NonTerminalNode;
import org.maltparserx.core.syntaxgraph.node.PhraseStructureNode;
import org.maltparserx.core.syntaxgraph.node.TokenNode;
/**
 * Writes phrase structure graphs in a bracketed notation, one sentence per
 * call to {@link #writeSentence(TokenStructure)}.
 *
 * <p>Two layouts are supported and selected through {@link #setOptions(String)}:
 * the default layout writes every sentence on a single line, the pretty layout
 * puts every node on its own line, indented with one tab per tree level.
 *
 * <p>Instances hold a mutable writer and no synchronization is performed.
 *
 * @author Johan Hall
 */
public class BracketWriter implements SyntaxGraphWriter {
    /** Output layouts selectable with the {@code -f} option. */
    private enum PennWriterFormat {
        DEFAULT, PRETTY
    }

    private static final char STARTING_BRACKET = '(';
    private static final char CLOSING_BRACKET = ')';
    private static final char INPUT_SEPARATOR = ' ';
    private static final char EDGELABEL_SEPARATOR = '-';
    private static final char SENTENCE_SEPARATOR = '\n';
    /** Edge label symbol that is never written to the output. */
    private static final String EMPTY_EDGELABEL = "??";
    /** Depth value passed to {@link #writeNode} to request the single-line layout. */
    private static final int NO_INDENT = -1;

    private PennWriterFormat format = PennWriterFormat.DEFAULT;
    private BufferedWriter writer;
    private DataFormatInstance dataFormatInstance;
    private SortedMap<String,ColumnDescription> inputColumns;
    private SortedMap<String,ColumnDescription> edgeLabelColumns;
    private SortedMap<String,ColumnDescription> phraseLabelColumns;
    private String optionString;
    /** Whether {@link #close()} closes the underlying stream or only flushes it. */
    private boolean closeStream = true;

    public BracketWriter() {
    }

    /**
     * Opens (and truncates) the named file for writing.
     *
     * @param fileName    path of the output file
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the file cannot be opened, the encoding
     *                              is not supported, or the previously open
     *                              writer cannot be closed
     */
    public void open(String fileName, String charsetName) throws MaltChainedException {
        final FileOutputStream fos;
        try {
            fos = new FileOutputStream(fileName);
        } catch (FileNotFoundException e) {
            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
        }
        boolean opened = false;
        try {
            open(new OutputStreamWriter(fos, charsetName), true);
            opened = true;
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        } finally {
            if (!opened) {
                // Do not leak the file handle when the writer could not be installed.
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // The failure that brought us here is the one worth reporting.
                }
            }
        }
    }

    /**
     * Starts writing to the given stream. {@code System.out} and
     * {@code System.err} are only flushed, never closed, by {@link #close()}.
     *
     * @param os          stream to write to
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the encoding is not supported or the
     *                              previously open writer cannot be closed
     */
    public void open(OutputStream os, String charsetName) throws MaltChainedException {
        final boolean standardStream = (os == System.out || os == System.err);
        try {
            open(new OutputStreamWriter(os, charsetName), !standardStream);
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        }
    }

    /**
     * Installs a buffered writer on top of {@code osw}. The close policy is
     * updated only after the previous writer has been closed under its own
     * policy, so switching to a standard stream still closes an earlier file
     * and switching back to a file makes that file closable again.
     */
    private void open(OutputStreamWriter osw, boolean closeOnClose) throws MaltChainedException {
        setWriter(new BufferedWriter(osw));
        this.closeStream = closeOnClose;
    }

    /** Writes nothing; this writer emits no epilog. */
    public void writeEpilog() throws MaltChainedException {
    }

    /** Writes nothing; this writer emits no prolog. */
    public void writeProlog() throws MaltChainedException {
    }

    /**
     * Writes one sentence followed by a newline and flushes the writer.
     * Nothing is written when {@code syntaxGraph} is {@code null}, is not a
     * {@link PhraseStructure}, has no tokens, or when no data format instance
     * has been set.
     *
     * @param syntaxGraph the sentence to write
     * @throws MaltChainedException if writing fails; when the failure occurs
     *                              while writing the sentence separator or
     *                              flushing, the writer is closed as well
     */
    public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
        if (syntaxGraph == null || dataFormatInstance == null) {
            return;
        }
        if (!(syntaxGraph instanceof PhraseStructure) || !syntaxGraph.hasTokens()) {
            return;
        }
        final PhraseStructureNode root = ((PhraseStructure) syntaxGraph).getPhraseStructureRoot();
        try {
            writeNode(root, format == PennWriterFormat.PRETTY ? 0 : NO_INDENT);
        } catch (IOException e) {
            throw new DataFormatException("Could not write to the output file. ", e);
        }
        try {
            writer.write(SENTENCE_SEPARATOR);
            writer.flush();
        } catch (IOException e) {
            try {
                close();
            } catch (MaltChainedException ignored) {
                // A secondary close failure must not mask the write failure.
            }
            throw new DataFormatException("Could not write to the output file. ", e);
        }
    }

    /**
     * Recursively writes {@code node} and, for non-terminals, its children.
     *
     * @param node  node to write
     * @param depth tree level used for indentation in the pretty layout, or a
     *              negative value for the single-line layout
     */
    private void writeNode(PhraseStructureNode node, int depth) throws IOException, MaltChainedException {
        final boolean pretty = depth >= 0;
        if (pretty) {
            writer.write('\n');
            writer.write(getIndentation(depth));
        }
        writer.write(STARTING_BRACKET);
        if (node instanceof TokenNode) {
            // Token text may itself contain brackets, which would corrupt the
            // bracket structure of the output, so it is escaped in both layouts.
            writeLabels(node, inputColumns, true);
        } else {
            writeLabels(node, phraseLabelColumns, false);
            for (PhraseStructureNode child : ((NonTerminalNode) node).getChildren()) {
                writeNode(child, pretty ? depth + 1 : NO_INDENT);
            }
            if (pretty) {
                writer.write('\n');
                writer.write(getIndentation(depth));
            }
        }
        writer.write(CLOSING_BRACKET);
    }

    /**
     * Writes the labels of {@code node} for every column in
     * {@code labelColumns}, separated by a space. The parent edge labels are
     * appended directly after the first column.
     *
     * @param encode whether bracket characters in the labels are escaped
     */
    private void writeLabels(PhraseStructureNode node, SortedMap<String,ColumnDescription> labelColumns, boolean encode)
            throws IOException, MaltChainedException {
        boolean first = true;
        for (ColumnDescription column : labelColumns.values()) {
            if (!first) {
                writer.write(INPUT_SEPARATOR);
            }
            final SymbolTable table = column.getSymbolTable();
            if (node.hasLabel(table)) {
                final String symbol = node.getLabelSymbol(table);
                writer.write(encode ? encodeString(symbol) : symbol);
            }
            if (first) {
                writeEdgeLabels(node);
                first = false;
            }
        }
    }

    /**
     * Writes {@code -LABEL} for every parent edge label of {@code node},
     * skipping nodes attached directly to the root and the empty label.
     */
    private void writeEdgeLabels(PhraseStructureNode node) throws IOException, MaltChainedException {
        for (ColumnDescription column : edgeLabelColumns.values()) {
            final SymbolTable table = column.getSymbolTable();
            if (node.hasParentEdgeLabel(table) && !node.getParent().isRoot()) {
                final String edgeLabel = node.getParentEdgeLabelSymbol(table);
                if (edgeLabel != null && !EMPTY_EDGELABEL.equals(edgeLabel)) {
                    writer.write(EDGELABEL_SEPARATOR);
                    writer.write(edgeLabel);
                }
            }
        }
    }

    /** Returns {@code depth} tab characters (an empty string for depth 0 or less). */
    private String getIndentation(int depth) {
        final StringBuilder sb = new StringBuilder(Math.max(depth, 0));
        for (int i = 0; i < depth; i++) {
            sb.append('\t');
        }
        return sb.toString();
    }

    /** Returns the current writer, or {@code null} when none is open. */
    public BufferedWriter getWriter() {
        return writer;
    }

    /**
     * Replaces the current writer, closing the previous one first. Passing
     * the writer that is already installed is a no-op, so it is not closed
     * and then reused. The current close policy is left unchanged.
     *
     * @param writer the writer to use from now on
     * @throws MaltChainedException if the previous writer cannot be closed
     */
    public void setWriter(BufferedWriter writer) throws MaltChainedException {
        if (writer != null && writer == this.writer) {
            return;
        }
        close();
        this.writer = writer;
    }

    /** Returns the data format instance set by {@link #setDataFormatInstance}. */
    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    /**
     * Sets the data format and caches its input, edge label and phrase label
     * column descriptions.
     *
     * @param dataFormatInstance the data format; must not be {@code null}
     */
    public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
        this.dataFormatInstance = dataFormatInstance;
        inputColumns = dataFormatInstance.getInputColumnDescriptions();
        edgeLabelColumns = dataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptions();
        phraseLabelColumns = dataFormatInstance.getPhraseStructureNodeLabelColumnDescriptions();
    }

    /** Returns the option string last passed to {@link #setOptions(String)}. */
    public String getOptions() {
        return optionString;
    }

    /**
     * Parses writer options given as flag/value pairs separated by blanks or
     * underscores, e.g. {@code "-f p"} or {@code "-f_p"}.
     *
     * <p>The only supported flag is {@code -f}: value {@code p} selects the
     * pretty layout and {@code d} the default layout; any other value leaves
     * the layout unchanged. The layout is reset to the default before
     * parsing. A {@code null} or empty option string selects the default
     * layout, and empty tokens produced by repeated or leading separators are
     * ignored.
     *
     * @param optionString the options to parse, may be {@code null}
     * @throws MaltChainedException if a flag does not start with {@code '-'},
     *                              a flag has no value, or a flag is unknown
     */
    public void setOptions(String optionString) throws MaltChainedException {
        this.optionString = optionString;
        format = PennWriterFormat.DEFAULT;
        if (optionString == null) {
            return;
        }
        final String[] argv = optionString.split("[_\\p{Blank}]");
        int i = nextNonEmpty(argv, 0);
        while (i < argv.length) {
            final String flag = argv[i];
            if (flag.charAt(0) != '-') {
                throw new DataFormatException("The argument flag should start with the following character '-', not with "+flag.charAt(0));
            }
            final int valueIndex = nextNonEmpty(argv, i + 1);
            if (valueIndex >= argv.length) {
                throw new DataFormatException("The last argument does not have any value. ");
            }
            final String value = argv[valueIndex];
            // A bare "-" has no option letter and falls through to the unknown-option error.
            final char option = flag.length() > 1 ? flag.charAt(1) : '\0';
            switch (option) {
            case 'f':
                if (value.equals("p")) {
                    format = PennWriterFormat.PRETTY;
                } else if (value.equals("d")) {
                    format = PennWriterFormat.DEFAULT;
                }
                break;
            default:
                throw new DataFormatException("Unknown bracket writer option: '"+flag+"' with value '"+value+"'. ");
            }
            i = nextNonEmpty(argv, valueIndex + 1);
        }
    }

    /** Returns the index of the first non-empty token at or after {@code from}, or {@code tokens.length}. */
    private static int nextNonEmpty(String[] tokens, int from) {
        int i = from;
        while (i < tokens.length && tokens[i].length() == 0) {
            i++;
        }
        return i;
    }

    /**
     * Flushes the writer and, unless it wraps a standard stream opened via
     * {@link #open(OutputStream, String)}, closes it. The writer is forgotten
     * even when flushing or closing fails. Calling this method without an
     * open writer does nothing.
     *
     * @throws MaltChainedException if flushing or closing fails
     */
    public void close() throws MaltChainedException {
        if (writer == null) {
            return;
        }
        final BufferedWriter current = writer;
        writer = null;
        try {
            try {
                current.flush();
            } finally {
                // Release the stream even when the flush failed.
                if (closeStream) {
                    current.close();
                }
            }
        } catch (IOException e) {
            throw new DataFormatException("Could not close the output file. ", e);
        }
    }

    /**
     * Replaces round, square and curly brackets with the escape tokens
     * {@code -LRB-}, {@code -RRB-}, {@code -LSB-}, {@code -RSB-},
     * {@code -LCB-} and {@code -RCB-}.
     */
    private String encodeString(String string) {
        return string.replace("(", "-LRB-").replace(")", "-RRB-").replace("[", "-LSB-").replace("]", "-RSB-").replace("{", "-LCB-").replace("}", "-RCB-");
    }
}