/* Sesame - Storage and Querying architecture for RDF and RDF Schema
* Copyright (C) 2001-2004 Aduna
* Copyright (C) 2005 Andrew Newman - Conversion to JRDF, bugs fixed,
* modified blank node handling.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.jrdf.parser.rdfxml;
import org.jrdf.collection.MapFactory;
import org.jrdf.collection.MemMapFactory;
import org.jrdf.graph.BlankNode;
import org.jrdf.graph.GraphElementFactory;
import org.jrdf.graph.GraphElementFactoryException;
import org.jrdf.graph.Literal;
import org.jrdf.graph.ObjectNode;
import org.jrdf.graph.PredicateNode;
import org.jrdf.graph.SubjectNode;
import org.jrdf.graph.URIReference;
import org.jrdf.parser.ConfigurableParser;
import org.jrdf.parser.NamespaceListener;
import org.jrdf.parser.ParseErrorListener;
import org.jrdf.parser.ParseException;
import org.jrdf.parser.ParseLocationListener;
import org.jrdf.parser.ParserBlankNodeFactory;
import org.jrdf.parser.StatementHandler;
import org.jrdf.parser.StatementHandlerException;
import org.jrdf.parser.bnodefactory.ParserBlankNodeFactoryImpl;
import org.jrdf.vocabulary.RDF;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URI;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Stack;
/**
* A parser for XML-serialized RDF. This parser operates directly
* on the SAX events generated by a SAX-enabled XML parser. The XML parser
* should be compliant with SAX2. You should specify which SAX parser should
* be used by setting the <code>org.xml.sax.driver</code> property.
* This parser is not thread-safe, therefore its public methods are
* synchronized.
* <p/>
* To parse a document using this parser:
* <ul>
* <LI>Create an instance of RdfXmlParser, supplying it with the
* GraphElementFactory that should be used to create nodes.</LI>
* <LI>Set the StatementHandler.</LI>
* <LI>Optionally, set the ParseErrorListener, ParseLocationListener and/or
* NamespaceListener.</LI>
* <LI>Optionally, specify whether the parser should verify the data it
* parses and whether it should stop immediately when it finds an error in
* the data (both default to <tt>true</tt>).</LI>
* <LI>Call the parse method.</LI>
* </ul>
* Example code:
* <pre>
* Graph jrdfGraph = new GraphImpl();
* Parser parser = new RdfXmlParser(jrdfGraph.getElementFactory());
* parser.setStatementHandler(myStatementHandler);
* parser.setParseErrorListener(myParseErrorListener);
* parser.setVerifyData(true);
* parser.setStopAtFirstError(false);
* <p/>
* // Parse the data from inputStream, resolving any relative URIs against http://foo/bar:
* parser.parse(inputStream, "http://foo/bar");
* </pre>
*/
// TODO (AN) Dependent on SAXFilter
public final class RdfXmlParser implements ConfigurableParser {
/**
* String version of RDF's base (namespace) URI, taken from RDF.BASE_URI.
* Remove when all APIs use URI instead of String.
*/
private static final String BASE_URI_STR = RDF.BASE_URI.toString();
/**
* The rdf:type resource. Created from the value factory in init().
*/
private URIReference RDF_TYPE;
/**
* The rdf:subject resource. Created from the value factory in init().
*/
private URIReference RDF_SUBJECT;
/**
* The rdf:predicate resource. Created from the value factory in init().
*/
private URIReference RDF_PREDICATE;
/**
* The rdf:object resource. Created from the value factory in init().
*/
private URIReference RDF_OBJECT;
/**
* The rdf:Statement resource. Created from the value factory in init().
*/
private URIReference RDF_STATEMENT;
/**
* The rdf:li resource. Property elements with this URI are rewritten to
* numbered rdf:_n properties while parsing.
*/
private URIReference RDF_LI;
/**
* The rdf:first resource. Created from the value factory in init().
*/
private URIReference RDF_FIRST;
/**
* The rdf:rest resource. Created from the value factory in init().
*/
private URIReference RDF_REST;
/**
* The rdf:nil resource. Created from the value factory in init().
*/
private URIReference RDF_NIL;
/**
* A filter filtering calls to SAX methods specifically for this parser.
* It is installed as the XMLReader's content handler when parsing.
*/
private SAXFilter saxFilter;
/**
* A factory for creating resources and literals.
*/
private GraphElementFactory valueFactory;
/**
* The factory used to create blank nodes, both anonymous ones and ones
* created for an rdf:nodeID value. It is cleared after every parse.
*/
private ParserBlankNodeFactory bNodeFactory;
/**
* The object to report statements to.
*/
private StatementHandler statementHandler;
/**
* The object to report parse errors to.
*/
private ParseErrorListener errorListener;
/**
* The base URI for resolving relative URIs. This variable is set/modified
* by the SAXFilter during parsing such that it always represents the URI
* of the context in which elements are reported. Reset to null after
* every parse.
*/
private URI baseURI;
/**
* The language of literal values as can be specified using xml:lang
* attributes. This variable is set/modified by the SAXFilter during
* parsing such that it always represents the language of the context
* in which elements are reported. Null when no language applies.
*/
private String xmlLang;
/**
* A stack of node- and property elements. Cleared after every parse.
*/
private Stack<Element> elementStack = new Stack<Element>();
/**
* A set containing URIs that have been generated as a result of rdf:ID
* attributes. These URIs should be unique within a single document.
* Only populated while verifyData is enabled; cleared after every parse.
*/
private Set<URI> usedIDs = new HashSet<URI>();
// Disabled alternative: if the Bdb API is used, the used IDs would be kept
// in a stored map instead of the in-memory set above.
// private Map<Integer, Set> storedMap;
/**
* Flag indicating whether the parser should check the data it parses.
* Package-private (no access modifier); defaults to true.
*/
boolean verifyData = true;
/**
* Flag indicating whether the parser should preserve bnode identifiers specified
* in the source. Package-private (no access modifier); defaults to false.
*/
boolean preserveBNodeIds;
/**
* Indicates how datatyped literals should be handled. Legal
* values are <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and
* <tt>DT_NORMALIZE</tt>. Set to <tt>DT_VERIFY</tt> by init().
*/
private int datatypeHandling;
/**
* Flag indicating whether the parser should stop parsing when it finds
* an error in the data. Defaults to true.
*/
private boolean stopAtFirstError = true;
/**
* Creates a new RdfXmlParser that will use the supplied GraphElementFactory to create objects for resources,
* bNodes and literals. This creates an in-memory blank node factory (backed by a MemMapFactory), which may
* exhaust available memory.
*
* @param graphElementFactory A GraphElementFactory.
*/
public RdfXmlParser(GraphElementFactory graphElementFactory) {
this(graphElementFactory, new MemMapFactory());
}
/**
* Creates a new RdfXmlParser that will use the supplied GraphElementFactory to create nodes and that hands the
* supplied MapFactory to a new ParserBlankNodeFactoryImpl, which is then used to create blank nodes.
*
* @param graphElementFactory A GraphElementFactory.
* @param creator The MapFactory passed to the ParserBlankNodeFactoryImpl.
*/
public RdfXmlParser(GraphElementFactory graphElementFactory, MapFactory creator) {
this(graphElementFactory, new ParserBlankNodeFactoryImpl(creator, graphElementFactory));
}
/**
 * Creates a new RdfXmlParser that creates resources and literals through the
 * supplied GraphElementFactory and blank nodes through the supplied
 * ParserBlankNodeFactory. The parser starts out with a statement handler that
 * ignores every statement; install a real one with
 * {@link #setStatementHandler(StatementHandler)}.
 *
 * @param newValueFactory A GraphElementFactory.
 * @param newBNodeFactory A ParserBlankNodeFactory.
 * @throws RuntimeException if initialisation fails; the original checked
 *                          exception is attached as the cause.
 */
public RdfXmlParser(GraphElementFactory newValueFactory, ParserBlankNodeFactory newBNodeFactory) {
    try {
        init(newValueFactory, newBNodeFactory);
    } catch (TransformerConfigurationException configurationFailure) {
        throw new RuntimeException(configurationFailure);
    } catch (GraphElementFactoryException factoryFailure) {
        throw new RuntimeException(factoryFailure);
    }
    // Default to a handler that silently discards everything it is given.
    final StatementHandler discardingHandler = new StatementHandler() {
        public void handleStatement(SubjectNode subject, PredicateNode predicate, ObjectNode object) {
        }
    };
    setStatementHandler(discardingHandler);
}
/**
 * Stores the two factories, selects DT_VERIFY datatype handling, creates the
 * RDF vocabulary resources this parser reports, and finally creates the
 * SAXFilter that feeds SAX events to this parser.
 *
 * @param newValueFactory the factory for resources and literals.
 * @param newBNodeFactory the factory for blank nodes.
 * @throws TransformerConfigurationException declared for the SAXFilter set-up.
 * @throws GraphElementFactoryException if a vocabulary resource cannot be created.
 */
private void init(GraphElementFactory newValueFactory, ParserBlankNodeFactory newBNodeFactory)
    throws TransformerConfigurationException, GraphElementFactoryException {
    valueFactory = newValueFactory;
    bNodeFactory = newBNodeFactory;
    datatypeHandling = DT_VERIFY;

    final GraphElementFactory factory = valueFactory;
    RDF_TYPE = factory.createURIReference(RDF.TYPE);
    RDF_SUBJECT = factory.createURIReference(RDF.SUBJECT);
    RDF_PREDICATE = factory.createURIReference(RDF.PREDICATE);
    RDF_OBJECT = factory.createURIReference(RDF.OBJECT);
    RDF_STATEMENT = factory.createURIReference(RDF.STATEMENT);
    RDF_LI = factory.createURIReference(RDF.LI);
    RDF_FIRST = factory.createURIReference(RDF.FIRST);
    RDF_REST = factory.createURIReference(RDF.REST);
    RDF_NIL = factory.createURIReference(RDF.NIL);

    // The filter does some filtering and verifying of SAX events before they reach us.
    saxFilter = new SAXFilter(this);
}
/**
* Sets the handler that every parsed statement is reported to.
* Implements Parser.setStatementHandler(StatementHandler).
*
* @param sh the statement handler to use from now on.
*/
public synchronized void setStatementHandler(StatementHandler sh) {
statementHandler = sh;
}
/**
* Sets the listener that parse errors are reported to.
* Implements Parser.setParseErrorListener(ParseErrorListener).
*
* @param el the error listener to use from now on.
*/
public synchronized void setParseErrorListener(ParseErrorListener el) {
errorListener = el;
}
/**
* Passes the supplied location listener on to the SAX filter.
* Implements Parser.setParseLocationListener(ParseLocationListener).
*
* @param ll the parse location listener.
*/
public synchronized void setParseLocationListener(ParseLocationListener ll) {
saxFilter.setParseLocationListener(ll);
}
/**
* Passes the supplied namespace listener on to the SAX filter.
* Implements Parser.setNamespaceListener(NamespaceListener).
*
* @param nl the namespace listener.
*/
public synchronized void setNamespaceListener(NamespaceListener nl) {
saxFilter.setNamespaceListener(nl);
}
/**
* Sets whether the parser should check the data it parses (element names,
* attribute combinations, rdf:ID uniqueness and the like).
* Implements Parser.setVerifyData(boolean).
*
* @param newVerifyData true to enable the checks.
*/
public synchronized void setVerifyData(boolean newVerifyData) {
this.verifyData = newVerifyData;
}
/**
* Sets the flag indicating whether bnode identifiers specified in the
* source should be preserved.
* Implements Parser.setPreserveBNodeIds(boolean).
*
* @param newPreserveBNodeIds the new flag value.
*/
public synchronized void setPreserveBNodeIds(boolean newPreserveBNodeIds) {
this.preserveBNodeIds = newPreserveBNodeIds;
}
/**
* Sets the flag indicating whether parsing should stop at the first error
* found in the data.
* Implements Parser.setStopAtFirstError(boolean).
*
* @param newStopAtFirstError the new flag value.
*/
public synchronized void setStopAtFirstError(boolean newStopAtFirstError) {
this.stopAtFirstError = newStopAtFirstError;
}
/**
 * Sets how datatyped literals are handled. The value is compared against
 * <tt>DT_VERIFY</tt> and <tt>DT_NORMALIZE</tt> when literals are created;
 * any other value leaves datatyped literals unchecked.
 * Implements Parser.setDatatypeHandling(int).
 * <p/>
 * Synchronized like the other configuration setters, because the value is
 * read while a (synchronized) parse is in progress.
 *
 * @param newDatatypeHandling one of <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> or <tt>DT_NORMALIZE</tt>.
 */
public synchronized void setDatatypeHandling(int newDatatypeHandling) {
    this.datatypeHandling = newDatatypeHandling;
}
/**
 * Passes the stand-alone-documents mode on to the SAX filter.
 * <p/>
 * Synchronized like the other configuration setters, so the mode cannot be
 * changed from another thread while a (synchronized) parse is in progress.
 *
 * @param standAloneDocs the new mode, forwarded unchanged to the SAX filter.
 * @see #getParseStandAloneDocuments
 */
public synchronized void setParseStandAloneDocuments(boolean standAloneDocs) {
    saxFilter.setParseStandAloneDocuments(standAloneDocs);
}
/**
 * Returns whether the parser is currently in a mode to parse stand-alone
 * RDF documents. The value is read from the SAX filter.
 * <p/>
 * Synchronized so that it observes the value written by the matching
 * (synchronized) setter.
 *
 * @return the mode currently reported by the SAX filter.
 * @see #setParseStandAloneDocuments
 */
public synchronized boolean getParseStandAloneDocuments() {
    return saxFilter.getParseStandAloneDocuments();
}
/**
 * Parses RDF/XML read from a byte stream. The base URI is installed as the
 * system identifier of the SAX input source and is used to resolve relative
 * URI references.
 *
 * @param newInputStream The InputStream from which to read the data.
 * @param newBaseURI The URI associated with the data in the InputStream.
 * @throws IOException If an I/O error occurred while data was read
 * from the InputStream.
 * @throws ParseException If the parser has found an unrecoverable
 * parse error.
 * @throws StatementHandlerException If the configured statement handler
 * encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied input stream or
 * base URI is <tt>null</tt>.
 */
public synchronized void parse(InputStream newInputStream, String newBaseURI) throws IOException, ParseException,
    StatementHandlerException {
    if (newInputStream == null) {
        throw new IllegalArgumentException("Input stream cannot be 'null'");
    }
    if (newBaseURI == null) {
        throw new IllegalArgumentException("Base URI cannot be 'null'");
    }
    final InputSource source = new InputSource(newInputStream);
    source.setSystemId(newBaseURI);
    parse(source);
}
/**
 * Parses RDF/XML read from a character stream. The base URI is installed as
 * the system identifier of the SAX input source and is used to resolve
 * relative URI references.
 *
 * @param newReader The Reader from which to read the data.
 * @param newBaseURI The URI associated with the data in the Reader.
 * @throws IOException If an I/O error occurred while data was read
 * from the Reader.
 * @throws ParseException If the parser has found an unrecoverable
 * parse error.
 * @throws StatementHandlerException If the configured statement handler
 * has encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied reader or base URI
 * is <tt>null</tt>.
 */
public synchronized void parse(Reader newReader, String newBaseURI) throws IOException, ParseException,
    StatementHandlerException {
    if (newReader == null) {
        throw new IllegalArgumentException("Reader cannot be 'null'");
    }
    if (newBaseURI == null) {
        throw new IllegalArgumentException("Base URI cannot be 'null'");
    }
    final InputSource source = new InputSource(newReader);
    source.setSystemId(newBaseURI);
    parse(source);
}
/**
 * Runs a namespace-aware SAX parse of the supplied input source with the SAX
 * filter as content handler, translating SAX failures into this parser's
 * exception types. All per-document state is reset afterwards, whether or
 * not the parse succeeded.
 * <p/>
 * NOTE(review): only the two namespace features are set on the factory;
 * DTD and external-entity handling are left at the platform defaults.
 *
 * @param inputSource the source to parse; its system id is used as document URI.
 * @throws IOException if reading from the source fails.
 * @throws ParseException if the parser cannot be configured or the SAX parse fails.
 * @throws StatementHandlerException if a SAXException carried one as its wrapped exception.
 */
private void parse(InputSource inputSource) throws IOException, ParseException, StatementHandlerException {
    try {
        saxFilter.setDocumentURI(inputSource.getSystemId());

        final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setFeature("http://xml.org/sax/features/namespaces", true);
        parserFactory.setFeature("http://xml.org/sax/features/namespace-prefixes", false);

        final XMLReader reader = parserFactory.newSAXParser().getXMLReader();
        reader.setContentHandler(saxFilter);
        reader.parse(inputSource);
    } catch (ParserConfigurationException configurationFailure) {
        throw new ParseException(configurationFailure, -1, -1);
    } catch (SAXException saxFailure) {
        // Prefer the exception embedded in the SAX exception, if there is one.
        final Exception cause = null != saxFailure.getException() ? saxFailure.getException() : saxFailure;
        if (saxFailure instanceof SAXParseException) {
            // Located failures are always reported as parse errors, with their position.
            final SAXParseException located = (SAXParseException) saxFailure;
            throw new ParseException(cause, located.getLineNumber(), located.getColumnNumber());
        }
        if (cause instanceof StatementHandlerException) {
            throw (StatementHandlerException) cause;
        }
        throw new ParseException(cause, -1, -1);
    } finally {
        // Clean up
        saxFilter.clear();
        baseURI = null;
        xmlLang = null;
        elementStack.clear();
        usedIDs.clear();
        bNodeFactory.clear();
    }
}
/**
* Sets the base URI against which relative URIs are resolved.
* Package-private; per the baseURI field documentation it is driven by the
* SAXFilter while parsing.
*
* @param newBaseURI the base URI of the current context.
*/
void setBaseURI(URI newBaseURI) {
this.baseURI = newBaseURI;
}
/**
 * Sets the language that applies to literals in the current context.
 * An empty string means "no language" and is stored as null.
 *
 * @param newXmlLang the xml:lang value of the current context, possibly null or empty.
 */
void setXmlLang(String newXmlLang) {
    this.xmlLang = "".equals(newXmlLang) ? null : newXmlLang;
}
/**
 * Handles the start of a non-empty element. An element found at the top
 * level or directly inside a property element is a node element (the subject
 * and/or object of a statement); any other element is a property element.
 */
void startElement(String namespaceURI, String localName, String qName, Atts atts) throws SAXException {
    final boolean propertyExpected = !topIsProperty();
    if (propertyExpected) {
        processPropertyElt(namespaceURI, localName, qName, atts, false);
    } else {
        processNodeElt(namespaceURI, localName, qName, atts, false);
    }
}
/**
 * Handles the end of an element by unwinding the element stack.
 * <ul>
 * <li>When a property element ends that was parsed as a collection, the list
 * is terminated with rdf:nil (or, for an empty list, rdf:nil itself becomes
 * the object of the property).</li>
 * <li>When the top of the stack is a 'volatile' node element, i.e. one
 * without a start- and end element of its own, it is removed together with
 * the element that is actually ending.</li>
 * </ul>
 */
void endElement(String namespaceURI, String localName, String qName) throws SAXException {
    final Object top = peekStack(0);
    if (!(top instanceof NodeElement)) {
        final PropertyElement property = (PropertyElement) top;
        if (property.parseCollection()) {
            final SubjectNode listTail = property.getLastListResource();
            if (null != listTail) {
                // Close the list by pointing its last cell at rdf:nil.
                reportStatement(listTail, RDF_REST, RDF_NIL);
            } else {
                // Nothing was added to the list, so the list itself is rdf:nil.
                final NodeElement owner = (NodeElement) peekStack(1);
                reportStatement(owner.getResource(), property.getURI(), RDF_NIL);
                handleReification(RDF_NIL);
            }
        }
    } else if (((NodeElement) top).isVolatile()) {
        // Extra pop for the volatile node; the pop below removes its property.
        elementStack.pop();
    }
    elementStack.pop();
}
/**
 * Handles an element without content. An element found at the top level or
 * directly inside a property element is a node element (the subject and/or
 * object of a statement); any other element is a property element.
 */
void emptyElement(String namespaceURI, String localName, String qName, Atts atts) throws SAXException {
    final boolean propertyExpected = !topIsProperty();
    if (propertyExpected) {
        processPropertyElt(namespaceURI, localName, qName, atts, true);
    } else {
        processNodeElt(namespaceURI, localName, qName, atts, true);
    }
}
/**
 * Handles character data, which is only legal as the literal value of the
 * property element that is currently on top of the stack. Reports the
 * resulting statement and any reification requested by that property.
 *
 * @param text the literal's label.
 * @throws SAXException with message "unexpected literal" when no property
 *                      element is open, or when the literal cannot be created
 *                      or reported.
 */
void text(String text) throws SAXException {
    // topIsProperty() is also true for an empty stack, which has no property to attach the text to;
    // reject that case explicitly instead of failing with an index error in peekStack().
    if (elementStack.isEmpty() || !topIsProperty()) {
        throw new SAXException("unexpected literal");
    }
    PropertyElement predicate = (PropertyElement) peekStack(0);
    Literal lit = createLiteral(text, xmlLang, predicate.getDatatype());
    NodeElement subject = (NodeElement) peekStack(1);
    reportStatement(subject.getResource(), predicate.getURI(), lit);
    handleReification(lit);
}
/**
 * Processes a node element, which can be the subject of statements and, when
 * nested in a property element, also the object of that property or a member
 * of its collection.
 *
 * @param isEmptyElt true if the element has no content; such an element is
 *                   not pushed on the element stack.
 */
private void processNodeElt(String namespaceURI, String localName, String qName, Atts atts, boolean isEmptyElt)
    throws SAXException {
    if (verifyData) {
        checkNodeEltName(namespaceURI, localName, qName);
    }

    final SubjectNode node = getNodeResource(atts);
    final NodeElement element = new NodeElement(node);

    if (!elementStack.isEmpty()) {
        // Nested node: it is the object of the enclosing property, or a member of its rdf:List.
        final NodeElement parent = (NodeElement) peekStack(1);
        final PropertyElement property = (PropertyElement) peekStack(0);
        if (!property.parseCollection()) {
            reportStatement(parent.getResource(), property.getURI(), (ObjectNode) node);
            handleReification((ObjectNode) node);
        } else {
            final SubjectNode previousCell = property.getLastListResource();
            final BlankNode cell = createBNode();
            if (null != previousCell) {
                // Append to the list that is already under construction.
                reportStatement(previousCell, RDF_REST, cell);
            } else {
                // First member: the new cell is the head of the list and the property's object.
                reportStatement(parent.getResource(), property.getURI(), cell);
                handleReification(cell);
            }
            reportStatement(cell, RDF_FIRST, (ObjectNode) node);
            property.setLastListResource(cell);
        }
    }

    final boolean isRdfDescription = "Description".equals(localName) && namespaceURI.equals(BASE_URI_STR);
    if (!isRdfDescription) {
        // A typed node element: the element name is the node's rdf:type.
        final URIReference typeFromName;
        if (!"".equals(namespaceURI)) {
            typeFromName = createURIReference(namespaceURI + localName);
        } else {
            // No namespace, use base URI
            typeFromName = buildResourceFromLocalName(localName);
        }
        reportStatement(node, RDF_TYPE, typeFromName);
    }

    final Att typeAtt = atts.removeAtt(BASE_URI_STR, "type");
    if (null != typeAtt) {
        // rdf:type attribute, value is a URI-reference
        reportStatement(node, RDF_TYPE, buildURIFromReference(typeAtt.getValue()));
    }

    if (verifyData) {
        checkRdfAtts(atts);
    }
    processSubjectAtts(element, atts);

    if (!isEmptyElt) {
        elementStack.push(element);
    }
}
/**
 * Determines the resource of a node element (subject or object). The
 * rdf:ID, rdf:about and rdf:nodeID attributes are removed from the supplied
 * attribute list and consulted in that order; when none is present a fresh
 * blank node is created. With data verification on, using more than one of
 * them is reported as an error.
 *
 * @return a resource or a bNode.
 */
private SubjectNode getNodeResource(Atts atts) throws SAXException {
    final Att idAtt = atts.removeAtt(BASE_URI_STR, "ID");
    final Att aboutAtt = atts.removeAtt(BASE_URI_STR, "about");
    final Att nodeIdAtt = atts.removeAtt(BASE_URI_STR, "nodeID");

    if (verifyData) {
        final int supplied = (null == idAtt ? 0 : 1) + (null == aboutAtt ? 0 : 1) + (null == nodeIdAtt ? 0 : 1);
        if (supplied > 1) {
            sendError("Only one of the attributes rdf:ID, rdf:about or rdf:nodeID can be used here");
        }
    }

    if (null != idAtt) {
        return buildURIFromID(idAtt.getValue());
    }
    if (null != aboutAtt) {
        return buildURIFromReference(aboutAtt.getValue());
    }
    if (null != nodeIdAtt) {
        return createBNode(nodeIdAtt.getValue());
    }
    // No resource specified, generate a bNode
    return createBNode();
}
/**
 * Reports one statement per remaining attribute: the node element's resource
 * is the subject, the attribute's URI the predicate and the attribute's
 * value a plain literal in the current xml:lang.
 */
private void processSubjectAtts(NodeElement nodeElt, Atts atts) throws SAXException {
    final SubjectNode owner = nodeElt.getResource();
    for (Iterator attIter = atts.iterator(); attIter.hasNext();) {
        final Att propertyAtt = (Att) attIter.next();
        final URIReference property = createURIReference(propertyAtt.getURI());
        final Literal value = createLiteral(propertyAtt.getValue(), xmlLang, null);
        reportStatement(owner, property, value);
    }
}
/**
 * Processes a property element. The property is pushed on the element stack
 * (on top of its subject's node element) and, depending on its attributes
 * and on whether it is empty, the statement it represents is reported here
 * or left for the content that follows.
 * <p/>
 * An empty element is always popped again before this method returns, as
 * no end-element event follows for it.
 *
 * @param namespaceURI the element's namespace URI; "" is reported as an error.
 * @param localName the element's local name.
 * @param qName the element's qualified name, used in messages.
 * @param atts the element's attributes; handled attributes are removed from it.
 * @param isEmptyElt true if the element has no content.
 * @throws SAXException if an error is raised while reporting or creating values.
 */
private void processPropertyElt(String namespaceURI, String localName, String qName, Atts atts, boolean isEmptyElt)
    throws SAXException {
    if (verifyData) {
        checkPropertyEltName(namespaceURI, localName, qName);
    }
    // Get the URI of the property
    URIReference propURI;
    if ("".equals(namespaceURI)) {
        // no namespace URI
        sendError("unqualified property element <" + qName + "> not allowed");
        // Use base URI as namespace:
        propURI = buildResourceFromLocalName(localName);
    } else {
        propURI = createURIReference(namespaceURI + localName);
    }
    // List expansion rule: rdf:li becomes rdf:_n, numbered per subject
    if (propURI.equals(RDF_LI)) {
        NodeElement subject = (NodeElement) peekStack(0);
        propURI = createURIReference(BASE_URI_STR + "_" + subject.getNextLiCounter());
    }
    // Push the property on the stack.
    PropertyElement predicate = new PropertyElement(propURI);
    elementStack.push(predicate);
    // Check if property has a reification ID
    Att id = atts.removeAtt(BASE_URI_STR, "ID");
    if (null != id) {
        URIReference reifURI = buildURIFromID(id.getValue());
        predicate.setReificationURI(reifURI);
    }
    // Check for presence of rdf:parseType attribute
    Att parseType = atts.removeAtt(BASE_URI_STR, "parseType");
    if (null != parseType) {
        if (verifyData) {
            checkNoMoreAtts(atts);
        }
        String parseTypeValue = parseType.getValue();
        if ("Resource".equals(parseTypeValue)) {
            BlankNode objectResource = createBNode();
            NodeElement subject = (NodeElement) peekStack(1);
            reportStatement(subject.getResource(), propURI, objectResource);
            if (isEmptyElt) {
                handleReification(objectResource);
            } else {
                // NOTE(review): no reification is generated on this path, even when rdf:ID was given —
                // confirm whether that is intended.
                // The content is parsed as properties of an implicit node that has no element of its own.
                NodeElement object = new NodeElement(objectResource);
                object.setIsVolatile(true);
                elementStack.push(object);
            }
        } else if ("Collection".equals(parseTypeValue)) {
            if (isEmptyElt) {
                NodeElement subject = (NodeElement) peekStack(1);
                reportStatement(subject.getResource(), propURI, RDF_NIL);
                handleReification(RDF_NIL);
            } else {
                predicate.setParseCollection(true);
            }
        } else {
            // other parseType
            if (!"Literal".equals(parseTypeValue)) {
                sendWarning("unknown parseType: " + parseType.getValue());
            }
            if (isEmptyElt) {
                NodeElement subject = (NodeElement) peekStack(1);
                Literal lit = createLiteral("", null, RDF.XML_LITERAL.toString());
                reportStatement(subject.getResource(), propURI, lit);
                handleReification(lit);
            } else {
                // The next string is an rdf:XMLLiteral
                predicate.setDatatype(RDF.XML_LITERAL.toString());
                saxFilter.setParseLiteralMode();
            }
        }
    } else if (isEmptyElt) {
        // empty element without an rdf:parseType attribute
        Att emptyDatatype = atts.getAtt(BASE_URI_STR, "datatype");
        if (0 == atts.size() || 1 == atts.size() && null != emptyDatatype) {
            // Element had no attributes besides the optional rdf:ID and/or rdf:datatype:
            // it denotes an empty literal, typed when rdf:datatype was given.
            NodeElement subject = (NodeElement) peekStack(1);
            String datatypeURI = null == emptyDatatype ? null : emptyDatatype.getValue();
            Literal lit = createLiteral("", xmlLang, datatypeURI);
            reportStatement(subject.getResource(), propURI, lit);
            handleReification(lit);
        } else {
            // Create resource for the statement's object.
            SubjectNode resourceRes = getPropertyResource(atts);
            // All special rdf attributes have been checked/removed.
            if (verifyData) {
                checkRdfAtts(atts);
            }
            NodeElement resourceElt = new NodeElement(resourceRes);
            NodeElement subject = (NodeElement) peekStack(1);
            reportStatement(subject.getResource(), propURI, (ObjectNode) resourceRes);
            handleReification((ObjectNode) resourceRes);
            Att type = atts.removeAtt(BASE_URI_STR, "type");
            if (null != type) {
                // rdf:type attribute, value is a URI-reference
                URIReference className = buildURIFromReference(type.getValue());
                reportStatement(resourceRes, RDF_TYPE, className);
            }
            processSubjectAtts(resourceElt, atts);
        }
    } else {
        // Not an empty element, sub elements will follow.
        // Check for rdf:datatype attribute
        Att datatype = atts.removeAtt(BASE_URI_STR, "datatype");
        if (null != datatype) {
            predicate.setDatatype(datatype.getValue());
        }
        // No more attributes are expected.
        if (verifyData) {
            checkNoMoreAtts(atts);
        }
    }
    if (isEmptyElt) {
        // The property was pushed above and no end-element event will pop it. This applies to
        // every empty element, including those with rdf:parseType, so pop here for all of them.
        elementStack.pop();
    }
}
/**
 * Determines the object resource of a property element. The rdf:resource and
 * rdf:nodeID attributes are removed from the supplied attribute list and
 * consulted in that order; when neither is present a fresh blank node is
 * created. With data verification on, using both is reported as an error.
 *
 * @return a resource or a bNode.
 */
private SubjectNode getPropertyResource(Atts atts) throws SAXException {
    final Att resourceAtt = atts.removeAtt(BASE_URI_STR, "resource");
    final Att nodeIdAtt = atts.removeAtt(BASE_URI_STR, "nodeID");

    if (verifyData && null != resourceAtt && null != nodeIdAtt) {
        sendError("Only one of the attributes rdf:resource or rdf:nodeID can be used here");
    }

    if (null != resourceAtt) {
        return buildURIFromReference(resourceAtt.getValue());
    }
    if (null != nodeIdAtt) {
        return createBNode(nodeIdAtt.getValue());
    }
    // No resource specified, generate a bNode
    return createBNode();
}
/**
 * Reifies the statement just reported for the current property, if that
 * property carried an rdf:ID attribute. Expects a PropertyElement on top of
 * the element stack and its subject's NodeElement directly below it.
 *
 * @param value the object of the statement to reify.
 */
private void handleReification(ObjectNode value) throws SAXException {
    final PropertyElement property = (PropertyElement) peekStack(0);
    if (!property.isReified()) {
        return;
    }
    final NodeElement owner = (NodeElement) peekStack(1);
    final URIReference statementId = property.getReificationURI();
    reifyStatement(statementId, owner.getResource(), property.getURI(), value);
}
/**
* Reports the four statements that describe a reified statement: the
* rdf:type rdf:Statement triple followed by the rdf:subject, rdf:predicate
* and rdf:object triples, in that order.
*
* @param reifNode the node that identifies the reified statement.
* @param subj the subject of the statement being reified.
* @param pred the predicate of the statement being reified.
* @param obj the object of the statement being reified.
*/
private void reifyStatement(SubjectNode reifNode, SubjectNode subj,
PredicateNode pred, ObjectNode obj) throws SAXException {
reportStatement(reifNode, RDF_TYPE, RDF_STATEMENT);
// Subject and predicate appear in object position here, hence the casts.
reportStatement(reifNode, RDF_SUBJECT, (ObjectNode) subj);
reportStatement(reifNode, RDF_PREDICATE, (ObjectNode) pred);
reportStatement(reifNode, RDF_OBJECT, obj);
}
/**
 * Builds a resource for a name without a namespace by resolving it as a
 * fragment ("#" + localName) against the current base URI.
 */
private URIReference buildResourceFromLocalName(String localName) throws SAXException {
    return createURIReference(baseURI.resolve("#" + localName));
}
/**
 * Builds a resource from the value of an rdf:ID attribute by resolving it as
 * a fragment ("#" + id) against the current base URI. With data verification
 * on, an id that is not an NCName, or that yields a URI already produced by
 * an earlier rdf:ID in this document, is reported as an error.
 */
private URIReference buildURIFromID(String id) throws SAXException {
    if (verifyData && !XmlUtil.isNCName(id)) {
        sendError("Not an XML Name: " + id);
    }

    final URI resolved = baseURI.resolve("#" + id);

    // Set.add() returns false when the URI was recorded before.
    if (verifyData && !usedIDs.add(resolved)) {
        sendError("'" + id + "' already used as ID value, values of rdf:ID attributes should be unique");
    }
    return createURIReference(resolved);
}
/**
 * Builds a resource from a URI reference by resolving it against the current
 * base URI; an empty reference denotes the base URI itself. With data
 * verification on, a relative reference other than a pure self-reference is
 * reported as an error when the base URI is opaque.
 */
private URIReference buildURIFromReference(String uriReference) throws SAXException {
    final URI reference = safeURICreator(uriReference);

    // Only schemeless (relative) references are inspected any further.
    if (verifyData && null == reference.getScheme()) {
        final boolean selfReference = null == reference.getAuthority()
            && null == reference.getQuery()
            && 0 == reference.getPath().length();
        if (!selfReference && baseURI.isOpaque()) {
            sendError("Relative URI '" + uriReference + "' cannot be resolved using the opaque base URI '" +
                baseURI + "'");
        }
    }

    final URI target = "".equals(uriReference) ? baseURI : baseURI.resolve(reference);
    return createURIReference(target);
}
/**
* Creates a URIReference from a string, converting the string to a URI with
* safeURICreator first.
*
* @param uri the URI in string form.
* @throws SAXException if the string cannot be converted or the reference cannot be created.
*/
private URIReference createURIReference(String uri) throws SAXException {
return createURIReference(safeURICreator(uri));
}
/**
 * Converts a string to a URI, turning a syntactically invalid URI into a
 * parse error instead of an unchecked exception.
 *
 * @param uriReference the URI reference in string form.
 * @return the parsed URI.
 * @throws SAXException if the string is not a valid URI. The message is that
 *                      of the underlying syntax error, followed by the
 *                      current line number when a locator is available.
 */
private URI safeURICreator(String uriReference) throws SAXException {
    try {
        return URI.create(uriReference);
    } catch (IllegalArgumentException e) {
        // Prefer the message of the wrapped cause, if any, over the wrapper's own message.
        Throwable cause = e.getCause();
        String msg = null != cause ? cause.getMessage() : e.getMessage();
        // SAX parsers are not required to supply a locator; don't let a missing one hide the real error.
        Locator loc = saxFilter.getLocator();
        if (null != loc) {
            msg = msg + " line: " + loc.getLineNumber();
        }
        sendError(msg);
        throw new SAXException(msg);
    }
}
/**
* Creates a URIReference for the supplied URI using the value factory.
*
* @param uri the URI to create a reference for.
* @throws SAXException wrapping the GraphElementFactoryException if the factory fails.
*/
private URIReference createURIReference(URI uri) throws SAXException {
try {
return valueFactory.createURIReference(uri);
} catch (GraphElementFactoryException e) {
// Callers on the SAX event path can only propagate SAXExceptions.
throw new SAXException(e);
}
}
/**
* Creates a new anonymous blank node using the blank node factory.
*
* @throws SAXException wrapping the GraphElementFactoryException if the factory fails.
*/
private BlankNode createBNode() throws SAXException {
try {
return bNodeFactory.createBlankNode();
} catch (GraphElementFactoryException e) {
// Callers on the SAX event path can only propagate SAXExceptions.
throw new SAXException(e);
}
}
/**
 * Asks the blank node factory for the blank node that belongs to the given
 * rdf:nodeID value. With data verification on, a nodeID that is not an
 * NCName is reported as an error first.
 *
 * @throws SAXException wrapping the GraphElementFactoryException if the factory fails.
 */
private BlankNode createBNode(String nodeID) throws SAXException {
    if (verifyData && !XmlUtil.isNCName(nodeID)) {
        sendError("Not an XML Name: " + nodeID);
    }
    try {
        return bNodeFactory.createBlankNode(nodeID);
    } catch (GraphElementFactoryException factoryFailure) {
        throw new SAXException(factoryFailure);
    }
}
/**
 * Creates a literal. A datatype takes precedence over a language: with a
 * datatype the label is verified (DT_VERIFY) or normalized (DT_NORMALIZE)
 * according to the datatype handling mode and a typed literal is created;
 * otherwise a language-tagged literal is created when a language is given,
 * and a plain literal when it is not.
 *
 * @throws SAXException wrapping any exception raised on the way, including
 *                      the one raised for a label that fails verification.
 */
private Literal createLiteral(String label, String lang, String datatype) throws SAXException {
    try {
        if (null == datatype) {
            if (null == lang) {
                return valueFactory.createLiteral(label);
            }
            return valueFactory.createLiteral(label, lang);
        }

        String lexicalForm = label;
        if (DT_VERIFY == datatypeHandling) {
            if (!XmlDatatypeUtil.isValidValue(lexicalForm, datatype)) {
                throw new Exception("'" + label + "' is not a valid value for datatype " + datatype);
            }
        } else if (DT_NORMALIZE == datatypeHandling) {
            lexicalForm = XmlDatatypeUtil.normalize(lexicalForm, datatype);
        }
        return valueFactory.createLiteral(lexicalForm, new URI(datatype));
    } catch (Exception failure) {
        throw new SAXException(failure);
    }
}
/**
* Returns an element of the element stack without removing it.
*
* @param distFromTop 0 for the top element, 1 for the one below it, and so on.
* @return the element at that distance from the top.
*/
private Object peekStack(int distFromTop) {
return elementStack.get(elementStack.size() - 1 - distFromTop);
}
/**
* Tells whether the next element must be a node element: true when the
* element stack is empty or has a PropertyElement on top.
*/
private boolean topIsProperty() {
return elementStack.isEmpty() || peekStack(0) instanceof PropertyElement;
}
/**
 * Checks a node element name that claims to be in the RDF namespace; names
 * from other namespaces are not checked. The disallowed names (li, RDF, ID,
 * about, parseType, resource, nodeID and datatype) and the withdrawn names
 * (bagID, aboutEach and aboutEachPrefix) generate an error; any other name
 * that is not a known RDF name generates a warning.
 */
private void checkNodeEltName(String namespaceURI, String localName, String qName) throws SAXException {
    if (!BASE_URI_STR.equals(namespaceURI)) {
        return;
    }

    final boolean allowed = "Description".equals(localName)
        || "Seq".equals(localName)
        || "Bag".equals(localName)
        || "Alt".equals(localName)
        || "Statement".equals(localName)
        || "Property".equals(localName)
        || "List".equals(localName)
        || "subject".equals(localName)
        || "predicate".equals(localName)
        || "object".equals(localName)
        || "type".equals(localName)
        || "value".equals(localName)
        || "first".equals(localName)
        || "rest".equals(localName)
        || "nil".equals(localName)
        || localName.startsWith("_");
    if (allowed) {
        return;
    }

    final boolean disallowed = "li".equals(localName)
        || "RDF".equals(localName)
        || "ID".equals(localName)
        || "about".equals(localName)
        || "parseType".equals(localName)
        || "resource".equals(localName)
        || "nodeID".equals(localName)
        || "datatype".equals(localName);
    if (disallowed) {
        sendError("<" + qName + "> not allowed as node element");
        return;
    }

    final boolean withdrawn = "bagID".equals(localName)
        || "aboutEach".equals(localName)
        || "aboutEachPrefix".equals(localName);
    if (withdrawn) {
        sendError(qName + " is no longer a valid RDF name");
    } else {
        sendWarning("unknown rdf element <" + qName + ">");
    }
}
/**
 * Checks a property element name that claims to be in the RDF namespace;
 * names from other namespaces are not checked. The disallowed names
 * (Description, RDF, ID, about, parseType, resource, nodeID and datatype)
 * and the withdrawn names (bagID, aboutEach and aboutEachPrefix) generate an
 * error; any other name that is not a known RDF name generates a warning.
 */
private void checkPropertyEltName(String namespaceURI, String localName, String qName) throws SAXException {
    if (!BASE_URI_STR.equals(namespaceURI)) {
        return;
    }

    final boolean allowed = "li".equals(localName)
        || "Seq".equals(localName)
        || "Bag".equals(localName)
        || "Alt".equals(localName)
        || "Statement".equals(localName)
        || "Property".equals(localName)
        || "List".equals(localName)
        || "subject".equals(localName)
        || "predicate".equals(localName)
        || "object".equals(localName)
        || "type".equals(localName)
        || "value".equals(localName)
        || "first".equals(localName)
        || "rest".equals(localName)
        || "nil".equals(localName)
        || localName.startsWith("_");
    if (allowed) {
        return;
    }

    final boolean disallowed = "Description".equals(localName)
        || "RDF".equals(localName)
        || "ID".equals(localName)
        || "about".equals(localName)
        || "parseType".equals(localName)
        || "resource".equals(localName)
        || "nodeID".equals(localName)
        || "datatype".equals(localName);
    if (disallowed) {
        sendError("<" + qName + "> not allowed as property element");
        return;
    }

    final boolean withdrawn = "bagID".equals(localName)
        || "aboutEach".equals(localName)
        || "aboutEachPrefix".equals(localName);
    if (withdrawn) {
        sendError(qName + " is no longer a valid RDF name");
    } else {
        sendWarning("unknown rdf element <" + qName + ">");
    }
}
/**
 * Screens the attributes in 'atts' that claim to belong to the RDF namespace.
 * Attributes from any other namespace are skipped.
 * <ul>
 * <li>Seq, Bag, Alt, Statement, Property, List, subject, predicate, object, type,
 * value, first, rest, nil and any name starting with an underscore are accepted.</li>
 * <li>Description, li, RDF, ID, about, parseType, resource, nodeID and datatype are
 * reported as errors and the attribute is removed from 'atts'.</li>
 * <li>bagID, aboutEach and aboutEachPrefix are reported as errors because they are
 * no longer valid RDF names; the attribute itself is left in 'atts'.</li>
 * <li>Every other name produces a warning and the attribute is left in 'atts'.</li>
 * </ul>
 *
 * @param atts the attributes to check; rejected entries are removed through its iterator.
 * @throws SAXException if sendError raises one while reporting a problem.
 */
private void checkRdfAtts(Atts atts) throws SAXException {
    // Typed iterator (as in checkNoMoreAtts) instead of a raw Iterator plus cast.
    Iterator<Att> iter = atts.iterator();
    while (iter.hasNext()) {
        Att att = iter.next();
        if (!BASE_URI_STR.equals(att.getNamespace())) {
            continue;
        }

        String localName = att.getLocalName();
        if ("Seq".equals(localName) ||
            "Bag".equals(localName) ||
            "Alt".equals(localName) ||
            "Statement".equals(localName) ||
            "Property".equals(localName) ||
            "List".equals(localName) ||
            "subject".equals(localName) ||
            "predicate".equals(localName) ||
            "object".equals(localName) ||
            "type".equals(localName) ||
            "value".equals(localName) ||
            "first".equals(localName) ||
            "rest".equals(localName) ||
            "nil".equals(localName) ||
            localName.startsWith("_")) {
            // Accepted as an attribute name; nothing to report.
            continue;
        }

        if ("Description".equals(localName) ||
            "li".equals(localName) ||
            "RDF".equals(localName) ||
            "ID".equals(localName) ||
            "about".equals(localName) ||
            "parseType".equals(localName) ||
            "resource".equals(localName) ||
            "nodeID".equals(localName) ||
            "datatype".equals(localName)) {
            sendError("'" + att.getQName() + "' not allowed as attribute name");
            // Only reached when sendError did not throw: drop the offending attribute.
            iter.remove();
        } else if (
            "bagID".equals(localName) ||
            "aboutEach".equals(localName) ||
            "aboutEachPrefix".equals(localName)) {
            // Reported, but deliberately kept in 'atts' to preserve existing behaviour.
            sendError(att.getQName() + " is no longer a valid RDF name");
        } else {
            sendWarning("unknown rdf attribute '" + att.getQName() + "'");
        }
    }
}
/**
 * Empties 'atts'. A warning is generated for each attribute that is still present
 * before that attribute is removed, so unexpected attributes are no longer
 * discarded silently.
 *
 * @param atts the attributes that should already have been consumed; it is empty
 * when this method returns.
 */
private void checkNoMoreAtts(Atts atts) {
    Iterator<Att> iter = atts.iterator();
    while (iter.hasNext()) {
        Att leftover = iter.next();
        // sendWarning does not throw, so every leftover attribute is reported and removed.
        sendWarning("unexpected attribute '" + leftover.getQName() + "'");
        iter.remove();
    }
}
/**
 * Hands a statement to the configured StatementHandler.
 *
 * @param subject The subject of the statement.
 * @param predicate The predicate of the statement.
 * @param object The object of the statement.
 * @throws SAXException If the StatementHandler throws any exception; that exception
 * becomes the wrapped cause of the SAXException.
 */
private void reportStatement(SubjectNode subject, PredicateNode predicate, ObjectNode object) throws SAXException {
    try {
        statementHandler.handleStatement(subject, predicate, object);
    } catch (Exception handlerFailure) {
        // Tunnel the failure through the SAX layer; the original note says parse()
        // unwraps it again (parse() is not visible from here).
        SAXException wrapped = new SAXException(handlerFailure);
        throw wrapped;
    }
}
/**
 * Forwards a warning to the error listener, if one is configured. The current line
 * and column are taken from the SAX filter's locator; -1 is reported for both when
 * no locator is available.
 *
 * @param msg the warning message.
 */
void sendWarning(String msg) {
    if (errorListener == null) {
        return;
    }
    Locator position = saxFilter.getLocator();
    int line = (position == null) ? -1 : position.getLineNumber();
    int column = (position == null) ? -1 : position.getColumnNumber();
    errorListener.warning(msg, line, column);
}
/**
 * Forwards an error to the error listener, if one is configured, using the line and
 * column of the SAX filter's locator (-1 for both when no locator is available).
 * Afterwards parsing is aborted when stopAtFirstError is set.
 *
 * @param msg the error message.
 * @throws SAXException carrying 'msg' if stopAtFirstError is set.
 */
synchronized void sendError(String msg) throws SAXException {
    if (errorListener != null) {
        Locator position = saxFilter.getLocator();
        int line = -1;
        int column = -1;
        if (position != null) {
            line = position.getLineNumber();
            column = position.getColumnNumber();
        }
        errorListener.error(msg, line, column);
    }

    if (stopAtFirstError) {
        throw new SAXException(msg);
    }
}
/**
 * Forwards a fatal error to the error listener, if one is configured, using the line
 * and column of the SAX filter's locator (-1 for both when no locator is available),
 * and then always aborts by throwing.
 *
 * @param msg the fatal error message.
 * @throws SAXException always, carrying 'msg'.
 */
void sendFatalError(String msg) throws SAXException {
    if (errorListener != null) {
        Locator position = saxFilter.getLocator();
        boolean located = position != null;
        int line = located ? position.getLineNumber() : -1;
        int column = located ? position.getColumnNumber() : -1;
        errorListener.fatalError(msg, line, column);
    }
    throw new SAXException(msg);
}
// TODO Take a look NodeElement and PropertyElement.
/**
 * Marker type shared by the parser's element records. It declares no members;
 * NodeElement and PropertyElement are the implementations visible here.
 */
interface Element {
}
/**
 * Record describing a node element: the subject resource it stands for, a volatile
 * flag and a counter that hands out consecutive numbers starting at 1.
 */
static class NodeElement implements Element {
    /** The subject resource represented by this node element. */
    private final SubjectNode resource;
    /** Flag set via setIsVolatile; false until changed. */
    private boolean isVolatile;
    /** Next value returned by getNextLiCounter (presumably numbers rdf:li children; confirm with the caller). */
    private int liCounter = 1;

    NodeElement(SubjectNode newResource) {
        resource = newResource;
    }

    public SubjectNode getResource() {
        return this.resource;
    }

    public boolean isVolatile() {
        return this.isVolatile;
    }

    public void setIsVolatile(boolean newIsVolatile) {
        isVolatile = newIsVolatile;
    }

    /**
     * Returns the current counter value and advances the counter by one.
     */
    public int getNextLiCounter() {
        int current = liCounter;
        liCounter = current + 1;
        return current;
    }

    @Override
    public String toString() {
        return "Subject SkipListNode: " + resource + " isVolatile: " + isVolatile;
    }
}
/**
 * Record describing a property element: its property URI plus the optional
 * reification identifier, datatype and rdf:parseType="Collection" state.
 */
static class PropertyElement implements Element {
    /** The property URI. */
    private final URIReference uri;

    /** An optional reification identifier; null when none has been set. */
    private URIReference reificationURI;

    /** An optional datatype; null when none has been set. */
    private String datatype;

    /** Whether this PropertyElement has an attribute {@code rdf:parseType="Collection"}. */
    private boolean parseCollection;

    /** The resource that was used to append the last part of an rdf:List. */
    private SubjectNode lastListResource;

    PropertyElement(URIReference newUri) {
        uri = newUri;
    }

    public URIReference getURI() {
        return this.uri;
    }

    /**
     * Tells whether a reification identifier has been set.
     */
    public boolean isReified() {
        return reificationURI != null;
    }

    public URIReference getReificationURI() {
        return this.reificationURI;
    }

    public void setReificationURI(URIReference newReificationURI) {
        reificationURI = newReificationURI;
    }

    public String getDatatype() {
        return this.datatype;
    }

    public void setDatatype(String newDatatype) {
        datatype = newDatatype;
    }

    public boolean parseCollection() {
        return this.parseCollection;
    }

    public void setParseCollection(boolean newParseCollection) {
        parseCollection = newParseCollection;
    }

    public SubjectNode getLastListResource() {
        return this.lastListResource;
    }

    public void setLastListResource(SubjectNode newLastListResource) {
        lastListResource = newLastListResource;
    }

    @Override
    public String toString() {
        return "URI: " + uri + " datatype " + datatype;
    }
}
}