Package org.openrdf.rio.ntriples

Source Code of org.openrdf.rio.ntriples.NTriplesParser

/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
*
* Licensed under the Aduna BSD-style license.
*/
package org.openrdf.rio.ntriples;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.helpers.RDFParserBase;

/**
* RDF parser for N-Triples files. A specification of NTriples can be found in
* <a href="http://www.w3.org/TR/rdf-testcases/#ntriples">this section</a> of
* the RDF Test Cases document. This parser is not thread-safe, therefore its
* public methods are synchronized.
*
* @author Arjohn Kampman
*/
public class NTriplesParser extends RDFParserBase {

  /*-----------*
   * Variables *
   *-----------*/

  private Reader reader;

  private int lineNo;

  private Resource subject;

  private URI predicate;

  private Value object;

  /*--------------*
   * Constructors *
   *--------------*/

  /**
   * Creates a new NTriplesParser that will use a {@link ValueFactoryImpl} to
   * create object for resources, bNodes and literals.
   */
  public NTriplesParser() {
    super();
  }

  /**
   * Creates a new NTriplesParser that will use the supplied
   * <tt>ValueFactory</tt> to create RDF model objects.
   *
   * @param valueFactory
   *        A ValueFactory.
   */
  public NTriplesParser(ValueFactory valueFactory) {
    super(valueFactory);
  }

  /*---------*
   * Methods *
   *---------*/

  // implements RDFParser.getRDFFormat()
  public final RDFFormat getRDFFormat() {
    return RDFFormat.NTRIPLES;
  }

  /**
   * Implementation of the <tt>parse(InputStream, String)</tt> method defined
   * in the RDFParser interface.
   *
   * @param in
   *        The InputStream from which to read the data, must not be
   *        <tt>null</tt>. The InputStream is supposed to contain 7-bit
   *        US-ASCII characters, as per the N-Triples specification.
   * @param baseURI
   *        The URI associated with the data in the InputStream, must not be
   *        <tt>null</tt>.
   * @throws IOException
   *         If an I/O error occurred while data was read from the InputStream.
   * @throws RDFParseException
   *         If the parser has found an unrecoverable parse error.
   * @throws RDFHandlerException
   *         If the configured statement handler encountered an unrecoverable
   *         error.
   * @throws IllegalArgumentException
   *         If the supplied input stream or base URI is <tt>null</tt>.
   */
  public synchronized void parse(InputStream in, String baseURI)
    throws IOException, RDFParseException, RDFHandlerException
  {
    if (in == null) {
      throw new IllegalArgumentException("Input stream can not be 'null'");
    }
    // Note: baseURI will be checked in parse(Reader, String)

    try {
      parse(new InputStreamReader(in, "US-ASCII"), baseURI);
    }
    catch (UnsupportedEncodingException e) {
      // Every platform should support the US-ASCII encoding...
      throw new RuntimeException(e);
    }
  }

  /**
   * Implementation of the <tt>parse(Reader, String)</tt> method defined in the
   * RDFParser interface.
   *
   * @param reader
   *        The Reader from which to read the data, must not be <tt>null</tt>.
   * @param baseURI
   *        The URI associated with the data in the Reader, must not be
   *        <tt>null</tt>.
   * @throws IOException
   *         If an I/O error occurred while data was read from the InputStream.
   * @throws RDFParseException
   *         If the parser has found an unrecoverable parse error.
   * @throws RDFHandlerException
   *         If the configured statement handler encountered an unrecoverable
   *         error.
   * @throws IllegalArgumentException
   *         If the supplied reader or base URI is <tt>null</tt>.
   */
  public synchronized void parse(Reader reader, String baseURI)
    throws IOException, RDFParseException, RDFHandlerException
  {
    if (reader == null) {
      throw new IllegalArgumentException("Reader can not be 'null'");
    }
    if (baseURI == null) {
      throw new IllegalArgumentException("base URI can not be 'null'");
    }

    rdfHandler.startRDF();

    this.reader = reader;
    lineNo = 1;

    reportLocation(lineNo, 1);

    try {
      int c = reader.read();
      c = skipWhitespace(c);

      while (c != -1) {
        if (c == '#') {
          // Comment, ignore
          c = skipLine(c);
        }
        else if (c == '\r' || c == '\n') {
          // Empty line, ignore
          c = skipLine(c);
        }
        else {
          c = parseTriple(c);
        }

        c = skipWhitespace(c);
      }
    }
    finally {
      clear();
    }

    rdfHandler.endRDF();
  }

  /**
   * Reads characters from reader until it finds a character that is not a
   * space or tab, and returns this last character. In case the end of the
   * character stream has been reached, -1 is returned.
   */
  private int skipWhitespace(int c)
    throws IOException
  {
    while (c == ' ' || c == '\t') {
      c = reader.read();
    }

    return c;
  }

  /**
   * Reads characters from reader until the first EOL has been read. The first
   * character after the EOL is returned. In case the end of the character
   * stream has been reached, -1 is returned.
   */
  private int skipLine(int c)
    throws IOException
  {
    while (c != -1 && c != '\r' && c != '\n') {
      c = reader.read();
    }

    // c is equal to -1, \r or \n. In case of a \r, we should
    // check whether it is followed by a \n.

    if (c == '\n') {
      c = reader.read();

      lineNo++;

      reportLocation(lineNo, 1);
    }
    else if (c == '\r') {
      c = reader.read();

      if (c == '\n') {
        c = reader.read();
      }

      lineNo++;

      reportLocation(lineNo, 1);
    }

    return c;
  }

  private int parseTriple(int c)
    throws IOException, RDFParseException, RDFHandlerException
  {
    c = parseSubject(c);

    c = skipWhitespace(c);

    c = parsePredicate(c);

    c = skipWhitespace(c);

    c = parseObject(c);

    c = skipWhitespace(c);

    if (c == -1) {
      throwEOFException();
    }
    else if (c != '.') {
      reportFatalError("Expected '.', found: " + (char)c);
    }

    c = skipLine(c);

    Statement st = createStatement(subject, predicate, object);
    rdfHandler.handleStatement(st);

    subject = null;
    predicate = null;
    object = null;

    return c;
  }

  private int parseSubject(int c)
    throws IOException, RDFParseException
  {
    StringBuilder sb = new StringBuilder(100);

    // subject is either an uriref (<foo://bar>) or a nodeID (_:node1)
    if (c == '<') {
      // subject is an uriref
      c = parseUriRef(c, sb);
      subject = createURI(sb.toString());
    }
    else if (c == '_') {
      // subject is a bNode
      c = parseNodeID(c, sb);
      subject = createBNode(sb.toString());
    }
    else if (c == -1) {
      throwEOFException();
    }
    else {
      reportFatalError("Expected '<' or '_', found: " + (char)c);
    }

    return c;
  }

  private int parsePredicate(int c)
    throws IOException, RDFParseException
  {
    StringBuilder sb = new StringBuilder(100);

    // predicate must be an uriref (<foo://bar>)
    if (c == '<') {
      // predicate is an uriref
      c = parseUriRef(c, sb);
      predicate = createURI(sb.toString());
    }
    else if (c == -1) {
      throwEOFException();
    }
    else {
      reportFatalError("Expected '<', found: " + (char)c);
    }

    return c;
  }

  private int parseObject(int c)
    throws IOException, RDFParseException
  {
    StringBuilder sb = new StringBuilder(100);

    // object is either an uriref (<foo://bar>), a nodeID (_:node1) or a
    // literal ("foo"-en or "1"^^<xsd:integer>).
    if (c == '<') {
      // object is an uriref
      c = parseUriRef(c, sb);
      object = createURI(sb.toString());
    }
    else if (c == '_') {
      // object is a bNode
      c = parseNodeID(c, sb);
      object = createBNode(sb.toString());
    }
    else if (c == '"') {
      // object is a literal
      StringBuilder lang = new StringBuilder(8);
      StringBuilder datatype = new StringBuilder(40);
      c = parseLiteral(c, sb, lang, datatype);
      object = createLiteral(sb.toString(), lang.toString(), datatype.toString());
    }
    else if (c == -1) {
      throwEOFException();
    }
    else {
      reportFatalError("Expected '<', '_' or '\"', found: " + (char)c);
    }

    return c;
  }

  private int parseUriRef(int c, StringBuilder uriRef)
    throws IOException, RDFParseException
  {
    assert c == '<' : "Supplied char should be a '<', is: " + c;

    // Read up to the next '>' character
    c = reader.read();
    while (c != '>') {
      if (c == -1) {
        throwEOFException();
      }
      uriRef.append((char)c);
      c = reader.read();
    }

    // c == '>', read next char
    c = reader.read();

    return c;
  }

  private int parseNodeID(int c, StringBuilder name)
    throws IOException, RDFParseException
  {
    assert c == '_' : "Supplied char should be a '_', is: " + c;

    c = reader.read();
    if (c == -1) {
      throwEOFException();
    }
    else if (c != ':') {
      reportError("Expected ':', found: " + (char)c);
    }

    c = reader.read();
    if (c == -1) {
      throwEOFException();
    }
    else if (!NTriplesUtil.isLetter(c)) {
      reportError("Expected a letter, found: " + (char)c);
    }
    name.append((char)c);

    // Read all following letter and numbers, they are part of the name
    c = reader.read();
    while (c != -1 && NTriplesUtil.isLetterOrNumber(c)) {
      name.append((char)c);
      c = reader.read();
    }

    return c;
  }

  private int parseLiteral(int c, StringBuilder value, StringBuilder lang, StringBuilder datatype)
    throws IOException, RDFParseException
  {
    assert c == '"' : "Supplied char should be a '\"', is: " + c;

    // Read up to the next '"' character
    c = reader.read();
    while (c != '"') {
      if (c == -1) {
        throwEOFException();
      }
      value.append((char)c);

      if (c == '\\') {
        // This escapes the next character, which might be a double quote
        c = reader.read();
        if (c == -1) {
          throwEOFException();
        }
        value.append((char)c);
      }

      c = reader.read();
    }

    // c == '"', read next char
    c = reader.read();

    if (c == '@') {
      // Read language
      c = reader.read();
      while (c != -1 && c != '.' && c != '^' && c != ' ' && c != '\t') {
        lang.append((char)c);
        c = reader.read();
      }
    }
    else if (c == '^') {
      // Read datatype
      c = reader.read();

      // c should be another '^'
      if (c == -1) {
        throwEOFException();
      }
      else if (c != '^') {
        reportError("Expected '^', found: " + (char)c);
      }

      c = reader.read();

      // c should be a '<'
      if (c == -1) {
        throwEOFException();
      }
      else if (c != '<') {
        reportError("Expected '<', found: " + (char)c);
      }

      c = parseUriRef(c, datatype);
    }

    return c;
  }

  @Override
  protected URI createURI(String uri)
    throws RDFParseException
  {
    try {
      uri = NTriplesUtil.unescapeString(uri);
    }
    catch (IllegalArgumentException e) {
      reportError(e.getMessage());
    }

    return super.createURI(uri);
  }

  protected Literal createLiteral(String label, String lang, String datatype)
    throws RDFParseException
  {
    try {
      label = NTriplesUtil.unescapeString(label);
    }
    catch (IllegalArgumentException e) {
      reportError(e.getMessage());
    }

    if (lang.length() == 0) {
      lang = null;
    }

    if (datatype.length() == 0) {
      datatype = null;
    }

    URI dtURI = null;
    if (datatype != null) {
      dtURI = createURI(datatype);
    }

    return super.createLiteral(label, lang, dtURI);
  }

  /**
   * Overrides {@link RDFParserBase#reportWarning(String)}, adding line number
   * information to the error.
   */
  @Override
  protected void reportWarning(String msg) {
    reportWarning(msg, lineNo, -1);
  }

  /**
   * Overrides {@link RDFParserBase#reportError(String)}, adding line number
   * information to the error.
   */
  @Override
  protected void reportError(String msg)
    throws RDFParseException
  {
    reportError(msg, lineNo, -1);
  }

  /**
   * Overrides {@link RDFParserBase#reportFatalError(String)}, adding line
   * number information to the error.
   */
  @Override
  protected void reportFatalError(String msg)
    throws RDFParseException
  {
    reportFatalError(msg, lineNo, -1);
  }

  /**
   * Overrides {@link RDFParserBase#reportFatalError(Exception)}, adding line
   * number information to the error.
   */
  @Override
  protected void reportFatalError(Exception e)
    throws RDFParseException
  {
    reportFatalError(e, lineNo, -1);
  }

  private void throwEOFException()
    throws RDFParseException
  {
    throw new RDFParseException("Unexpected end of file");
  }
}
TOP

Related Classes of org.openrdf.rio.ntriples.NTriplesParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.