Package org.fcrepo.server.storage.translation

Source Code of org.fcrepo.server.storage.translation.DOTranslationUtility

/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package org.fcrepo.server.storage.translation;

import static org.fcrepo.common.Models.CONTENT_MODEL_3_0;
import static org.fcrepo.common.Models.FEDORA_OBJECT_3_0;
import static org.fcrepo.common.Models.SERVICE_DEFINITION_3_0;
import static org.fcrepo.common.Models.SERVICE_DEPLOYMENT_3_0;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import org.fcrepo.common.Constants;
import org.fcrepo.common.Models;
import org.fcrepo.common.rdf.RDFName;
import org.fcrepo.common.xml.namespace.QName;
import org.fcrepo.server.Server;
import org.fcrepo.server.config.Parameter;
import org.fcrepo.server.config.ServerConfiguration;
import org.fcrepo.server.errors.ObjectIntegrityException;
import org.fcrepo.server.errors.StreamIOException;
import org.fcrepo.server.storage.types.AuditRecord;
import org.fcrepo.server.storage.types.Datastream;
import org.fcrepo.server.storage.types.DatastreamXMLMetadata;
import org.fcrepo.server.storage.types.DigitalObject;
import org.fcrepo.server.storage.types.Disseminator;
import org.fcrepo.server.utilities.StreamUtility;
import org.fcrepo.utilities.DateUtility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Utility methods for usage by digital object serializers and deserializers.
* This class provides methods for detecting various forms of relative
* repository URLs, which are URLs that point to the hostname and port of the
* local repository. Methods will detect these kinds of URLS in datastream
* location fields and in special cases of inline XML. Methods are available to
* convert these URLS back and forth from relative URL syntax, to Fedora's
* internal local URL syntax, and to absolute URL syntax. This utility class
* defines different "translation contexts" and the format of these relative
* URLs will be set appropriately to the context. Currently defined translation
* contexts are: 0=Deserialize XML into java object appropriate for in-memory
* usage 1=Serialize java object to XML appropriate for "public" export
* (absolute URLs) 2=Serialize java object to XML appropriate for move/migrate
* to another repository 3=Serialize java object to XML appropriate for internal
* storage. The public "normalize*" methods in this class should be
* called to make the right decisions about what conversions should occur for
* what contexts. Other utility methods set default values for datastreams and
* disseminators.
*
* @author Sandy Payette
* @version $Id$
*/

@SuppressWarnings("deprecation")
public abstract class DOTranslationUtility
        implements Constants {

    private static final Logger logger =
            LoggerFactory.getLogger(DOTranslationUtility.class);

    /**
     * DESERIALIZE_INSTANCE: Deserialize XML into a java object appropriate for
     * in-memory usage. This will make the value of relative repository URLs
     * appropriate for instantiations of the digital object in memory. For
     * External (E) and Redirected (R) datastreams, any URLs that are relative
     * to the local repository are converted to absolute URLs using the
     * currently configured hostname:port of the repository. To do this, the
     * dsLocation is searched for instances the Fedora local URL string
     * ("http://local.fedora.server") which is the way Fedora internally keeps
     * track of instances of relative repository URLs. For Managed Content (M)
     * datastreams, the internal identifiers are instantiated as is. Also,
     * certain reserved inline XML datastreams (WSDL and SERVICE_PROFILE) are
     * searched for relative repository URLs and they are made absolute.
     */
    public static final int DESERIALIZE_INSTANCE = 0;

    /**
     * SERIALIZE_EXPORT_PUBLIC: Serialize digital object to XML appropriate for
     * "public" external use. This context is appropriate when the exporting
     * repository will continue to exist and will continue to support callback
     * URLs for datastream content and disseminations. This gives a "public"
     * export of an object in which all relative repository URLs AND internal
     * identifiers are converted to absolute callback URLs. For External (E) and
     * Redirected (R) datastreams, any URLs that are relative to the local
     * repository are converted to absolute URLs using the currently configured
     * hostname:port of the repository. For Managed Content (M) datastreams, the
     * internal identifiers in dsLocation are converted to default dissemination
     * URLs so they can serve as callbacks to the repository to obtain the
     * internally managed content. Also, selected inline XML datastreams (i.e.,
     * WSDL and SERVICE_PROFILE) are searched for relative repository URLs and
     * they are made absolute.
     */
    public static final int SERIALIZE_EXPORT_PUBLIC = 1;

    /**
     * SERIALIZE_EXPORT_MIGRATE: Serialize digital object to XML in a manner
     * appropriate for migrating or moving objects from one repository to
     * another. This context is appropriate when the local repository will NOT
     * be available after objects have been migrated to a new repository. For
     * External (E) and Redirected (R)datastreams, any URLs that are relative to
     * the local repository will be expressed with the Fedora local URL syntax
     * (which consists of the string "local.fedora.server" standing in place of
     * the actual "hostname:port"). This enables a new repository to ingest the
     * serialization and maintain the relative nature of the URLs (they will
     * become relative to the *new* repository). Also, for Managed Content (M)
     * datastreams, the internal identifiers in dsLocation are converted to
     * default dissemination URLs. This enables the new repository to callback
     * to the old repository to obtain the content bytestream to be stored in
     * the new repository. Also, within selected inline XML datastreams (i.e.,
     * WSDL and SERVICE_PROFILE) any URLs that are relative to the local
     * repository will also be expressed with the Fedora local URL syntax.
     */
    public static final int SERIALIZE_EXPORT_MIGRATE = 2;

    /**
     * SERIALIZE_STORAGE_INTERNAL: Serialize java object to XML appropriate for
     * persistent storage in the repository, ensuring that any URLs that are
     * relative to the local repository are stored with the Fedora local URL
     * syntax. The Fedora local URL syntax consists of the string
     * "local.fedora.server" standing in place of the actual "hostname:port" on
     * the URL). Managed Content (M) datastreams are stored with internal
     * identifiers in dsLocation. Also, within selected inline XML datastreams
     * (i.e., WSDL and SERVICE_PROFILE) any URLs that are relative to the local
     * repository will also be stored with the Fedora local URL syntax. Note
     * that a view of the storage serialization can be obtained via the
     * getObjectXML method of API-M.
     */
    public static final int SERIALIZE_STORAGE_INTERNAL = 3;

    /**
     * SERIALIZE_EXPORT_ARCHIVE: Serialize digital object to XML in a manner
     * appropriate for creating a stand alone archive of objects from a
     * repository that will NOT be available after objects have been exported.
     * For External (E) and Redirected (R)datastreams, any URLs that are
     * relative to the local repository will be expressed with the Fedora local
     * URL syntax (which consists of the string "local.fedora.server" standing
     * in place of the actual "hostname:port"). This enables a new repository to
     * ingest the serialization and maintain the relative nature of the URLs
     * (they will become relative to the *new* repository). Also, for Managed
     * Content (M) datastreams, the internal identifiers in dsLocation are
     * converted to default dissemination URLs, and the contents of the URL's
     * are included inline via base-64 encoding. This enables the new repository
     * to recreate the content bytestream to be stored in the new repository, when
     * the original repository is no longer available. Also, within selected
     * inline XML datastreams (i.e., WSDL and SERVICE_PROFILE) any URLs that are
     * relative to the local repository will also be expressed with the Fedora
     * local URL syntax.
     */
    public static final int SERIALIZE_EXPORT_ARCHIVE = 4;

    /**
     * Deserialize or Serialize as is. This context doesn't attempt to do any
     * conversion of URLs.
     */
    public static final int AS_IS = 5;

    // Fedora URL LOCALIZATION Pattern:
    // Pattern that is used as the internal replacement syntax for URLs that
    // refer back to the local repository.  This pattern virtualized the
    // repository server address, so that if the host:port of the repository is
    // changed, objects that have URLs that refer to the local repository won't break.
    // NOTE: the dots in this expression are unescaped regex metacharacters, so
    // each of them also matches any single character.
    private static final Pattern s_fedoraLocalPattern =
            Pattern.compile("http://local.fedora.server/");

    // Fedora Application Context Localization pattern
    // Specifically refers to the current fedora application context (host:port/context)
    private static final Pattern s_fedoraLocalAppContextPattern =
            Pattern.compile("http://local.fedora.server/fedora/");

    // PATTERN FOR DEPRECATED METHOD (getItem of the Default Disseminator).
    // Matches the "/fedora-system:3/getItem?itemID=" portion of a URL such as
    // /fedora/get/{PID}/fedora-system:3/getItem?itemID={DSID}
    public static Pattern s_getItemPattern =
            Pattern.compile("/fedora-system:3/getItem\\?itemID=");

    // ABSOLUTE REPOSITORY URL Patterns:
    // Patterns of how the protocol and repository server address may be encoded
    // in a URL that points back to the local repository.
    // All four are compiled in the static initializer below from the configured
    // host, port and application context.

    // "http(s)://host:port/" and "http(s)://host:port/context/"
    private static Pattern s_concreteLocalUrl;
    private static Pattern s_concreteLocalUrlAppContext;

    // The same two shapes without the ":port" part
    private static Pattern s_concreteLocalUrlNoPort;
    private static Pattern s_concreteLocalUrlAppContextNoPort;

    // CALLBACK DISSEMINATION URL Pattern (for M datastreams in export files):
    // Pattern of how protocol, repository server address, and path is encoded
    // for a callback dissemination URL to the local repository.
    // This is used for encoding datastream location URLs for Managed Content
    // datastreams inside an export file.  Internal Fedora identifiers for
    // the Managed Content datastreams are replaced with public callback URLS.
    private static String s_localDissemUrlStart; // "http://hostname:port/fedora/get/"

    // The actual host and port of the Fedora repository server, as
    // "http://host[:port]/" (the static initializer omits the port when it is
    // 80 or 443)
    private static String s_hostInfo = null;

    // Host, port, and Fedora context: s_hostInfo + application context + "/"
    private static String s_hostContextInfo;

    // Set by the static initializer when the configured server port is "80";
    // makeFedoraLocalURLs() then uses the port-less URL patterns.
    private static boolean m_serverOnPort80 = false;

    // Set by the static initializer when the configured redirect port is "443";
    // makeFedoraLocalURLs() then uses the port-less URL patterns.
    private static boolean m_serverOnRedirectPort443 = false;

    // StAX input factory created once for the whole class.
    // NOTE(review): its consumers are not visible in this part of the file.
    private static XMLInputFactory m_xmlInputFactory =
            XMLInputFactory.newInstance();

    // initialize static class with stuff that's used by all DO serializers
    static {
        // get host port from system properties (for testing without server instance)
        String fedoraServerHost = System.getProperty("fedora.hostname");
        String fedoraServerPort = System.getProperty("fedora.port");
        String fedoraServerPortSSL = System.getProperty("fedoraRedirectPort");
        String fedoraAppServerContext =
                System.getProperty("fedora.appServerContext");

        if (fedoraServerPort != null) {
            if (fedoraServerPort.equals("80")) {
                m_serverOnPort80 = true;
            }
        }
        if (fedoraServerPortSSL != null) {
            if (fedoraServerPortSSL.equals("443")) {
                m_serverOnRedirectPort443 = true;
            }
        }

        // otherwise, get host port from the server instance if they are null
        if (fedoraServerHost == null || fedoraServerPort == null
                || fedoraAppServerContext == null) {
            // if fedoraServerHost or fedoraServerPort system properties
            // are not defined, read them from server configuration
            ServerConfiguration config = Server.getConfig();
            fedoraServerHost =
                    config.getParameter("fedoraServerHost",Parameter.class).getValue();
            fedoraServerPort =
                    config.getParameter("fedoraServerPort",Parameter.class).getValue();
            fedoraAppServerContext =
                    config.getParameter("fedoraAppServerContext",Parameter.class).getValue();
            fedoraServerPortSSL =
                    config.getParameter("fedoraRedirectPort",Parameter.class).getValue();
            if (fedoraServerPort.equals("80")) {
                m_serverOnPort80 = true;
            }
            if (fedoraServerPortSSL.equals("443")) {
                m_serverOnRedirectPort443 = true;
            }
        }
        // set the currently configured host:port of the repository
        s_hostInfo = "http://" + fedoraServerHost;
        if (!fedoraServerPort.equals("80") && !fedoraServerPort.equals("443")) {
            s_hostInfo = s_hostInfo + ":" + fedoraServerPort;
        }
        s_hostInfo = s_hostInfo + "/";
        s_hostContextInfo = s_hostInfo + fedoraAppServerContext + "/";

        // compile the pattern for public dissemination URLs at local server
        s_localDissemUrlStart = s_hostInfo + fedoraAppServerContext + "/get/";

        s_concreteLocalUrl =
            Pattern.compile("https?://(localhost|" + fedoraServerHost
                            + "):" + fedoraServerPort + "/");

        s_concreteLocalUrlAppContext =
            Pattern.compile("https?://(localhost|" + fedoraServerHost
                            + "):" + fedoraServerPort + "/("
                            + fedoraAppServerContext + "|fedora)/");

        s_concreteLocalUrlNoPort =
            Pattern.compile("https?://(localhost|" + fedoraServerHost
                            + ")/");

        s_concreteLocalUrlAppContextNoPort =
            Pattern.compile("https?://(localhost|" + fedoraServerHost
                            + ")/(" + fedoraAppServerContext + "|fedora)/");
    }

    /**
     * Make URLs that are relative to the local Fedora repository ABSOLUTE URLs.
     * First, see if any URLs are expressed in relative URL syntax (beginning
     * with "fedora/get" or "fedora/search") and convert these to the special
     * Fedora local URL syntax ("http://local.fedora.server/..."). Then look for
     * all URLs that contain the special Fedora local URL syntax and replace
     * instances of this string with the actual host:port configured for the
     * repository. This ensures that all forms of relative repository URLs are
     * converted to proper absolute URLs that reference the hostname:port of the
     * local Fedora repository. Examples:
     * "http://local.fedora.server/fedora/get/demo:1/DS1" is converted to
     * "http://myrepo.com:8080/fedora/get/demo:1/DS1" "fedora/get/demo:1/DS1" is
     * converted to "http://myrepo.com:8080/fedora/get/demo:1/DS1"
     * "http://local.fedora.server/fedora/get/demo:1/sdef:1/getFoo?in="
     * http://local.fedora.server/fedora/get/demo:2/DC" is converted to
     * "http://myrepo.com:8080/fedora/get/demo:1/sdef:1/getFoo?in="
     * http://myrepo.com:8080/fedora/get/demo:2/DC"
     *
     * @param input
     *        the text (e.g. a datastream location or inline XML) to convert
     * @return String with all relative repository URLs and Fedora local URLs
     *         converted to absolute URL syntax.
     */
    public static String makeAbsoluteURLs(String input) {
        String output = input;

        // First pass: convert fedora app context URLs via variable substitution
        output =
                s_fedoraLocalAppContextPattern.matcher(output)
                        .replaceAll(s_hostContextInfo);

        // Second pass: convert non-fedora-app-context URLs via variable substitution
        output = s_fedoraLocalPattern.matcher(output).replaceAll(s_hostInfo);

        logger.debug("makeAbsoluteURLs: input=" + input + ", output=" + output);
        return output;
    }

    /**
     * Detect all forms of URLs that point to the local Fedora repository and
     * make sure they are encoded in the special Fedora local URL syntax
     * (http://local.fedora.server/..."). First, look for relative URLs that
     * begin with "fedora/get" or "fedora/search" replaces instances of these
     * string patterns with the special Fedora relative URL syntax. Then, look
     * for absolute URLs that have a host:port equal to the host:port currently
     * configured for the Fedora repository and replace host:port with the
     * special string. The special Fedora relative URL string provides a
     * consistent unique string that can be easily searched for and either converted back
     * to an absolute URL or a relative URL to the repository. Examples:
     * "http://myrepo.com:8080/fedora/get/demo:1/DS1" is converted to
     * "http://local.fedora.server/fedora/get/demo:1/DS1"
     * "https://myrepo.com:8443/fedora/get/demo:1/sdef:1/getFoo?in="
     * http://myrepo.com:8080/fedora/get/demo:2/DC" is converted to
     * "http://local.fedora.server/fedora/get/demo:1/sdef:1/getFoo?in="
     * http://local.fedora.server/fedora/get/demo:2/DC"
     * "http://myrepo.com:8080/saxon..." (internal service in sDep WSDL) is
     * converted to "http://local.fedora.server/saxon..."
     *
     * @param input
     * @return String with all forms of relative repository URLs converted to
     *         the Fedora local URL syntax.
     */
    public static String makeFedoraLocalURLs(String input) {
        String output = input;

        // Detect any absolute URLs that refer to the local repository
        // and convert them to the Fedora LOCALIZATION URL syntax
        // (i.e., "http://local.fedora.server/...")\

        if (m_serverOnPort80 || m_serverOnRedirectPort443) {
            output = s_concreteLocalUrlAppContextNoPort.matcher(output)
                        .replaceAll(s_fedoraLocalAppContextPattern.pattern());

            output = s_concreteLocalUrlNoPort.matcher(output)
                        .replaceAll(s_fedoraLocalPattern.pattern());
        } else {
            output = s_concreteLocalUrlAppContext.matcher(output)
                        .replaceAll(s_fedoraLocalAppContextPattern.pattern());

            output = s_concreteLocalUrl.matcher(output)
                        .replaceAll(s_fedoraLocalPattern.pattern());
        }

        logger.debug("makeFedoraLocalURLs: input=" + input + ", output=" + output);
        return output;
    }

    /**
     * Utility method to detect instances of dsLocation URLs that use a
     * deprecated default disseminator method
     * (/fedora/get/{PID}/fedora-system:3/getItem?itemID={DSID}) and replace it
     * with the new API-A-LITE syntax for getting a datastream
     * (/fedora/get/{PID}/{DSID}).
     *
     * @param input
     *        the datastream location to convert
     * @return the location with every getItem call rewritten to the
     *         API-A-LITE datastream syntax
     */
    private static String convertGetItemURLs(String input) {
        String output = input;

        // Detect the old default disseminator syntax for getting datastreams
        // (i.e., getItem), and replace with new API-A-LITE syntax.

        output = s_getItemPattern.matcher(input).replaceAll("/");
        logger.debug("convertGetItemURLs: input=" + input + ", output=" + output);
        return output;
    }

    /**
     * Utility method to normalize the value of datastream location depending on
     * the translation context. This is mainly to deal with External (E) and
     * Redirected (R) datastream locations that are self-referential to the
     * local repository (i.e., relative repository URLs) and with Managed
     * Content (M) datastreams whose location is an internal identifier.
     *
     * @param PID
     *        The PID of the object that contains the datastream
     * @param origDS
     *        The datastream whose location is to be processed
     * @param transContext
     *        Integer value indicating the serialization or deserialization
     *        context. Valid values are defined as constants in
     *        org.fcrepo.server.storage.translation.DOTranslationUtility:
     *        0=DOTranslationUtility.DESERIALIZE_INSTANCE
     *        1=DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC
     *        2=DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE
     *        3=DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL
     *        4=DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE
     * @return a copy of the datastream with its location normalized for the
     *         given context
     */
    public static Datastream normalizeDSLocationURLs(String PID,
                                                     Datastream origDS,
                                                     int transContext) {
        Datastream ds = origDS.copy();
        if (transContext == AS_IS) {
            return ds;
        }
        if (transContext == DOTranslationUtility.DESERIALIZE_INSTANCE) {
            if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
                // MAKE ABSOLUTE REPO URLs
                ds.DSLocation = makeAbsoluteURLs(ds.DSLocation);
            }
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC) {
            if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
                // MAKE ABSOLUTE REPO URLs
                ds.DSLocation = makeAbsoluteURLs(ds.DSLocation);
            } else if (ds.DSControlGrp.equals("M")) {
                //if (!ds.DSLocation.startsWith("http://localhost:8080/fedora-demo")) {

                // MAKE DISSEMINATION URLs
                if (ds.DSCreateDT == null) {
                    ds.DSLocation =
                            s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
                } else {
                    ds.DSLocation =
                            s_localDissemUrlStart
                                    + PID
                                    + "/"
                                    + ds.DatastreamID
                                    + "/"
                                    + DateUtility
                                            .convertDateToString(ds.DSCreateDT);
                }
                //}
            }
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE) {
            if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
                // MAKE FEDORA LOCAL REPO URLs
                ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
            } else if (ds.DSControlGrp.equals("M")) {
                // MAKE DISSEMINATION URLs
                if (ds.DSCreateDT == null) {
                    ds.DSLocation =
                            s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
                } else {
                    ds.DSLocation =
                            s_localDissemUrlStart
                                    + PID
                                    + "/"
                                    + ds.DatastreamID
                                    + "/"
                                    + DateUtility
                                            .convertDateToString(ds.DSCreateDT);
                }
            }
        } else if (transContext == DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL) {
            //String relativeLoc=ds.DSLocation;
            if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
                // MAKE FEDORA LOCAL REPO URLs
                ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
            } else if (ds.DSControlGrp.equals("M")) {
                // MAKE INTERNAL IDENTIFIERS (PID+DSID+DSVersionID)
                ds.DSLocation =
                        PID + "+" + ds.DatastreamID + "+" + ds.DSVersionID;
            }
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE) {
            if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
                // MAKE FEDORA LOCAL REPO URLs
                ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
            } else if (ds.DSControlGrp.equals("M")) {
                // MAKE DISSEMINATION URLs
                if (ds.DSCreateDT == null) {
                    ds.DSLocation =
                            s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
                } else {
                    ds.DSLocation =
                            s_localDissemUrlStart
                                    + PID
                                    + "/"
                                    + ds.DatastreamID
                                    + "/"
                                    + DateUtility
                                            .convertDateToString(ds.DSCreateDT);
                }
            }
        }

        // In any event, look for the deprecated getItem method of the default disseminator
        // (i.e., "/fedora-system:3/getItem?itemID=") and convert to new API-A-LITE syntax.
        if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
            ds.DSLocation = convertGetItemURLs(ds.DSLocation);
        }

        return ds;
    }

    /**
     * Utility method to normalize a chunk of inline XML depending on the
     * translation context. This is mainly to deal with certain inline XML
     * datastreams found in Service Deployment objects that may contain a
     * service URL that references the host:port of the local Fedora server.
     * This method will usually only ever be called to check WSDL and
     * SERVICE_PROFILE inline XML datastream, but is of general utility for
     * dealing with any datastreams that may contain URLs that reference the
     * local Fedora server. However, this method should be used sparingly,
     * and only on inline XML datastreams where the impact of the conversions is
     * well understood.
     *
     * @param xml
     *        a chunk of XML that's contents of an inline XML datastream
     * @param transContext
     *        Integer value indicating the serialization or deserialization
     *        context. Valid values are defined as constants in
     *        org.fcrepo.server.storage.translation.DOTranslationUtility:
     *        0=DOTranslationUtility.DESERIALIZE_INSTANCE
     *        1=DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC
     *        2=DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE
     *        3=DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL
     *        4=DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE
     * @return the inline XML contents with appropriate conversions.
     */
    public static String normalizeInlineXML(String xml, int transContext) {
        if (transContext == AS_IS) {
            return xml;
        }
        if (transContext == DOTranslationUtility.DESERIALIZE_INSTANCE) {
            // MAKE ABSOLUTE REPO URLs
            return makeAbsoluteURLs(xml);
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC) {
            // MAKE ABSOLUTE REPO URLs
            return makeAbsoluteURLs(xml);
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE) {
            // MAKE FEDORA LOCAL REPO URLs
            return makeFedoraLocalURLs(xml);
        } else if (transContext == DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL) {
            // MAKE FEDORA LOCAL REPO URLs
            return makeFedoraLocalURLs(xml);
        } else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE) {
            // MAKE FEDORA LOCAL REPO URLs
            return makeFedoraLocalURLs(xml);
        }
        return xml;
    }

    /**
     * Check for null values in attributes and set them to empty string so
     * 'null' does not appear in XML attribute values. This helps in XML
     * validation of required attributes. If 'null' is the attribute value then
     * validation would incorrectly consider it a valid non-empty value. Also,
     * we set some other default values here.
     *
     * @param ds
     *        The Datastream object to work on.
     * @return The Datastream value with default set.
     * @throws ObjectIntegrityException
     */

    public static Datastream setDatastreamDefaults(Datastream ds)
            throws ObjectIntegrityException {

        if ((ds.DSMIME == null || ds.DSMIME.equals(""))
                && ds.DSControlGrp.equalsIgnoreCase("X")) {
            ds.DSMIME = "text/xml";
        }

        if (ds.DSState == null || ds.DSState.equals("")) {
            ds.DSState = "A";
        }

        // For METS backward compatibility
        if (ds.DSInfoType == null || ds.DSInfoType.equals("")
                || ds.DSInfoType.equalsIgnoreCase("OTHER")) {
            ds.DSInfoType = "UNSPECIFIED";
        }

        // LOOK! For METS backward compatibility:
        // If we have a METS MDClass value, and DSFormatURI isn't already
        // assigned, preserve MDClass and MDType in a DSFormatURI.
        // Note that the system is taking over the DSFormatURI in this case.
        // Therefore, if a client subsequently modifies the DSFormatURI
        // this METS legacy informatin will be lost, in which case the inline
        // datastream will default to amdSec/techMD in a subsequent METS export.
        if (ds.DSControlGrp.equalsIgnoreCase("X")) {
            if (((DatastreamXMLMetadata) ds).DSMDClass != 0
                    && ds.DSFormatURI == null) {
                String mdClassName = "";
                String mdType = ds.DSInfoType;
                String otherType = "";
                if (((DatastreamXMLMetadata) ds).DSMDClass == 1) {
                    mdClassName = "techMD";
                } else if (((DatastreamXMLMetadata) ds).DSMDClass == 2) {
                    mdClassName = "sourceMD";
                } else if (((DatastreamXMLMetadata) ds).DSMDClass == 3) {
                    mdClassName = "rightsMD";
                } else if (((DatastreamXMLMetadata) ds).DSMDClass == 4) {
                    mdClassName = "digiprovMD";
                } else if (((DatastreamXMLMetadata) ds).DSMDClass == 5) {
                    mdClassName = "descMD";
                }
                if (!mdType.equals("MARC") && !mdType.equals("EAD")
                        && !mdType.equals("DC") && !mdType.equals("NISOIMG")
                        && !mdType.equals("LC-AV") && !mdType.equals("VRA")
                        && !mdType.equals("TEIHDR") && !mdType.equals("DDI")
                        && !mdType.equals("FGDC")) {
                    mdType = "OTHER";
                    otherType = ds.DSInfoType;
                }
                ds.DSFormatURI =
                        "info:fedora/fedora-system:format/xml.mets."
                                + mdClassName + "." + mdType + "." + otherType;
            }
        }
        return ds;
    }

    /**
     * Appends XML to a PrintWriter. Essentially, just appends all text content
     * of the inputStream, trimming any leading and trailing whitespace. It does
     * this in a streaming fashion, with resource consumption entirely comprised
     * of fixed internal buffers.
     *
     * @param in
     *        InputStream containing serialized XML.
     * @param writer
     *        PrintWriter to write XML content to.
     * @param encoding
     *        Character set encoding.
     */
    protected static void appendXMLStream(InputStream in,
                                          PrintWriter writer,
                                          String encoding)
            throws ObjectIntegrityException, UnsupportedEncodingException,
            StreamIOException {
        if (in == null) {
            throw new ObjectIntegrityException("Object's inline xml "
                    + "stream cannot be null.");
        }
        try {
            InputStreamReader chars =
                    new InputStreamReader(in, Charset.forName(encoding));

            /* Content buffer */
            char[] charBuf = new char[4096];

            /* Beginning/ending whitespace buffer */
            char[] wsBuf = new char[4096];

            int len;
            int start;
            int end;
            int wsLen = 0;
            boolean atBeginning = true;
            while ((len = chars.read(charBuf)) != -1) {
                start = 0;
                end = len - 1;

                /* Strip out any leading whitespace */
                if (atBeginning) {
                    while (start < len) {
                        if (charBuf[start] > 0x20) break;
                        start++;
                    }
                    if (start < len) atBeginning = false;
                }

                /*
                 * Hold aside any whitespace at the end of the current chunk. If
                 * we make it to the next chunk, then append our whitespace to
                 * the buffer. Using this methodology, we may "trim" at most
                 * {buffer length} characters from the end.
                 */

                if (wsLen > 0) {
                    /* Commit previous ending whitespace */
                    writer.write(wsBuf, 0, wsLen);
                    wsLen = 0;
                }

                while (end > start) {
                    /* Buffer current ending whitespace */
                    if (charBuf[end] > 0x20) break;
                    wsBuf[wsLen] = charBuf[end];
                    wsLen++;
                    end--;
                }

                if (start < len) {
                    writer.write(charBuf, start, end + 1 - start);
                }
            }
        } catch (UnsupportedEncodingException uee) {
            throw uee;
        } catch (IOException ioe) {
            throw new StreamIOException("Error reading from inline xml datastream.");
        } finally {
            try {
                in.close();
            } catch (IOException closeProb) {
                throw new StreamIOException("Error closing read stream.");
            }
        }
    }

    /*
     * Certain serviceDeployment datastreams require special processing to
     * fix/complete URLs and do variable substitution (such as replacing
     * 'local.fedora.server' with fedora's baseURL)
     */
    public static void normalizeDatastreams(DigitalObject obj,
                                            int transContext,
                                            String characterEncoding)
            throws UnsupportedEncodingException {
        if (transContext == AS_IS) {
            return;
        }
        if (obj.hasContentModel( Models.SERVICE_DEPLOYMENT_3_0)) {
            Iterator<String> datastreams = obj.datastreamIdIterator();
            while (datastreams.hasNext()) {
                String dsid = datastreams.next();

                if (dsid.equals("WSDL") || dsid.equals("SERVICE-PROFILE")) {
                    for (Datastream d : obj.datastreams(dsid)) {
                        if (!(d instanceof DatastreamXMLMetadata)) {
                            logger.warn(obj.getPid()
                                    + " : Refusing to normalize URLs in datastream "
                                    + dsid + " because it is not inline XML");
                            continue;
                        }

                        DatastreamXMLMetadata xd = (DatastreamXMLMetadata) d;
                        if (logger.isDebugEnabled())
                            logger.debug(obj.getPid() + " : normalising URLs in "
                                + dsid);
                        xd.xmlContent =
                                DOTranslationUtility
                                        .normalizeInlineXML(new String(xd.xmlContent,
                                                                       "UTF-8"),
                                                            transContext)
                                        .getBytes(characterEncoding);
                        xd.DSSize = xd.xmlContent.length;
                    }
                }
            }
        }
    }

    @Deprecated
    public static Disseminator setDisseminatorDefaults(Disseminator diss)
            throws ObjectIntegrityException {

        // Until future when we implement selective versioning,
        // set default to true.
        diss.dissVersionable = true;

        if (diss.dissState == null || diss.dissState.equals("")) {
            diss.dissState = "A";
        }
        return diss;
    }

    protected static String oneString(String[] idList) {
        StringBuffer out = new StringBuffer();
        for (int i = 0; i < idList.length; i++) {
            if (i > 0) {
                out.append(' ');
            }
            out.append(idList[i]);
        }
        return out.toString();
    }

    /** Reads the state attribute from a DigitalObject.
     * <p>
     * Null or empty strings are interpteted as "Active".
     * </p>
     * @param obj Object that potentially contains object state data.
     * @return String containing full state value (Active, Inactive, or Deleted)
     * @throws ObjectIntegrityException thrown when the state cannot be parsed.
     */
    public static String getStateAttribute(DigitalObject obj) throws ObjectIntegrityException {

            if (obj.getState() == null || obj.getState().equals("")) {
                return MODEL.ACTIVE.localName;
            } else {
                switch (obj.getState().charAt(0)) {
                    case 'D':
                        return MODEL.DELETED.localName;
                    case 'I':
                        return MODEL.INACTIVE.localName;
                    case 'A':
                        return MODEL.ACTIVE.localName;
                    default:
                        throw new ObjectIntegrityException("Could not determine "
                                                   + "state attribute from '"
                                                   + obj.getState() + "'");
                }
            }
    }

    /** Parse and read the object state value from raw text.
     * <p>
     * Reads a text representation of object state, and returns a "state code"
     * abbreviation corresponding to that state.  Null or empty values are interpreted
     * as "Active".
     * </p>
     *
     * XXX: It might clearer to nix state codes altogether and just use the full value
     *
     * @param rawValue Raw string to parse.  May be null
     * @return String containing the state code (A, D, or I)
     * @throws ParseException thrown when state value cannot be determined
     */
    public static String readStateAttribute(String rawValue) throws ParseException {
        if (MODEL.DELETED.looselyMatches(rawValue, true)) {
            return "D";
        } else if (MODEL.INACTIVE.looselyMatches(rawValue, true)) {
            return "I";
        } else if (MODEL.ACTIVE.looselyMatches(rawValue, true)
                    || rawValue == null
                    || rawValue.equals("")) {
            return "A";
        } else {
                throw new ParseException("Could not interpret state value of '"
                                   + rawValue + "'", 0);
        }
    }

    public static RDFName getTypeAttribute(DigitalObject obj)
            throws ObjectIntegrityException {
        if (obj.hasContentModel(SERVICE_DEFINITION_3_0)) {
            return MODEL.BDEF_OBJECT;
        }
        if (obj.hasContentModel(SERVICE_DEPLOYMENT_3_0)) {
            return MODEL.BMECH_OBJECT;
        }
        if (obj.hasContentModel( CONTENT_MODEL_3_0)) {

            // FOXML 1.0 doesn't support this type; down-convert
            return MODEL.DATA_OBJECT;
        }
        if (obj.hasContentModel( FEDORA_OBJECT_3_0)) {
            return MODEL.DATA_OBJECT;
        }
        return null;
    }

    /**
     * The audit record is created by the system, so programmatic validation
     * here is o.k. Normally, validation takes place via XML Schema and
     * Schematron.
     *
     * @param audit
     * @throws ObjectIntegrityException
     */
    protected static void validateAudit(AuditRecord audit)
            throws ObjectIntegrityException {
        if (audit.id == null || audit.id.equals("")) {
            throw new ObjectIntegrityException("Audit record must have id.");
        }
        if (audit.date == null || audit.date.equals("")) {
            throw new ObjectIntegrityException("Audit record must have date.");
        }
        if (audit.processType == null || audit.processType.equals("")) {
            throw new ObjectIntegrityException("Audit record must have processType.");
        }
        if (audit.action == null || audit.action.equals("")) {
            throw new ObjectIntegrityException("Audit record must have action.");
        }
        if (audit.componentID == null) {
            audit.componentID = ""; // for backwards compatibility, no error on null
            // throw new ObjectIntegrityException("Audit record must have componentID.");
        }
        if (audit.responsibility == null || audit.responsibility.equals("")) {
            throw new ObjectIntegrityException("Audit record must have responsibility.");
        }
    }

    protected static String getAuditTrail(DigitalObject obj)
            throws ObjectIntegrityException {
        StringWriter buf = new StringWriter();
        appendAuditTrail(obj, new PrintWriter(buf));
        return buf.toString();
    }

    protected static void appendAuditTrail(DigitalObject obj, PrintWriter writer)
            throws ObjectIntegrityException {
        appendOpenElement(writer, AUDIT.AUDIT_TRAIL, true);
        for (AuditRecord audit : obj.getAuditRecords()) {
            DOTranslationUtility.validateAudit(audit);
            appendOpenElement(writer, AUDIT.RECORD, AUDIT.ID, audit.id);
            appendFullElement(writer,
                              AUDIT.PROCESS,
                              AUDIT.TYPE,
                              audit.processType);
            appendFullElement(writer, AUDIT.ACTION, audit.action);
            appendFullElement(writer, AUDIT.COMPONENT_ID, audit.componentID);
            appendFullElement(writer,
                              AUDIT.RESPONSIBILITY,
                              audit.responsibility);
            appendFullElement(writer, AUDIT.DATE, DateUtility
                    .convertDateToString(audit.date));
            appendFullElement(writer, AUDIT.JUSTIFICATION, audit.justification);
            appendCloseElement(writer, AUDIT.RECORD);
        }
        appendCloseElement(writer, AUDIT.AUDIT_TRAIL);
    }

    protected static List<AuditRecord> getAuditRecords(XMLEventReader reader)
            throws XMLStreamException {
        List<AuditRecord> records = new ArrayList<AuditRecord>();
        String inElement = null;

        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (event.isStartElement()) {
                StartElement element = (StartElement) event;
                inElement = element.getName().getLocalPart();
                if (inElement.equals(AUDIT.RECORD.localName)) {
                    AuditRecord record = new AuditRecord();
                    java.util.Iterator<?> it = element.getAttributes();
                    while (it.hasNext()) {
                        Attribute attr = (Attribute) it.next();
                        if (attr.getName().getLocalPart()
                                .equals(AUDIT.ID.localName)) {
                            record.id = attr.getValue();
                        }
                    }
                    records.add(record);
                } else if (inElement.equals(AUDIT.PROCESS.localName)) {
                    java.util.Iterator<?> it = element.getAttributes();
                    while (it.hasNext()) {
                        Attribute attr = (Attribute) it.next();
                        if (attr.getName().getLocalPart()
                                .equals(AUDIT.TYPE.localName)) {
                            records.get(records.size() - 1).processType =
                                    attr.getValue();
                        }
                    }
                }
            }
            if (event.isEndElement()) {
                inElement = "";
            }
            if (event.isCharacters()) {
                Characters characters = (Characters) event;
                if (!records.isEmpty()) {
                    AuditRecord record = records.get(records.size() - 1);
                    if (inElement.equals(AUDIT.ACTION.localName)) {
                        record.action = characters.getData();
                    } else if (inElement.equals(AUDIT.COMPONENT_ID.localName)) {
                        record.componentID = characters.getData();
                    } else if (inElement.equals(AUDIT.DATE.localName)) {
                        record.date =
                                DateUtility.convertStringToDate(characters
                                        .getData());
                    } else if (inElement.equals(AUDIT.JUSTIFICATION.localName)) {
                        record.justification = characters.getData();
                    } else if (inElement.equals(AUDIT.RESPONSIBILITY.localName)) {
                        record.responsibility = characters.getData();
                    }
                }
            }
        }
        return records;
    }

    /**
     * Parse an audit:auditTrail and return a list of AuditRecords.
     *
     * @since 3.0
     * @param auditTrail
     * @return
     * @throws XMLStreamException
     */
    protected static List<AuditRecord> getAuditRecords(InputStream auditTrail)
            throws XMLStreamException {
        XMLEventReader eventReader;
        synchronized (m_xmlInputFactory) {
            eventReader = m_xmlInputFactory.createXMLEventReader(auditTrail);
        }
        List<AuditRecord> records = getAuditRecords(eventReader);
        eventReader.close();
        return records;
    }

    protected static List<AuditRecord> getAuditRecords(Reader auditTrail)
            throws XMLStreamException {
        XMLEventReader eventReader;
        synchronized (m_xmlInputFactory) {
            eventReader = m_xmlInputFactory.createXMLEventReader(auditTrail);
        }
        List<AuditRecord> records = getAuditRecords(eventReader);
        eventReader.close();
        return records;
    }

    private static void appendOpenElement(PrintWriter writer,
                                          QName element,
                                          boolean declareNamespace) {
        writer.print("<");
        writer.print(element.qName);
        if (declareNamespace) {
            writer.print(" xmlns:");
            writer.print(element.namespace.prefix);
            writer.print("=\"");
            writer.print(element.namespace.uri);
            writer.print("\"");
        }
        writer.print(">\n");
    }

    private static void appendOpenElement(PrintWriter writer,
                                          QName element,
                                          QName attribute,
                                          String attributeContent) {
        writer.print("<");
        writer.print(element.qName);
        writer.print(" ");
        writer.print(attribute.localName);
        writer.print("=\"");
        writer.print(StreamUtility.enc(attributeContent));
        writer.print("\">\n");
    }

    private static void appendCloseElement(PrintWriter writer, QName element) {
        writer.print("</");
        writer.print(element.qName);
        writer.print(">\n");
    }

    private static void appendFullElement(PrintWriter writer,
                                          QName element,
                                          QName attribute,
                                          String attributeContent) {
        writer.print("<");
        writer.print(element.qName);
        writer.print(" ");
        writer.print(attribute.localName);
        writer.print("=\"");
        writer.print(StreamUtility.enc(attributeContent));
        writer.print("\"/>\n");
    }

    private static void appendFullElement(PrintWriter writer,
                                          QName element,
                                          String elementContent) {
        writer.print("<");
        writer.print(element.qName);
        writer.print(">");
        writer.print(StreamUtility.enc(elementContent));
        writer.print("</");
        writer.print(element.qName);
        writer.print(">\n");
    }
}
TOP

Related Classes of org.fcrepo.server.storage.translation.DOTranslationUtility

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.