Package org.dspace.content.packager

Source Code of org.dspace.content.packager.METSManifest$Mdref

/*
* METSManifest.java
*
* Version: $Revision: 3761 $
*
* Date: $Date: 2009-05-07 04:18:02 +0000 (Thu, 07 May 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

package org.dspace.content.packager;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.codec.binary.Base64;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.content.crosswalk.IngestionCrosswalk;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.PluginManager;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

/**
* <P>
* Manage the METS manifest document for METS importer classes,
* such as the package importer <code>org.dspace.content.packager.MetsSubmission</code>
* and the federated importer <code>org.dspace.app.mets.FederatedMETSImport</code>
* </P>
* <P>
* It can parse the METS document, build an internal model, and give the importers
* access to that model.  It also crosswalks
* all of the descriptive and administrative metadata in the METS
* manifest into the target DSpace Item, under control of the importer.
* </P>
*
* <P>
* It reads the following DSpace Configuration entries:
* </P>
* <UL>
* <LI>Local XML schema (XSD) declarations, in the general format:
* <br><code>mets.xsd.<em>identifier</em> = <em>namespace</em> <em>xsd-URL</em></code>
* <br> eg. <code>mets.xsd.dc =  http://purl.org/dc/elements/1.1/ dc.xsd</code>
* <br>Add a separate config entry for each schema.
* </LI>
* <LI>Crosswalk plugin mappings:
* These tell it the name of the crosswalk plugin to invoke for metadata sections
* with a particular value of <code>MDTYPE</code> (or <code>OTHERMDTYPE</code>)
* By default, the crosswalk mechanism will look for a plugin with the
* same name as the metadata type (e.g.  <code>"MODS"</code>,
* <code>"DC"</code>).  This example line invokes the <code>QDC</code>
* plugin when <code>MDTYPE="DC"</code>
* <br><code>mets.submission.crosswalk.DC = QDC </code>
* <br> general format is:
* <br><code>mets.submission.crosswalk.<em>mdType</em> = <em>pluginName</em> </code>
* </LI>
* </UL>
*
*
* @author Robert Tansley
* @author WeiHua Huang
* @author Rita Lee
* @author Larry Stone
* @see org.dspace.content.packager.MetsSubmission
* @see org.dspace.app.mets.FederatedMETSImport
*/
public class METSManifest
{
    /**
     * Callback interface to retrieve data streams in mdRef elements.
     * "Package" or file reader returns an input stream for the
     * given relative path, e.g. to dereference <code>mdRef</code> elements.
     * <p>
     * Within this class the callback is invoked by
     * <code>getMdContentAsXml()</code> and <code>getMdContentAsStream()</code>
     * whenever a metadata section holds an <code>mdRef</code> rather than
     * an <code>mdWrap</code>.
     */
    public interface Mdref
    {
        /**
         * Make the contents of an external resource mentioned in
         * an <code>mdRef</code> element available as an <code>InputStream</code>.
         * The implementation must use the information in the
         * <code>mdRef</code> element, and the state in the object that
         * implements this interface, to find the actual metadata content.
         * <p>
         * For example, an implementation that ingests a directory of
         * files on the local filesystem would get a relative pathname
         * out of the <code>mdRef</code> and open that file.
         *
         * @param mdRef JDOM element of mdRef in the METS manifest.
         * @return stream containing the metadata mentioned in mdRef.
         * @throws MetadataValidationException if the mdRef is unacceptable or missing required information.
         * @throws IOException if it is returned by services called by this method.
         * @throws SQLException if it is returned by services called by this method.
         * @throws AuthorizeException if it is returned by services called by this method.
         */
        public InputStream getInputStream(Element mdRef)
            throws MetadataValidationException, IOException, SQLException, AuthorizeException;
    }

    /** log4j category */
    private static Logger log = Logger.getLogger(METSManifest.class);

    /** Canonical filename of METS manifest within a package or as a bitstream. */
    public final static String MANIFEST_FILE = "mets.xml";

    /** Prefix of DSpace configuration lines that map METS metadata type to
     * crosswalk plugin names.
     */
    private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk.";

    /** Prefix of config lines identifying local XML Schema (XSD) files. */
    private final static String CONFIG_XSD_PREFIX = "mets.xsd.";

    /** Dublin Core element namespace (declared without a prefix). */
    private static Namespace dcNS = Namespace
            .getNamespace("http://purl.org/dc/elements/1.1/");

    /** Dublin Core term namespace, for qualified DC (declared without a prefix). */
    private static Namespace dcTermNS = Namespace
            .getNamespace("http://purl.org/dc/terms/");

    /** METS namespace -- includes "mets" prefix for use in XPaths */
    public static Namespace metsNS = Namespace
            .getNamespace("mets", "http://www.loc.gov/METS/");

    /** XLink namespace -- includes "xlink" prefix for use in XPaths */
    private static Namespace xlinkNS = Namespace
            .getNamespace("xlink", "http://www.w3.org/1999/xlink");

    /** Root element of the current METS manifest; set once by the constructor. */
    private Element mets = null;

    /** All mdRef elements in the manifest; filled in on the first call to getMdFiles(). */
    private List mdFiles = null;

    /**
     * The <file> elements found under every fileGrp of the fileSec;
     * filled in on the first call to getContentFiles().
     */
    private List contentFiles = null;

    /** Builder to use for mdRef streams and binData, handed over by create(). */
    private SAXBuilder parser = null;

    // Create list of local schemas at load time, since it depends only
    // on the DSpace configuration.
    private static String localSchemas;
    static
    {
        String dspace_dir = ConfigurationManager.getProperty("dspace.dir");
        File xsdPath1 = new File(dspace_dir+"/config/schemas/");
        File xsdPath2 = new File(dspace_dir+"/config/");

        Enumeration pe = ConfigurationManager.propertyNames();
        StringBuffer result = new StringBuffer();
        while (pe.hasMoreElements())
        {
            // config lines have the format:
            //  mets.xsd.{identifier} = {namespace} {xsd-URL}
            // e.g.
            //  mets.xsd.dc =  http://purl.org/dc/elements/1.1/ dc.xsd
            // (filename is relative to {dspace_dir}/config/schemas/)
            String key = (String)pe.nextElement();
            if (key.startsWith(CONFIG_XSD_PREFIX))
            {
                String spec  = ConfigurationManager.getProperty(key);
                String val[] = spec.trim().split("\\s+");
                if (val.length == 2)
                {
                    File xsd = new File(xsdPath1, val[1]);
                    if (!xsd.exists())
                         xsd = new File(xsdPath2, val[1]);
                    if (!xsd.exists())
                        log.warn("Schema file not found for config entry=\""+spec+"\"");
                    else
                    {
                        try
                        {
                            String u = xsd.toURL().toString();
                            if (result.length() > 0)
                                result.append(" ");
                            result.append(val[0]).append(" ").append(u);
                        }
                        catch (java.net.MalformedURLException e)
                        {
                            log.warn("Skipping badly formed XSD URL: "+e.toString());
                        }
                    }
                }
                else
                    log.warn("Schema config entry has wrong format, entry=\""+spec+"\"");
            }
        }
        localSchemas = result.toString();
        log.debug("Got local schemas = \""+localSchemas+"\"");
    }

    /**
     * Default constructor, only called internally.
     * @param builder XML parser (for parsing mdRef'd files and binData)
     * @param mets parsed METS document
     */
    private METSManifest(SAXBuilder builder, Element mets)
    {
        super();
        this.mets = mets;
        parser = builder;
    }

    /**
     * Create a new manifest object from a serialized METS XML document.
     * Parse document read from the input stream, optionally validating.
     * @param is input stream containing serialized XML
     * @param validate if true, enable XML validation using schemas
     *   in document.  Also validates any sub-documents.
     * @throws MetadataValidationException if there is any error parsing
     *          or validating the METS.
     * @return new METSManifest object.
     */
    public static METSManifest create(InputStream is, boolean validate)
            throws IOException,
            MetadataValidationException
    {
        SAXBuilder builder = new SAXBuilder(validate);

        // Set validation feature
        if (validate)
            builder.setFeature("http://apache.org/xml/features/validation/schema",
                    true);

        // Tell the parser where local copies of schemas are, to speed up
        // validation.  Local XSDs are identified in the configuration file.
        if (localSchemas.length() > 0)
            builder.setProperty(
                    "http://apache.org/xml/properties/schema/external-schemaLocation",
                    localSchemas);

        // Parse the METS file
        Document metsDocument;

        try
        {
            metsDocument = builder.build(is);

            // XXX for temporary debugging
            /*
            XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
            log.debug("Got METS DOCUMENT:");
            log.debug(outputPretty.outputString(metsDocument));
              */
        }
        catch (JDOMException je)
        {
            throw new MetadataValidationException("Error validating METS in "
                    + is.toString(),  je);
        }

        return new METSManifest(builder, metsDocument.getRootElement());
    }

    /**
     * Gets name of the profile to which this METS document conforms.
     * @return value the PROFILE attribute of mets element, or null if none.
     */
    public String getProfile()
    {
        return mets.getAttributeValue("PROFILE");
    }

    /**
     * Gets all <code>file</code> elements which make up
     *   the item's content.
     * @return a List of <code>Element</code>s.
     */
    public List getContentFiles()
        throws MetadataValidationException
    {
        if (contentFiles != null)
            return contentFiles;

        Element fileSec = mets.getChild("fileSec", metsNS);
        if (fileSec == null)
            throw new MetadataValidationException("Invalid METS Manifest: DSpace requires a fileSec element, but it is missing.");

        contentFiles = new ArrayList();
        Iterator fgi = fileSec.getChildren("fileGrp", metsNS).iterator();
        while (fgi.hasNext())
        {
            Element fg = (Element)fgi.next();
            Iterator fi = fg.getChildren("file", metsNS).iterator();
            while (fi.hasNext())
            {
                Element f = (Element)fi.next();
                contentFiles.add(f);
            }
        }
        return contentFiles;
    }

    /**
     * Gets list of all <code>mdRef</code> elements in the METS
     *   document.  Used by ingester to e.g. check that all
     *   required files are present.
     * @return a List of <code>Element</code>s.
     */
    public List getMdFiles()
        throws MetadataValidationException
    {
        if (mdFiles == null)
        {
            try
            {
                // Use a special namespace with known prefix
                // so we get the right prefix.
                XPath xpath = XPath.newInstance("descendant::mets:mdRef");
                xpath.addNamespace(metsNS);
                mdFiles = xpath.selectNodes(mets);
            }
            catch (JDOMException je)
            {
                throw new MetadataValidationException("Failed while searching for mdRef elements in manifest: ", je);
            }
        }
        return mdFiles;
    }

    /**
     * Get the "original" file element for a derived file.
     * Finds the original from which this was derived by matching the GROUPID
     * attribute that binds it to its original.  For instance, the file for
     * a thumbnail image would have the same GROUPID as its full-size version.
     * <p>
     * NOTE: This pattern of relating derived files through the GROUPID
     * attribute is peculiar to the DSpace METS SIP profile, and may not be
     * generally useful with other sorts of METS documents.
     * @param file METS file element of derived file
     * @return file Element of original or null if none found.
     */
    public Element getOriginalFile(Element file)
    {
        String groupID = file.getAttributeValue("GROUPID");
        if (groupID == null || groupID.equals(""))
            return null;

        try
        {
            XPath xpath = XPath.newInstance(
"mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\""+groupID+"\"]");
            xpath.addNamespace(metsNS);
            List oFiles = xpath.selectNodes(mets);
            if (oFiles.size() > 0)
            {
                log.debug("Got ORIGINAL file for derived="+file.toString());
                return (Element)oFiles.get(0);
            }
            else
                return null;
        }
        catch (JDOMException je)
        {
            log.warn("Got exception on XPATH looking for Original file, "+je.toString());
            return null;
        }
    }

    // translate bundle name from METS to DSpace; METS may be "CONTENT"
    // or "ORIGINAL" for the DSPace "ORIGINAL", rest are left alone.
    private static String normalizeBundleName(String in)
    {
        if (in.equals("CONTENT"))
            return Constants.CONTENT_BUNDLE_NAME;
        else if (in.equals("MANIFESTMD"))
            return Constants.METADATA_BUNDLE_NAME;
        return in;
    }

    /**
     * Get the DSpace bundle name corresponding to the <code>USE</code> attribute of the file group enclosing this <code>file</code> element.
     * @return DSpace bundle name
     * @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
     */
    public static String getBundleName(Element file)
        throws MetadataValidationException
    {
        Element fg = file.getParentElement();
        String fgUse = fg.getAttributeValue("USE");
        if (fgUse == null)
            throw new MetadataValidationException("Invalid METS Manifest: every fileGrp element must have a USE attribute.");
        return normalizeBundleName(fgUse);
    }

    /**
     * Get the "local" file name of this <code>file</code> or <code>mdRef</code> element.
     * By "local" we mean the reference to the actual resource containing
     * the data for this file, e.g. a relative path within a Zip or tar archive
     * if the METS is serving as a manifest for that sort of package.
     * @return "local" file name (i.e.  relative to package or content
     *  directory) corresponding to this <code>file</code> or <code>mdRef</code> element.
     * @throws MetadataValidationException when there is not enough information to find a resource identifier.
     */
    public static String getFileName(Element file)
        throws MetadataValidationException
    {
        Element ref;
        if (file.getName().equals("file"))
        {
            ref = file.getChild("FLocat", metsNS);
            if (ref == null)
            {
                // check for forbidden FContent child first:
                if (file.getChild("FContent", metsNS) == null)
                    throw new MetadataValidationException("Invalid METS Manifest: Every file element must have FLocat child.");
                else
                    throw new MetadataValidationException("Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed.");
            }
        }
        else if (file.getName().equals("mdRef"))
            ref = file;
        else
            throw new MetadataValidationException("getFileName() called with recognized element type: "+file.toString());
        String loctype = ref.getAttributeValue("LOCTYPE");
        if (loctype != null && loctype.equals("URL"))
        {
            String result = ref.getAttributeValue("href", xlinkNS);
            if (result == null)
                throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute.");
            return result;
        }
        throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute.");
    }

    /**
     * Returns file element corresponding to primary bitstream.
     * There is <i>ONLY</i> a primary bitstream if the first <code>div</code> under
     * first </code>structMap</code> has an </code>fptr</code>.
     *
     * @return file element of Item's primary bitstream, or null if there is none.
     */
    public Element getPrimaryBitstream()
        throws MetadataValidationException
    {
        Element firstDiv = getFirstDiv();
        Element fptr = firstDiv.getChild("fptr", metsNS);
        if (fptr == null)
            return null;
        String id = fptr.getAttributeValue("FILEID");
        if (id == null)
            throw new MetadataValidationException("fptr for Primary Bitstream is missing the required FILEID attribute.");
        Element result = getElementByXPath("descendant::mets:file[@ID=\""+id+"\"]", false);
        if (result == null)
            throw new MetadataValidationException("Cannot find file element for Primary Bitstream: looking for ID="+id);
        return result;
    }

    /** Get the metadata type from within a *mdSec element.
     * @return metadata type name.
     */
    public String getMdType(Element mdSec)
        throws MetadataValidationException
    {
        Element md = mdSec.getChild("mdRef", metsNS);
        if (md == null)
            md = mdSec.getChild("mdWrap", metsNS);
        if (md == null)
            throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child.");
        String result = md.getAttributeValue("MDTYPE");
        if (result != null && result.equals("OTHER"))
            result = md.getAttributeValue("OTHERMDTYPE");
        if (result == null)
            throw new MetadataValidationException("Invalid METS Manifest: "+md.getName()+" has no MDTYPE or OTHERMDTYPE attribute.");
        return result;
    }

    /**
     *  Returns MIME type of metadata content, if available.
     *  @return MIMEtype word, or null if none is available.
     */
    public String getMdContentMimeType(Element mdSec)
        throws MetadataValidationException
    {
        Element mdWrap = mdSec.getChild("mdWrap", metsNS);
        if (mdWrap != null)
        {
            String mimeType = mdWrap.getAttributeValue("MIMETYPE");
            if (mimeType == null && mdWrap.getChild("xmlData", metsNS) != null)
            mimeType = "text/xml";
            return mimeType;
        }
        Element mdRef = mdSec.getChild("mdRef", metsNS);
        if (mdRef != null)
            return mdRef.getAttributeValue("MIMETYPE");
        return null;
    }

    /**
     * Return contents of *md element as List of XML Element objects.
     * Gets content, dereferecing mdRef if necessary, or decoding and parsing
     * a binData that contains XML.
     * @return contents of metadata section, or empty list if no XML content is available.
     * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
     */
    public List getMdContentAsXml(Element mdSec, Mdref callback)
        throws MetadataValidationException, IOException, SQLException, AuthorizeException
    {
        try
        {
            Element mdRef = null;
            Element mdWrap = mdSec.getChild("mdWrap", metsNS);
            if (mdWrap != null)
            {
                Element xmlData = mdWrap.getChild("xmlData", metsNS);
                if (xmlData == null)
                {
                    Element bin = mdWrap.getChild("binData", metsNS);
                    if (bin == null)
                        throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");

                    // if binData is actually XML, return it; otherwise ignore.
                    else
                    {
                        String mimeType = mdWrap.getAttributeValue("MIMETYPE");
                        if (mimeType != null && mimeType.equalsIgnoreCase("text/xml"))
                        {
                            byte value[] = Base64.decodeBase64(bin.getText().getBytes());
                            Document mdd = parser.build(new ByteArrayInputStream(value));
                            List result = new ArrayList(1);
                            result.add(mdd.getRootElement());
                            return result;
                        }
                        else
                        {
                            log.warn("Ignoring binData section because MIMETYPE is not XML, but: "+mimeType);
                            return new ArrayList(0);
                        }
                   }
                }
                else
                {
                    return xmlData.getChildren();
                }
            }
            else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null)
            {
                String mimeType = mdRef.getAttributeValue("MIMETYPE");
                if (mimeType != null && mimeType.equalsIgnoreCase("text/xml"))
                {
                    Document mdd = parser.build(callback.getInputStream(mdRef));
                    List result = new ArrayList(1);
                    result.add(mdd.getRootElement());
                    return result;
                }
                else
                {
                    log.warn("Ignoring mdRef section because MIMETYPE is not XML, but: "+mimeType);
                    return new ArrayList(0);
                }
            }
            else
                throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
        }
        catch (JDOMException je)
        {
            throw new MetadataValidationException("Error parsing or validating metadata section in mdRef or binData within "+mdSec.toString(), je);
        }

    }

    /**
     * Return contents of *md element as stream.
     * Gets content, dereferecing mdRef if necessary, or decoding
     * a binData element if necessary.
     * @return Stream containing contents of metadata section.  Never returns null.
     * @throws MetadataValidationException if METS format does not contain any metadata.
     */
    public InputStream getMdContentAsStream(Element mdSec, Mdref callback)
        throws MetadataValidationException, IOException, SQLException, AuthorizeException
    {
        Element mdRef = null;
        Element mdWrap = mdSec.getChild("mdWrap", metsNS);
        if (mdWrap != null)
        {
            Element xmlData = mdWrap.getChild("xmlData", metsNS);
            if (xmlData == null)
            {
                Element bin = mdWrap.getChild("binData", metsNS);
                if (bin == null)
                    throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");

                else
                {
                    byte value[] = Base64.decodeBase64(bin.getText().getBytes());
                    return new ByteArrayInputStream(value);
                }
            }
            else
            {
                XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
                return new ByteArrayInputStream(
                        outputPretty.outputString(xmlData.getChildren()).getBytes());
            }
        }
        else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null)
        {
            return callback.getInputStream(mdRef);
        }
        else
            throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
    }


    // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec)
    // because mdRef and mdWrap have to be handled differently.
    // It's a lot like getMdContentAsXml but cannot use that because xwalk
    // should be called with root element OR list depending on what was given.
    private void crosswalkMdContent(Element mdSec, Mdref callback,
                IngestionCrosswalk xwalk, Context context, DSpaceObject dso)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        List xml = getMdContentAsXml(mdSec,callback);

        // if we get inappropriate metadata, e.g. PREMIS for Item, let it go.
        try
        {
            xwalk.ingest(context, dso, xml);
        }
        catch (CrosswalkObjectNotSupported e)
        {
            log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString());
        }
    }

    // return first <div> of first <structMap>;
    // in DSpace profile, this is where item-wide dmd and other metadata
    // lives as IDrefs.
    private Element getFirstDiv()
        throws MetadataValidationException
    {
        Element sm = mets.getChild("structMap", metsNS);
        if (sm == null)
            throw new MetadataValidationException("METS document is missing the required structMap element.");

        Element result = sm.getChild("div", metsNS);
        if (result == null)
            throw new MetadataValidationException("METS document is missing the required first div element in first structMap.");

        log.debug("Got firstDiv result="+result.toString());
        return (Element)result;
    }

    // return a single Element node found by one-off path.
    // use only when path varies each time you call it.
    private Element getElementByXPath(String path, boolean nullOk)
        throws MetadataValidationException
    {
        try
        {
            XPath xpath = XPath.newInstance(path);
            xpath.addNamespace(metsNS);
            xpath.addNamespace(xlinkNS);
            Object result = xpath.selectSingleNode(mets);
            if (result == null && nullOk)
                return null;
            else if (result instanceof Element)
                return (Element)result;
            else
                throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\"");
        }
        catch (JDOMException je)
        {
            throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\"", je);
        }
    }

    // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS")
    // The crosswalk plugin name MAY be indirected in config file,
    // through an entry like
    //  mets.submission.crosswalk.{mdType} = {pluginName}
    //   e.g.
    //  mets.submission.crosswalk.DC = mysite-QDC
    private IngestionCrosswalk getCrosswalk(String type)
    {
        String xwalkName = ConfigurationManager.getProperty(CONFIG_METADATA_PREFIX + type);
        if (xwalkName == null)
            xwalkName = type;
        return (IngestionCrosswalk)
          PluginManager.getNamedPlugin(IngestionCrosswalk.class, xwalkName);
    }

    /**
     * Gets all dmdSec elements containing metadata for the DSpace Item.
     *
     * @return array of Elements, each a dmdSec.  May be empty but NOT null.
     * @throws MetadataValidationException if the METS is missing a reference to item-wide
     *          DMDs in the correct place.
     */
    public Element[] getItemDmds()
        throws MetadataValidationException
    {
        // div@DMDID is actually IDREFS, a space-separated list of IDs:
        Element firstDiv = getFirstDiv();
        String dmds = firstDiv.getAttributeValue("DMDID");
        if (dmds == null)
            throw new MetadataValidationException("Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute.");
        String dmdID[] = dmds.split("\\s+");
        Element result[] = new Element[dmdID.length];

        for (int i = 0; i < dmdID.length; ++i)
            result[i] = getElementByXPath("mets:dmdSec[@ID=\""+dmdID[i]+"\"]", false);
        return result;
    }

    /**
     * Return rights metadata section(s) relevant to item as a whole.
     * @return array of rightsMd elements, possibly empty but never null.
     * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
     */
    public Element[] getItemRightsMD()
        throws MetadataValidationException
    {
        // div@ADMID is actually IDREFS, a space-separated list of IDs:
        Element firstDiv = getFirstDiv();
        String amds = firstDiv.getAttributeValue("ADMID");
        if (amds == null)
        {
            log.debug("getItemRightsMD: No ADMID references found.");
            return new Element[0];
        }
        String amdID[] = amds.split("\\s+");
        List resultList = new ArrayList();
        for (int i = 0; i < amdID.length; ++i)
        {
            List rmds = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false).
                            getChildren("rightsMD", metsNS);
            if (rmds.size() > 0)
                resultList.addAll(rmds);
        }
        return (Element[])resultList.toArray(new Element[resultList.size()]);
    }

    /**
     * Invokes appropriate crosswalks on Item-wide descriptive metadata.
     */
    public void crosswalkItem(Context context, Item item, Element dmd, Mdref callback)
        throws MetadataValidationException,
               CrosswalkException, IOException, SQLException, AuthorizeException
    {
        String type = getMdType(dmd);
        IngestionCrosswalk xwalk = getCrosswalk(type);

        if (xwalk == null)
            throw new MetadataValidationException("Cannot process METS Manifest: "+
                "No crosswalk found for MDTYPE="+type);
        crosswalkMdContent(dmd, callback, xwalk, context, item);
    }

    /**
     * Crosswalk the metadata associated with a particular <code>file</code>
     * element into the bitstream it corresponds to.
     * @param context a dspace context.
     * @param bs bitstream target of the crosswalk
     * @param fileId value of ID attribute in the file element responsible
     *  for the contents of that bitstream.
     */
    public void crosswalkBitstream(Context context, Bitstream bitstream,
                                   String fileId, Mdref callback)
        throws MetadataValidationException,
               CrosswalkException, IOException, SQLException, AuthorizeException
    {
        Element file = getElementByXPath("descendant::mets:file[@ID=\""+fileId+"\"]", false);
        if (file == null)
            throw new MetadataValidationException("Failed in Bitstream crosswalk, Could not find file element with ID="+fileId);

        // In DSpace METS SIP spec, admin metadata is only "highly
        // recommended", not "required", so it is OK if there is no ADMID.
        String amds = file.getAttributeValue("ADMID");
        if (amds == null)
        {
            log.warn("Got no bitstream ADMID, file@ID="+fileId);
            return;
        }
        String amdID[] = amds.split("\\s+");
        for (int i = 0; i < amdID.length; ++i)
        {
            List techMDs = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false).
                                 getChildren("techMD", metsNS);
            Iterator ti = techMDs.iterator();
            while (ti.hasNext())
            {
                Element techMD = (Element)ti.next();
                if (techMD != null)
                {
                    String type = getMdType(techMD);
                    IngestionCrosswalk xwalk = getCrosswalk(type);
                    log.debug("Got bitstream techMD of type="+type+", for file ID="+fileId);
                    
                    if (xwalk == null)
                        throw new MetadataValidationException("Cannot process METS Manifest: "+
                            "No crosswalk found for techMD MDTYPE="+type);
                    crosswalkMdContent(techMD, callback, xwalk, context, bitstream);
                }
            }
        }
    }

    /**
     * Find Handle (if any) identifier labelling this manifest.
     * @return handle (never null)
     * @throws MetadataValidationException if no handle available.
     */
    public String getHandle()
        throws MetadataValidationException
    {
        // TODO: XXX Make configurable? Handle optionally passed in?
        // FIXME: Not sure if OBJID is really the right place

        String handle = mets.getAttributeValue("OBJID");

        if (handle != null && handle.startsWith("hdl:"))
        {
            return handle.substring(4);
        }
        else
        {
            throw new MetadataValidationException("Item has no valid Handle (OBJID)");
        }
    }
}
TOP

Related Classes of org.dspace.content.packager.METSManifest$Mdref

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.