// Package eu.planets_project.services.validation.odfvalidator.utils
//
// Source code of eu.planets_project.services.validation.odfvalidator.utils.CoreOdfValidator

package eu.planets_project.services.validation.odfvalidator.utils;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import eu.planets_project.services.datatypes.Parameter;
import eu.planets_project.services.utils.ProcessRunner;

public class CoreOdfValidator {
 
 
  private static final String DOC_STRICT_SCHEMA_URL_MARKER = "doc-strict-schema-url=";
  private static final String MANIFEST_SCHEMA_URL_MARKER = "manifest-schema-url=";
  private static final String DOC_SCHEMA_URL_MARKER = "doc-schema-url=";
  private static final String DSIG_SCHEMA_URL_MARKER = "dsig-schema-url=";
 
  private static final String USER_DSIG_SCHEMA_PARAM = "user-dsig-schema";
  private static final String USER_DOC_STRICT_SCHEMA_PARAM = "user-doc-strict-schema";
  private static final String USER_DOC_SCHEMA_PARAM = "user-doc-schema";
  private static final String USER_MANIFEST_SCHEMA_PARAM = "user-manifest-schema";
  private static String STRICT_VALIDATION_PARAM = "strict-validation";
 
  private static final String FORMULA_MIMETYPE = "application/vnd.oasis.opendocument.formula";
 
  // Flag section
  private static boolean STRICT_VALIDATION = false;
  private static boolean USE_USER_DOC_SCHEMA = false;
  private static boolean USE_USER_DOC_STRICT_SCHEMA = false;
  private static boolean USE_USER_MANIFEST_SCHEMA = false;
  private static boolean USE_USER_DSIG_SCHEMA = false;
 
  // User schema files, if provided
  private static File USER_DOC_SCHEMA = null;
  private static File USER_DOC_STRICT_SCHEMA = null;
  private static File USER_MANIFEST_SCHEMA = null;
  private static File USER_DSIG_SCHEMA = null;
 
  private static HashMap<String, File> schemaList = new HashMap<String, File>();
 
  private static OdfSchemaHandler schemaHandler = OdfSchemaHandlerFactory.getSchemaHandlerInstance();
  private static OdfContentHandler contentHandler = null;
 
  private static String version = null;
   
  private static final String JING_HOME = System.getenv("JING_HOME");
  private static final String JING = "jing.jar";
 
  private static Logger log = Logger.getLogger(CoreOdfValidator.class.getName());
 
  private static OdfValidatorResult result = null;
 
  private static String mimeType = null;
 
 
 
 
  public OdfValidatorResult validate(File odfFile, List<Parameter> parameters) {
    log.setLevel(Level.INFO);
    log.info("JING installation detected: " + JING_HOME);
    contentHandler = new OdfContentHandler(odfFile);
    result = new OdfValidatorResult(odfFile.getName());
    result.setOdfGenerator(contentHandler.getOdfGenerator());
   
    // check if the input file is an ODF file at all
    if(!contentHandler.isOdfFile()) {
      result.setError(odfFile, "The input file '" + odfFile.getName() + "' is NOT an ODF file!");
      return result;
    }
    result.setIsOdfFile(contentHandler.isOdfFile());
   
   
    // File is ODF spec compliant, i.e. all mandatory files are included in container?
    boolean isCompliant = contentHandler.isOdfCompliant();
    result.setIsOdfCompliant(isCompliant);
    // if it is not compliant, e.g. if manifest entries are found that are not present in the container,
    // list the missing entries in the result.
    if(!isCompliant) {
      result.setMissingManifestEntries(contentHandler.getMissingManifestEntries());
    }
   
    // list the contained subfiles in this ODF container, as not all sub files are mandatory!
    List<File> xmlParts = new ArrayList<File>(contentHandler.getOdfSubFiles());
    // list them in the result
    result.setOdfSubFiles(xmlParts);
   
    // get the version of this ODF file and note in result
    version = contentHandler.getOdfVersion();
    result.setOdfVersion(version);
   
    // get the mimetype of this ODF file
    mimeType = contentHandler.getMimeType();
    // if this is a formula file, get the version of the embedded MathML
    if(mimeType.equalsIgnoreCase(FORMULA_MIMETYPE) || contentHandler.containsEmbeddedMathML()) {
      result.setMathMLVersion(contentHandler.getMathMLVersion()); // and set it in the result
    }
    // set the mimetype
    result.setMimeType(mimeType);
   
    // check parameters
    parseParameters(parameters);
   
    // get all necessary schemas, depending on ODF version and mimetype (MathML/formula)
    collectSchemas();
   
    log.info("Validating input file of mimeType = '" + mimeType + "'");
   
    // validate all relevant sub files in this ODF container
    for (File file : xmlParts) {
      result = validateFile(file, result);
    }
   
    // validated in strict mode? Note it in result...
    if(STRICT_VALIDATION) {
      result.setUsedStrictValidation(STRICT_VALIDATION);
    }
   
    // check if all used Namespaces are correct.
    if(!contentHandler.allNamespacesCorrect()) {
      result.setAllNamespacesCorrect(contentHandler.allNamespacesCorrect());
      Set<File> filesWithWarnings = contentHandler.getNsWarnings().keySet();
     
      // note the warnings for each erroneous namespace in the result:
      for (File file : filesWithWarnings) {
        result.setWarning(file, contentHandler.getNsWarnings().get(file));
      }
    }
   
    reset();
    return result;
  }
 
  private static String checkParentName(File subFile) {
    String parentName = subFile.getParentFile().getName();
    if(!parentName.contains("XML_CONTENT_")
        && !parentName.contains("META-INF")
        && !parentName.contains("ODFVALIDATOR_INPUT")) {
      return parentName + "/";
    }
    else {
      return "";
    }
   
  }
 
  private static OdfValidatorResult validateFile(File odfSubFile, OdfValidatorResult result) {
    String name = odfSubFile.getName();

    // Do we have a FORMULA (MathML) file?   
    if(name.equalsIgnoreCase(OdfContentHandler.CONTENT_XML)) {
      if(mimeType.equalsIgnoreCase(FORMULA_MIMETYPE) || contentHandler.subFileContainsMathML(odfSubFile)) {
        result = validateMathML(odfSubFile, schemaList.get("mathml"), result);
      }
      else {
        result = validateSubFile(odfSubFile, schemaList.get("doc"), result);
      }
    }
   
    // do we have the manifest.xml file here? Then validate it against the manifest schema!
    if(name.equalsIgnoreCase(OdfContentHandler.MANIFEST_XML)) {
      result = validateSubFile(odfSubFile, schemaList.get("manifest"), result);
    }
   
    // do we have a signature file here, then validate it against the dsig schema
    if(version.equalsIgnoreCase(OdfSchemaHandler.ODF_v1_2) && name.equalsIgnoreCase(OdfContentHandler.DOC_DSIGS_XML)
        || name.equalsIgnoreCase(OdfContentHandler.MACRO_DSIGS_XML)) {
      result = validateSubFile(odfSubFile, schemaList.get("dsig"), result);
    }
   
    // Or do we have a 'normal' ODF subfile (content.xml, settings.xml, styles.xml, meta.xml)?
    if(name.equalsIgnoreCase(OdfContentHandler.SETTINGS_XML)
        || name.equalsIgnoreCase(OdfContentHandler.STYLES_XML)
        || name.equalsIgnoreCase(OdfContentHandler.META_XML)) {
      result = validateSubFile(odfSubFile, schemaList.get("doc"), result);
    }
    return result;
  }
 
 
  private static OdfValidatorResult validateSubFile(File odfSubFile, File schema, OdfValidatorResult result) {
    ProcessRunner validator = new ProcessRunner();
    validator.setCommand(getJingValidateCmd(odfSubFile, schema));
    validator.run();
   
    String out = validator.getProcessOutputAsString();
 
    if(out.equalsIgnoreCase("")) {
      result.setValid(odfSubFile, true);
      log.info("'" + checkParentName(odfSubFile) + odfSubFile.getName() + "' is valid: " + result.componentIsValid(odfSubFile));
     
    }
    else {
      result.setValid(odfSubFile, false);
      result.setError(odfSubFile, out);
      log.info("'" + checkParentName(odfSubFile) + odfSubFile.getName() + "' is valid: " + result.componentIsValid(odfSubFile));
      log.info("Message: " + out);
    }
    return result;
  }

  private static OdfValidatorResult validateMathML(File mathmlFile, File mathmlSchema, OdfValidatorResult result) {

    SchemaFactory factory =
        SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
   
    Schema schema = null;
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setValidating(true);
    spf.setNamespaceAware(true);
   
    SAXParser parser;
    try {
      schema = factory.newSchema(mathmlSchema);
      spf.setSchema(schema);
      parser = spf.newSAXParser();
     
      File cleanedMathML = contentHandler.cleanUpXmlForValidation(mathmlFile);
     
      if(contentHandler.containsDocTypeDeclaration()) {
        result.setWarning(mathmlFile, "Detected MathML version = '" + result.getMathMLVersion() + "': To enable validation against the " +
            "MathML 2.0 Schema, DOCTYPE declaration is ignored!");
      }
      else {
        result.setWarning(mathmlFile, "Detected MathML version = '" + result.getMathMLVersion() + "': Using MathML 2.0 schema for validation");
      }
     
     
      parser.parse(cleanedMathML, new DefaultHandler());
    } catch (ParserConfigurationException e1) {
      result.setValid(mathmlFile, false);
      result.setError(mathmlFile, e1.getMessage());
      log.info("'" + checkParentName(mathmlFile) + mathmlFile.getName() + "' is valid: " + result.componentIsValid(mathmlFile));
      return result;
    } catch (SAXException e1) {
      result.setValid(mathmlFile, false);
      result.setError(mathmlFile, e1.getMessage());
      log.info("'" + checkParentName(mathmlFile) + mathmlFile.getName() + "' is valid: " + result.componentIsValid(mathmlFile));
      return result;
    } catch (IOException e) {
      result.setValid(mathmlFile, false);
      result.setError(mathmlFile, e.getMessage());
      log.info("'" + checkParentName(mathmlFile) + mathmlFile.getName() + "' is valid: " + result.componentIsValid(mathmlFile));
      return result;
    }
   
    result.setValid(mathmlFile, true);
    log.info("'" + checkParentName(mathmlFile) + mathmlFile.getName() + "' is valid: " + result.componentIsValid(mathmlFile));
    return result;
  }
 
 
  private static ArrayList<String> getJingValidateCmd(File odfXmlFile, File schemaFile) {
    ArrayList<String> cmd = new ArrayList<String>();
    cmd.add("java");
    cmd.add("-jar");
    cmd.add(JING_HOME + File.separator + JING);
    cmd.add("-i");
    cmd.add(schemaFile.getAbsolutePath());
    cmd.add(odfXmlFile.getAbsolutePath());
    return cmd;
  }
 
  private static ArrayList<String> getJingVersionCmd() {
    ArrayList<String> cmd = new ArrayList<String>();
    cmd.add("java");
    cmd.add("-jar");
    cmd.add(JING_HOME + File.separator + JING);
    return cmd;
  }
 
  private void collectSchemas() {
    if(mimeType.equalsIgnoreCase(FORMULA_MIMETYPE)
        || contentHandler.containsEmbeddedMathML()) {
      schemaList.put("mathml", schemaHandler.getMathMLSchema());
      result.setMathMLSchema(schemaList.get("mathml"));
    }
   
    if(USE_USER_DOC_SCHEMA) {
      if(USE_USER_DOC_STRICT_SCHEMA) {
        schemaList.put("doc", USER_DOC_STRICT_SCHEMA);
        result.setStrictDocSchema(USER_DOC_STRICT_SCHEMA);
        result.setDocumentSchema(USER_DOC_STRICT_SCHEMA);
      }
      else {
        schemaList.put("doc", USER_DOC_SCHEMA);
        result.setDocumentSchema(USER_DOC_SCHEMA);
      }
    }
    else {
      schemaList.put("doc", schemaHandler.getDocumentSchema(version, STRICT_VALIDATION));
      result.setDocumentSchema(schemaList.get("doc"));
    }
    if(USE_USER_MANIFEST_SCHEMA) {
      schemaList.put("manifest", USER_MANIFEST_SCHEMA);
      result.setManifestSchema(USER_MANIFEST_SCHEMA);
    }
    else {
      schemaList.put("manifest", schemaHandler.getManifestSchema(version));
      result.setManifestSchema(schemaList.get("manifest"));
    }
    if(contentHandler.containsDsigSubFiles()) {
      if(version.equalsIgnoreCase(OdfSchemaHandler.ODF_v1_2)) {
        if(USE_USER_DSIG_SCHEMA) {
          schemaList.put("dsig", USER_DSIG_SCHEMA);
          result.setDsigSchema(USER_DSIG_SCHEMA);
        }
        else {
          schemaList.put("dsig", schemaHandler.getDsigSchema(version));
          result.setDsigSchema(schemaList.get("dsig"));
        }
      }
    }
   
  }

  private static void parseParameters(List<Parameter> parameters) {
    if(parameters!=null && parameters.size()>0) {
      for (Parameter parameter : parameters) {
        String name = parameter.getName();
       
        // Check if a custom user DSIG schema for validation is passed...
        if(name.equalsIgnoreCase(USER_DSIG_SCHEMA_PARAM)) {
          if(version.equalsIgnoreCase(OdfSchemaHandler.ODF_v1_2)) {
            String value = parameter.getValue();
            URL dsig_schema_url = parseForURL(value);
            if(dsig_schema_url!=null) {
              USER_DSIG_SCHEMA = schemaHandler.createUserDsigSchemaFromUrl(version, dsig_schema_url);
            }
            else {
              USER_DSIG_SCHEMA = schemaHandler.createUserDsigSchema(value);
            }
            USE_USER_DSIG_SCHEMA = true;
            continue;
          }
        }
        if(name.equalsIgnoreCase(STRICT_VALIDATION_PARAM)) {
          if(!version.equalsIgnoreCase(OdfSchemaHandler.ODF_v1_2)) {
            STRICT_VALIDATION = Boolean.parseBoolean(parameter.getValue());
          }
          continue;
        }
        // Check for USER_MANIFEST_SCHEMA
        if(name.equalsIgnoreCase(USER_MANIFEST_SCHEMA_PARAM)) {
          String value = parameter.getValue();
          // check if a URL to a schema is passed?
          URL manifest_schema_url = parseForURL(value);
          if(manifest_schema_url!=null) {
            USER_MANIFEST_SCHEMA = schemaHandler.createUserManifestSchemaFromUrl(version, manifest_schema_url);
          }
          else {
            USER_MANIFEST_SCHEMA = schemaHandler.createUserManifestSchema(value);
          }
          USE_USER_MANIFEST_SCHEMA = true;
          continue;
        }
        // check for USER_DOC_SCHEMA?
        if(name.equalsIgnoreCase(USER_DOC_SCHEMA_PARAM)) {
          String value = parameter.getValue();
          URL doc_schema_url = parseForURL(value);
          if(doc_schema_url!=null) {
            USER_DOC_SCHEMA = schemaHandler.createUserDocSchemaFromUrl(version, doc_schema_url);
          }
          else {
            USER_DOC_SCHEMA = schemaHandler.createUserDocSchema(version, value);
          }
          USE_USER_DOC_SCHEMA = true;
          continue;
        }
       
        if(name.equalsIgnoreCase(USER_DOC_STRICT_SCHEMA_PARAM)) {
          String value = parameter.getValue();
          URL strict_schema_url = parseForURL(value);
         
          if(USE_USER_DOC_SCHEMA) {
            if(strict_schema_url!=null) {
              USER_DOC_STRICT_SCHEMA = schemaHandler.createUserDocStrictSchemaFromUrl(version, strict_schema_url, USER_DOC_SCHEMA);
            }
            else {
              USER_DOC_STRICT_SCHEMA = schemaHandler.createUserDocStrictSchema(version, value, USER_DOC_SCHEMA);
            }
            USE_USER_DOC_STRICT_SCHEMA = true;
            STRICT_VALIDATION = true;
            continue;
          }
          else {
            log.warning("Strict user schema provided, but missing doc schema! Please provide the doc schema first, because it is referenced in the strict schema! Then try again, thanks!");
            log.warning("Using default schemas instead!");
          }
        }
      }
      if(STRICT_VALIDATION && USE_USER_DOC_SCHEMA && !USE_USER_DOC_STRICT_SCHEMA) {
        log.warning("WARNING: You have enabled STRICT VALIDATION and passed only a not-strict user-doc-schema! Disabling STRICT_VALIDATION!");
        STRICT_VALIDATION = false;
      }
    }
   
  }

  private static URL parseForURL(String parameterValue) {
    URL url = null;
    if(parameterValue.contains(DOC_SCHEMA_URL_MARKER)
        || parameterValue.contains(MANIFEST_SCHEMA_URL_MARKER)
        || parameterValue.contains(DOC_STRICT_SCHEMA_URL_MARKER)
        || parameterValue.contains(DSIG_SCHEMA_URL_MARKER)) {
      try {
        url = URI.create(parameterValue.substring(parameterValue.indexOf("=")+1)).toURL();
      } catch (MalformedURLException e) {
        log.severe("No valid URL found in this Parameter!");
        return null;
      }
    }
    return url;
  }
 
  public static String getToolVersion() {
    ProcessRunner cmd = new ProcessRunner(getJingVersionCmd());
    cmd.run();
    String out = cmd.getProcessOutputAsString();
    if(!out.equalsIgnoreCase("") && out!=null) {
      String[] parts = out.split("\n");
      String version = parts[0];
      String[] lineParts = version.split(" ");
      version = lineParts[2];
      return version;
    }
    else {
      return "unknown";
    }
  }
 
  private void reset() {
    STRICT_VALIDATION = false;
    USE_USER_DOC_SCHEMA = false;
    USE_USER_DOC_STRICT_SCHEMA = false;
    USE_USER_MANIFEST_SCHEMA = false;
    USE_USER_DSIG_SCHEMA = false;
    USER_DOC_SCHEMA = null;
    USER_DOC_STRICT_SCHEMA = null;
    USER_MANIFEST_SCHEMA = null;
    USER_DSIG_SCHEMA = null;
    mimeType = null;
  }
}
// TOP
//
// Related Classes of eu.planets_project.services.validation.odfvalidator.utils.CoreOdfValidator
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.