/**
* see
* https://github.com/WolfgangFahl/w3cValidator/blob/master/LICENSE
*/
package com.bitplan.w3ccheck;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.ws.rs.core.MediaType;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import org.apache.commons.io.IOUtils;
import org.eclipse.persistence.oxm.annotations.XmlCDATA;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.multipart.FormDataBodyPart;
import com.sun.jersey.multipart.FormDataMultiPart;
/**
* Wrapper for W3C Validator Soap responses
* see <a href="http://validator.w3.org/docs/api.html#requestformat">W3C Markup Validation Service Request Format</a>
* @author wf
*
* The JAXB annotations of this wrapper are designed to unmarshal a W3C Markup Validation Service
* SOAP 1.2 response into this Java object.
*
* on the class level the root node of the SOAP message is covered:
* <pre>
* {@code
* <env:Envelope xmlns:env="http://www.w3.org/2003/05/soap-envelope">
* ...
* </env:Envelope>
* }
* </pre>
* According to <a href="http://validator.w3.org/docs/api.html#requestformat">W3C Markup Validation Service Request Format documentation</a>:
* A SOAP response for the validation of a document (invalid) will look like this:
* <pre>
* {@code
* <?xml version="1.0" encoding="UTF-8"?>
* <env:Envelope xmlns:env="http://www.w3.org/2003/05/soap-envelope">
* <env:Body>
* <m:markupvalidationresponse
* env:encodingStyle="http://www.w3.org/2003/05/soap-encoding"
* xmlns:m="http://www.w3.org/2005/10/markup-validator">
* <m:uri>http://qa-dev.w3.org/wmvs/HEAD/dev/tests/xhtml1-bogus-element.html</m:uri>
* <m:checkedby>http://validator.w3.org/</m:checkedby>
* <m:doctype>-//W3C//DTD XHTML 1.0 Transitional//EN</m:doctype>
* <m:charset>utf-8</m:charset>
* <m:validity>false</m:validity>
* <m:errors>
* <m:errorcount>1</m:errorcount>
* <m:errorlist>
*
* <m:error>
* <m:line>13</m:line>
* <m:col>6</m:col>
* <m:source><![CDATA[
* <foo<strong title="Position where error was detected.">></strong>This phrase is enclosed in a bogus FOO element.</foo>
* ]]>
* </m:source>
* <m:explanation>
* <![CDATA[
* <p> ... </p<p>
* ]]>
* </m:explanation>
* <m:messageid>76</m:messageid>
* <m:message>element "foo" undefined</m:message>
* </m:error>
* </m:errorlist>
* </m:errors>
* <m:warnings>
* <m:warningcount>0</m:warningcount>
* <m:warninglist>
* ...
* </m:warninglist>
* </m:warnings>
* </m:markupvalidationresponse>
* </env:Body>
* </env:Envelope>
* }
* </pre>
* the structure of this W3CValidator class is aligned to this format
*/
@XmlRootElement(name = "Envelope", namespace = "http://www.w3.org/2003/05/soap-envelope")
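// no getters/setters are used - JAXB binds the fields directly (see XmlAccessType.FIELD below);
// the nested container fields (body, response, errors, warnings and their lists) are
// pre-initialized so that navigating them is null-safe
// setting should only be done via the check functions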
@XmlAccessorType(XmlAccessType.FIELD)
public class W3CValidator {
/**
* Debug switch: when set to true, {@link #check(String, String)} logs the HTTP response
* status and the raw response XML at INFO level via {@link #LOGGER}.
* Defaults to false (no logging).
*/
public static boolean debug=false;
/**
* Logger named "com.bitplan.w3ccheck".
* Within this class it is only written to when {@link #debug} is true.
*/
protected static java.util.logging.Logger LOGGER = java.util.logging.Logger
.getLogger("com.bitplan.w3ccheck");
/**
* The URL of the validation service that {@link #check(String)} posts to.
* Defaults to the official W3C Markup Validation service; if you'd like to run
* the checks against your own installation, assign its URL here.
*/
public static String url="http://validator.w3.org/check";
/**
* Validate the given html against the validation service configured in {@link #url}
* (by default http://validator.w3.org/check).
*
* This is a convenience shortcut for {@code check(url, html)}.
*
* @param html - the html code to be checked
* @return - a W3CValidator response according to the SOAP response format
*           (see http://validator.w3.org/docs/api.html#requestformat) or null if the
*           http response status of the Validation service is other than 200
* @throws JAXBException if there is something wrong with the response message so that it
*                       can not be unmarshalled
*/
public static W3CValidator check(String html) throws JAXBException {
  // delegate to the two-argument variant using the statically configured service url
  return check(url, html);
}
/**
* create a W3CValidator result for the given url with the given html
*
* The html is POSTed as a multipart/form-data upload (form field "uploaded_file") together
* with the form field output=soap12, which makes the validator answer in its SOAP 1.2
* response format. A response with http status 200 is unmarshalled to a W3CValidator object.
*
* Before uploading, all NUL characters (U+0000) are stripped from the html
* (see https://github.com/WolfgangFahl/w3cValidator/issues/1) and the text is encoded
* as UTF-8, independent of the platform default charset.
*
* @param url - the url of the validator e.g. "http://validator.w3.org/check"
* @param html - the html code to be checked, must not be null
* @return - a W3CValidator response according to the SOAP response format or null if the
*           http response status of the Validation service is other than 200
*           explained at response http://validator.w3.org/docs/api.html#requestformat
* @throws JAXBException if there is something wrong with the response message so that it
*                       can not be unmarshalled
* @throws NullPointerException if html is null
*/
public static W3CValidator check(String url, String html)
    throws JAXBException {
  // fail fast with a clear message instead of a late, anonymous NullPointerException
  if (html == null) {
    throw new NullPointerException("html must not be null");
  }
  // make sure Unicode 0x0 chars are removed from html (if any)
  // see https://github.com/WolfgangFahl/w3cValidator/issues/1
  String cleanHtml = html.replace("\u0000", "");
  // encode explicitly: the platform default charset would make the upload platform dependent
  byte[] htmlBytes = cleanHtml.getBytes(java.nio.charset.Charset.forName("UTF-8"));

  // the Jersey client holds system resources, so it is destroyed again in the finally block
  Client client = Client.create();
  try {
    // create a WebResource to access the given url
    WebResource resource = client.resource(url);
    // prepare form data for posting
    FormDataMultiPart form = new FormDataMultiPart();
    // set the output format to soap12 - if unset, the usual Web (html) format would be sent.
    // See the SOAP 1.2 response format description at
    // http://validator.w3.org/docs/api.html#requestformat
    form.field("output", "soap12");
    // The document to validate, POSTed as multipart/form-data
    FormDataBodyPart fdp = new FormDataBodyPart("uploaded_file",
        new java.io.ByteArrayInputStream(htmlBytes),
        MediaType.APPLICATION_OCTET_STREAM_TYPE);
    // attach the inputstream as upload info to the form
    form.bodyPart(fdp);
    // now post the form via the Internet/Intranet
    ClientResponse response = resource.type(MediaType.MULTIPART_FORM_DATA)
        .post(ClientResponse.class, form);
    try {
      // in debug mode show the response status
      if (debug) {
        LOGGER.log(Level.INFO, "response status for '"+url+"'=" + response.getStatus());
      }
      // anything but http status ok yields no result
      if (response.getStatus() != 200) {
        return null;
      }
      // get the XML encoded SOAP 1.2 response format
      String responseXml = response.getEntity(String.class);
      // in debug mode show the full xml
      if (debug) {
        LOGGER.log(Level.INFO,responseXml);
      }
      // unmarshal the xml message to a W3CValidator Java object
      // NOTE(review): the XML comes from a remote service; whether DTDs/external entities
      // are processed depends on the JAXB provider's parser configuration - confirm if the
      // configured url may point to an untrusted service
      JAXBContext context = JAXBContext.newInstance(W3CValidator.class);
      Unmarshaller u = context.createUnmarshaller();
      // this step will convert from xml text to Java Object
      return (W3CValidator) u.unmarshal(new StringReader(responseXml));
    } finally {
      // release the underlying connection, in particular when the entity was never read
      response.close();
    }
  } finally {
    client.destroy();
  }
} // check
/**
* field that holds the structure for the Body node of the message
* <pre>
* {@code
* <env:Body>
* ...
* </env:Body>
* }
* </pre>
* The field is pre-initialized with an empty {@link Body} (whose response field is
* pre-initialized as well), so it can be navigated without null checks.
*
* NOTE(review): no namespace is given on this element although env:Body lives in the
* SOAP envelope namespace - presumably the namespace is supplied elsewhere
* (e.g. a package level XmlSchema annotation); confirm.
*/
@XmlElement(name = "Body")
// initialize Body
public Body body = new Body();
/**
* structure for the Body node of the message
* <pre>
* {@code
* <env:Body>
* ...
* </env:Body>
* }
* </pre>
*/
@XmlRootElement(name = "Body", namespace = "http://www.w3.org/2003/05/soap-envelope")
@XmlAccessorType(XmlAccessType.FIELD)
public static class Body {
/**
* the markupvalidationresponse child element of the SOAP body (markup-validator namespace);
* pre-initialized with an empty {@link ValidationResponse}
*/
@XmlElement(name = "markupvalidationresponse", namespace = "http://www.w3.org/2005/10/markup-validator")
public ValidationResponse response = new ValidationResponse();
/**
* The main element of the validation response. Encloses all other information about the validation results.
* @author wf
* <pre>
* {@code
* <m:markupvalidationresponse env:encodingStyle="http://www.w3.org/2003/05/soap-encoding" xmlns:m="http://www.w3.org/2005/10/markup-validator">
* ...
* </m:markupvalidationresponse>
* }
* </pre>
*/
@XmlAccessorType(XmlAccessType.FIELD)
public static class ValidationResponse {
/**
* The address of the document validated. Will (likely?) be upload://Form Submission if an
* uploaded document or fragment was validated. In EARL terms, this is the TestSubject.
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public String uri;
/**
* Location of the service which provided the validation result. In EARL terms, this is the Assertor.
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public String checkedby;
/**
* Detected (or forced) Document Type for the validated document.
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public String doctype;
/**
* Detected (or forced) Character Encoding for the validated document.
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public String charset;
/**
* Whether the validated document passed formal validation (true) or not (false).
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public boolean validity;
/**
* Encapsulates all data about errors encountered through the validation process.
* Pre-initialized with an empty {@link Errors} container.
* <pre>
* {@code
* <m:errors>
* ...
* </m:errors>
* }
* </pre>
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public Errors errors = new Errors();
/**
* Container for the errors element of the response: the reported error count plus the
* wrapped list of validation errors. Structurally equivalent to Warnings.
*/
public static class Errors {
/**
* a child of errors, counts the number of errors listed;
* bound to its own errorcount element, i.e. it is not derived from the size of errorlist
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public int errorcount;
/**
* a child of errors, contains the list of errors: one entry per error element found
* inside the errorlist wrapper element; pre-initialized with an empty list
*/
@XmlElementWrapper(name = "errorlist", namespace = "http://www.w3.org/2005/10/markup-validator")
@XmlElement(name = "error", namespace = "http://www.w3.org/2005/10/markup-validator")
public List<ValidationError> errorlist = new ArrayList<ValidationError>();
/**
* a child of errorlist, contains the information on a single validation error.
* Adds no members of its own - line, col, message etc. are inherited from ValidationAtom.
*/
@XmlRootElement(name = "error", namespace = "http://www.w3.org/2005/10/markup-validator")
public static class ValidationError extends ValidationAtom {
} // ValidationError
} // Errors
/**
* Encapsulates all data about warnings encountered through the validation process.
* Pre-initialized with an empty {@link Warnings} container.
* <pre>
* {@code
* <m:warnings>
* ...
* </m:warnings>
* }
* </pre>
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public Warnings warnings = new Warnings();
/**
* Container for the warnings element of the response: the reported warning count plus the
* wrapped list of validation warnings. Structurally equivalent to Errors.
*/
public static class Warnings {
/**
* a child of warnings, counts the number of warnings listed;
* bound to its own warningcount element, i.e. it is not derived from the size of warninglist
*/
@XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
public int warningcount;
/**
* a child of warnings, contains the list of warnings: one entry per warning element found
* inside the warninglist wrapper element; pre-initialized with an empty list
*/
@XmlElementWrapper(name = "warninglist", namespace = "http://www.w3.org/2005/10/markup-validator")
@XmlElement(name = "warning", namespace = "http://www.w3.org/2005/10/markup-validator")
public List<ValidationWarning> warninglist = new ArrayList<ValidationWarning>();
/**
* a child of warninglist, contains the information on a single validation warning.
* Adds no members of its own - line, col, message etc. are inherited from ValidationAtom.
*/
@XmlRootElement(name = "warning", namespace = "http://www.w3.org/2005/10/markup-validator")
public static class ValidationWarning extends ValidationAtom {
} // ValidationWarning
} // Warnings
/**
* base class for ValidationError and ValidationWarning
* containing e.g. line, col and message
*
* all fields are bound to child elements in the markup-validator namespace
*/
public static class ValidationAtom {
  /**
   * Within the source code of the validated document, refers to the line where
   * the error was detected.
   */
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public int line;
  /**
   * Within the source code of the validated document, refers to the column of the
   * line where the error was detected.
   */
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public int col;
  /**
   * Snippet of the source where the error was found. Given as HTML fragment within CDATA block.
   */
  @XmlCDATA
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public String source;
  /**
   * Explanation for the error. Given as HTML fragment within CDATA block.
   */
  @XmlCDATA
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public String explanation;
  /**
   * The number/identifier of the error, as addressed internally by the validator
   */
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public int messageid;
  /**
   * The actual error message
   */
  @XmlElement(namespace = "http://www.w3.org/2005/10/markup-validator")
  public String message;

  /**
   * convert this validation error or warning to a human readable string
   *
   * @return the simple name of the runtime class (e.g. ValidationError or ValidationWarning)
   *         followed by line, column and the quoted message, e.g.
   *         {@code ValidationError line 13 col 6:'element "foo" undefined'}
   */
  @Override
  public String toString() {
    // the runtime class name tells errors and warnings apart
    return getClass().getSimpleName() + " line " + line + " col " + col + ":'" + message + "'";
  }
} // ValidationAtom
} // ValidationResponse
} // Body
} // W3CValidator