/**
* License Agreement.
*
* Rich Faces - Natural Ajax for Java Server Faces (JSF)
*
* Copyright (C) 2007 Exadel, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.ajax4jsf.webapp.nekko;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.Properties;
import java.util.Set;
import javax.servlet.ServletException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ajax4jsf.application.AjaxViewHandler;
import org.ajax4jsf.webapp.HtmlParser;
import org.ajax4jsf.xml.serializer.Method;
import org.ajax4jsf.xml.serializer.OutputPropertiesFactory;
import org.ajax4jsf.xml.serializer.Serializer;
import org.ajax4jsf.xml.serializer.SerializerFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.html.dom.HTMLDocumentImpl;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.HTMLAugmentations;
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.filters.DefaultFilter;
import org.cyberneko.html.filters.ElementRemover;
import org.cyberneko.html.filters.Purifier;
import org.cyberneko.html.filters.Writer;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* @author shura
*
*/
public class NekkoParser implements HtmlParser {
private static final Log _log = LogFactory.getLog(NekkoParser.class);
private HtmlSAXParser _parser;
// private HtmlWriter _writer= new HtmlWriter();
private ElementRemover remover = new ElementRemover();
private XMLDocumentFilter[] _filters = {
new ViewStateFilter()/* ,remover */, new HtmlCorrectionFilter(),
new Purifier() /* , _writer */};
private DOMFragmentParser viewStateParser;// = new DOMFragmentParser();
private Document viewStateDocument;// = new HTMLDocumentImpl();
private DocumentFragment fragment = null;
private Set _scripts;
private Set _styles;
private String _viewState;
private String _encoding;
private Serializer _serializer;
private String _publicId = "-//W3C//DTD XHTML 1.0 Transitional//EN";
private String _systemid = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
private String _namespace = "http://www.w3.org/1999/xhtml";
private String _outputEncoding;
/**
*
*/
public NekkoParser() {
}
/**
*
*/
public void init() {
_parser = new HtmlSAXParser(getHtmlConfig());
Properties properties = OutputPropertiesFactory
.getDefaultMethodProperties(Method.XHTML);
// Properties properties =
// OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
// properties.put("encoding",_encoding);
_serializer = SerializerFactory.getSerializer(properties);
// serializer.setOutputStream(output);
// _parser.setContentHandler(serializer.asContentHandler());
viewStateParser = new DOMFragmentParser();
// Set parser features
try {
viewStateParser
.setProperty(
"http://cyberneko.org/html/properties/names/elems",
"lower");
viewStateParser
.setProperty(
"http://cyberneko.org/html/properties/names/attrs",
"lower");
} catch (SAXException e) {
_log.error("Exception in DOM parser configuration", e);
}
try {
// Create Document Builder Factory
DocumentBuilderFactory docFactory = DocumentBuilderFactory
.newInstance();
// Create Document Builder
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
viewStateDocument = docBuilder.newDocument();
} catch (ParserConfigurationException e) {
viewStateDocument = new HTMLDocumentImpl();
_log
.error(
"Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
e);
}
// viewStateDocument = new HTMLDocumentImpl();
remover.removeElement("style");
}
/**
* Reset parser state
*/
public void reset() {
_scripts = null;
_styles = null;
_viewState = null;
_parser.reset();
_serializer.reset();
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#parseHtml(java.io.InputStream,
* java.io.OutputStream)
*/
public void parseHtml(InputStream input, java.io.Writer output)
throws IOException {
InputSource src = new InputSource(input);
parseSAXSource(src, output);
}
private void parseSAXSource(InputSource src, java.io.Writer output)
throws IOException {
// PrintWriter printWriter = null;
fragment = null;
if (null != _viewState) {
fragment = viewStateDocument.createDocumentFragment();
try {
viewStateParser.parse(new InputSource(new StringReader(
_viewState)), fragment);
} catch (Exception e) {
fragment = null;
}
// TODO - parse view state to DOM Fragment.
}
try {
if (null != _encoding) {
_parser
.setProperty(
"http://cyberneko.org/html/properties/default-encoding",
_encoding);
}
Properties properties = _serializer.getOutputFormat();
if (null != _outputEncoding) {
properties.put("encoding", _outputEncoding);
}
_serializer.setOutputFormat(properties);
_serializer.setWriter(output);
// _serializer.setOutputStream(new OutputStream(){
//
// public void write(int b) throws IOException {
// // TODO Auto-generated method stub
//
// }});
_parser.setContentHandler(_serializer.asContentHandler());
// printWriter = new PrintWriter(output);
// _writer.setWriter(printWriter);
// _writer.setEncoding(_encoding);
_parser.parse(src);
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
// _writer.setWriter(null);
// if(null != printWriter){
// printWriter.flush();
// printWriter.close();
// }
}
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#parseHtml(java.io.Reader,
* java.io.OutputStream)
*/
public void parseHtml(Reader input, java.io.Writer output)
throws IOException {
InputSource src = new InputSource(input);
parseSAXSource(src, output);
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setEncoding(java.lang.String)
*/
public void setInputEncoding(String encoding) {
_encoding = encoding;
}
public void setOutputEncoding(String encoding) {
_outputEncoding = encoding;
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setMoveElements(boolean)
*/
public void setMoveElements(boolean move) {
// TODO Auto-generated method stub
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setScripts(java.util.Set)
*/
public void setScripts(Set scripts) {
_scripts = scripts;
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setStyles(java.util.Set)
*/
public void setStyles(Set styles) {
_styles = styles;
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setDoctype(java.lang.String)
*/
public void setDoctype(String doctype) {
this._publicId = doctype;
}
/*
* (non-Javadoc)
*
* @see org.ajax4jsf.webapp.HtmlParser#setViewState(java.lang.String)
*/
public void setViewState(String viewState) {
_viewState = viewState;
}
private static class HtmlSAXParser extends AbstractSAXParser {
/**
* Default constructor.
*
* @throws ServletException
*/
public HtmlSAXParser(HTMLConfiguration config) {
super(config);
}
}
private class ViewStateFilter extends DefaultFilter {
private boolean haveHtml = false;
private boolean haveHead = false;
private boolean headParsed = false;
private int stateMarkerLevel = -1;
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#reset(org.apache.xerces.xni.parser.XMLComponentManager)
*/
public void reset(XMLComponentManager componentManager)
throws XMLConfigurationException {
haveHead = false;
haveHtml = false;
headParsed = false;
stateMarkerLevel = -1;
super.reset(componentManager);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#startElement(org.apache.xerces.xni.QName,
* org.apache.xerces.xni.XMLAttributes,
* org.apache.xerces.xni.Augmentations)
*/
public void startElement(QName element, XMLAttributes attributes,
Augmentations augs) throws XNIException {
if (stateMarkerLevel >= 0) {
stateMarkerLevel++;
if (null != fragment) {
return;
}
}
if (!headParsed) {
if ("html".equalsIgnoreCase(element.rawname)) {
haveHtml = true;
} else if ("head".equalsIgnoreCase(element.rawname)) {
haveHead = true;
super.startElement(element, attributes, augs);
insertResources();
return;
} else {
if (!haveHtml) {
insertStartElement("html");
}
insertStartElement("head");
insertResources();
insertEndElement("head");
}
}
if (isStateMarker(element, attributes)) {
stateMarkerLevel = 0;
return;
}
;
super.startElement(element, attributes, augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#characters(org.apache.xerces.xni.XMLString,
* org.apache.xerces.xni.Augmentations)
*/
public void characters(XMLString text, Augmentations augs)
throws XNIException {
if (stateMarkerLevel >= 0) {
if (null != fragment) {
return;
}
}
super.characters(text, augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#endElement(org.apache.xerces.xni.QName,
* org.apache.xerces.xni.Augmentations)
*/
public void endElement(QName element, Augmentations augs)
throws XNIException {
if (stateMarkerLevel >= 0) {
stateMarkerLevel--;
if (null != fragment || stateMarkerLevel == -1) {
return;
}
}
super.endElement(element, augs);
}
private void insertResources() {
headParsed = true;
if (null != _styles) {
for (Iterator iter = _styles.iterator(); iter.hasNext();) {
String style = (String) iter.next();
QName element = new QName(null, "link", "link", null);
XMLAttributes attrs = new XMLAttributesImpl();
attrs.addAttribute(new QName(null, "href", "href", null),
"CDATA", style);
attrs.addAttribute(new QName(null, "type", "type", null),
"CDATA", "text/css");
attrs.addAttribute(new QName(null, "rel", "rel", null),
"CDATA", "stylesheet");
Augmentations augs = new HTMLAugmentations();
super.emptyElement(element, attrs, augs);
}
}
if (null != _scripts) {
for (Iterator iter = _scripts.iterator(); iter.hasNext();) {
String script = (String) iter.next();
QName element = new QName(null, "script", "script", null);
XMLAttributes attrs = new XMLAttributesImpl();
attrs.addAttribute(new QName(null, "src", "src", null),
"CDATA", script);
attrs.addAttribute(new QName(null, "type", "type", null),
"CDATA", "text/javascript");
Augmentations augs = new HTMLAugmentations();
super.startElement(element, attrs, augs);
super.endElement(element, augs);
}
}
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#emptyElement(org.apache.xerces.xni.QName,
* org.apache.xerces.xni.XMLAttributes,
* org.apache.xerces.xni.Augmentations)
*/
public void emptyElement(QName name, XMLAttributes attributes,
Augmentations augmentation) throws XNIException {
if (stateMarkerLevel >= 0) {
if (null != fragment) {
return;
}
}
if (!headParsed) {
if ("head".equalsIgnoreCase(name.rawname)) {
haveHead = true;
super.startElement(name, attributes, augmentation);
insertResources();
insertEndElement(name.rawname);
return;
}
}
if (isStateMarker(name, attributes)) {
return;
}
;
super.emptyElement(name, attributes, augmentation);
}
/**
* @param name
* @param attributes
*/
private boolean isStateMarker(QName name, XMLAttributes attributes) {
if (name.rawname.equalsIgnoreCase("span")
&& AjaxViewHandler.STATE_MARKER_KEY.equals(attributes
.getValue("id"))) {
// STATE marker element - out real content.
if (null != fragment) {
try {
_serializer.asDOMSerializer().serialize(fragment);
} catch (IOException e) {
// Break output.
}
}
return true;
}
return false;
}
void insertStartElement(String name) {
QName element = new QName(null, name, name, null);
XMLAttributes attrs = new XMLAttributesImpl();
Augmentations augs = new HTMLAugmentations();
super.startElement(element, attrs, augs);
}
void insertEndElement(String name) {
QName element = new QName(null, name, name, null);
// XMLAttributes attrs = new XMLAttributesImpl();
Augmentations augs = new HTMLAugmentations();
super.endElement(element, augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#endDocument(org.apache.xerces.xni.Augmentations)
*/
public void endDocument(Augmentations augs) throws XNIException {
if (!haveHtml) {
insertEndElement("html");
}
super.endDocument(augs);
}
}
private static class HtmlWriter extends Writer {
/**
*
*/
public HtmlWriter() {
super();
fEncoding = "UTF-8";
}
public void setEncoding(String encoding) {
this.fEncoding = encoding;
}
public void setWriter(PrintWriter writer) {
this.fPrinter = writer;
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.Writer#emptyElement(org.apache.xerces.xni.QName,
* org.apache.xerces.xni.XMLAttributes,
* org.apache.xerces.xni.Augmentations)
*/
public void emptyElement(QName element, XMLAttributes attributes,
Augmentations augs) throws XNIException {
// TODO Auto-generated method stub
super.emptyElement(element, attributes, augs);
printEndElement(element);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#endCDATA(org.apache.xerces.xni.Augmentations)
*/
public void endCDATA(Augmentations augs) throws XNIException {
// TODO Auto-generated method stub
super.endCDATA(augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#startCDATA(org.apache.xerces.xni.Augmentations)
*/
public void startCDATA(Augmentations augs) throws XNIException {
// TODO Auto-generated method stub
super.startCDATA(augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#textDecl(java.lang.String,
* java.lang.String, org.apache.xerces.xni.Augmentations)
*/
public void textDecl(String version, String encoding, Augmentations augs)
throws XNIException {
// TODO Auto-generated method stub
super.textDecl(version, encoding, augs);
}
/*
* (non-Javadoc)
*
* @see org.cyberneko.html.filters.DefaultFilter#xmlDecl(java.lang.String,
* java.lang.String, java.lang.String,
* org.apache.xerces.xni.Augmentations)
*/
public void xmlDecl(String version, String encoding, String standalone,
Augmentations augs) throws XNIException {
// TODO Auto-generated method stub
super.xmlDecl(version, encoding, standalone, augs);
}
}
/**
* Factory method for create and configure HTML parser configuration. Create
* configuration for use in parsing, set nessesary features and properties
*
* @return
* @throws ServletException
*/
protected HTMLConfiguration getHtmlConfig() {
HTMLConfiguration _config = new HTMLConfiguration();
try {
if (this.getPublicid() != null || this.getSystemid() != null) {
_config.setFeature(
"http://cyberneko.org/html/features/insert-doctype",
true);
_config.setFeature(
"http://cyberneko.org/html/features/override-doctype",
true);
}
if (this.getPublicid() != null) {
_config.setProperty(
"http://cyberneko.org/html/properties/doctype/pubid",
getPublicid());
}
if (this.getSystemid() != null) {
_config.setProperty(
"http://cyberneko.org/html/properties/doctype/sysid",
getSystemid());
}
if (this.getNamespace() != null) {
_config.setFeature("http://xml.org/sax/features/namespaces",
true);
_config
.setFeature(
"http://cyberneko.org/html/features/override-namespaces",
true);
_config.setFeature(
"http://cyberneko.org/html/features/insert-namespaces",
true);
_config.setProperty(
"http://cyberneko.org/html/properties/namespaces-uri",
getNamespace());
}
// config
// .setFeature(
// "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
// true);
_config
.setFeature(
"http://cyberneko.org/html/features/scanner/cdata-sections",
true);
_config
.setFeature(
"http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
true);
_config
.setFeature(
"http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
true);
_config.setFeature(
"http://cyberneko.org/html/features/insert-doctype", true);
_config.setFeature(
"http://cyberneko.org/html/features/insert-namespaces",
true);
//
// Set properties
// http://cyberneko.org/html/features/insert-namespaces
// _config
// .setProperty(
// "http://cyberneko.org/html/properties/default-encoding",
// encoding);
_config
.setProperty(
"http://cyberneko.org/html/properties/names/elems",
"lower");
_config
.setProperty(
"http://cyberneko.org/html/properties/names/attrs",
"lower");
_config.setProperty("http://cyberneko.org/html/properties/filters",
_filters);
} catch (XMLConfigurationException e) {
// throw new ServletException("error set Neko feature ", e);
}
return _config;
}
private String getNamespace() {
// TODO Auto-generated method stub
return this._namespace;
}
private String getSystemid() {
// TODO Auto-generated method stub
return this._systemid;
}
private String getPublicid() {
// TODO Auto-generated method stub
return this._publicId;
}
/**
* @param namespace
* The namespace to set.
*/
public void setNamespace(String namespace) {
_namespace = namespace;
}
/**
* @param publicId
* The publicId to set.
*/
public void setPublicId(String publicId) {
_publicId = publicId;
}
/**
* @param systemid
* The systemid to set.
*/
public void setSystemid(String systemid) {
_systemid = systemid;
}
public boolean setMime(String mimeType) {
return false;
}
}