/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package org.apache.shindig.gadgets.rewrite;
import org.apache.shindig.common.uri.Uri;
import org.apache.shindig.common.util.Utf8UrlCoder;
import org.apache.shindig.common.xml.DomUtil;
import org.apache.shindig.config.ContainerConfig;
import org.apache.shindig.gadgets.Gadget;
import org.apache.shindig.gadgets.http.HttpRequest;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.servlet.ProxyBase;
import org.apache.shindig.gadgets.spec.View;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLEncoder;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
/**
* Perform rewriting of HTML content including
* - Concatenating & proxying of referred script content
* - Concatenating & proxying of stylesheet links
* - Moving all style into head and converting @imports into links
* - Proxying referred content of images and embeds
*/
public class HTMLContentRewriter implements GadgetRewriter, RequestRewriter {
private final static int MAX_URL_LENGTH = 1500;
private final static String JS_MIME_TYPE = "text/javascript";
public final static Set<String> TAGS = ImmutableSet.of("img", "embed", "link", "script", "style");
private final static ImmutableMap<String, ImmutableSet<String>> LINKING_TAG_ATTRS = ImmutableMap.of(
"img", ImmutableSet.of("src"),
"embed", ImmutableSet.of("src")
);
private final ContentRewriterFeatureFactory rewriterFeatureFactory;
private final CssRequestRewriter cssRewriter;
private final ContentRewriterUris rewriterUris;
private static final String DEFAULT_CONCAT_URL_BASE = "/gadgets/concat?";
static final String CONTENT_REWRITE_KEY = "gadgets.content-rewrite";
static final String PROXY_URL_KEY = "proxy-url";
static final String CONCAT_URL_KEY = "concat-url";
private final ContainerConfig config;
@Inject
public HTMLContentRewriter(ContentRewriterFeatureFactory rewriterFeatureFactory,
ContentRewriterUris rewriterUris,
CssRequestRewriter cssRewriter, ContainerConfig config) {
this.rewriterFeatureFactory = rewriterFeatureFactory;
this.rewriterUris = rewriterUris;
this.cssRewriter = cssRewriter;
this.config = config;
}
public boolean rewrite(HttpRequest request, HttpResponse original,
MutableContent content) {
if (RewriterUtils.isHtml(request, original)) {
ContentRewriterFeature feature = rewriterFeatureFactory.get(request);
return rewriteImpl(feature, request.getGadget(), request.getUri(), content,
request.getContainer());
}
return false;
}
public void rewrite(Gadget gadget, MutableContent content) {
// Don't rewrite urls if caja is enabled since caja will inline them anyway
if (gadget.getSpec().getModulePrefs().getFeatures().containsKey("caja") ||
"1".equals(gadget.getContext().getParameter("caja"))) {
return;
}
String container = gadget.getContext().getContainer();
ContentRewriterFeature feature = rewriterFeatureFactory.get(gadget.getSpec(), container);
Uri contentBase = gadget.getSpec().getUrl();
View view = gadget.getCurrentView();
if (view != null && view.getHref() != null) {
contentBase = view.getHref();
}
rewriteImpl(feature, gadget.getSpec().getUrl(), contentBase, content,
container);
}
boolean rewriteImpl(ContentRewriterFeature feature, Uri gadgetUri,
Uri contentBase, MutableContent content, String container) {
if (!feature.isRewriteEnabled() || content.getDocument() == null) {
return false;
}
// Get ALL interesting tags
List<Element> tagList =
DomUtil.getElementsByTagNameCaseInsensitive(content.getDocument(), TAGS);
Element head = (Element)DomUtil.getFirstNamedChildNode(
content.getDocument().getDocumentElement(), "head");
boolean mutated = false;
// 1st step. Rewrite links in all embedded style tags. Convert @import statements into
// links and add them to the tag list.
// Move all style and link tags into head and concat the link tags
mutated = rewriteStyleTags(head, tagList, feature, gadgetUri, contentBase, container);
// Concat script links
mutated |= rewriteJsTags(tagList, feature, gadgetUri, contentBase, container);
// Rewrite links in images, embeds etc
mutated |= rewriteContentReferences(tagList, feature, gadgetUri, contentBase, container);
if (mutated) {
MutableContent.notifyEdit(content.getDocument());
}
return mutated;
}
protected boolean rewriteStyleTags(Element head, List<Element> elementList,
ContentRewriterFeature feature, Uri gadgetUri, Uri contentBase, String container) {
if (!feature.getIncludedTags().contains("style")) {
return false;
}
boolean mutated = false;
// Filter to just style tags
Iterable<Element> styleTags = Lists.newArrayList(Iterables.filter(elementList,
new Predicate<Element>() {
public boolean apply(Element element) {
return element.getNodeName().equalsIgnoreCase("style");
}
}));
LinkRewriter linkRewriter = createLinkRewriter(gadgetUri, feature, container);
for (Element styleTag : styleTags) {
mutated |= true;
if (styleTag.getParentNode() != head) {
styleTag.getParentNode().removeChild(styleTag);
head.appendChild(styleTag);
}
List<String> extractedUrls = cssRewriter.rewrite(
styleTag, contentBase, linkRewriter, true);
for (String extractedUrl : extractedUrls) {
// Add extracted urls as link elements to head
Element newLink = head.getOwnerDocument().createElement("link");
newLink.setAttribute("rel", "stylesheet");
newLink.setAttribute("type", "text/css");
newLink.setAttribute("href", extractedUrl);
head.appendChild(newLink);
elementList.add(newLink);
}
}
// Filter to just stylesheet link tags
List<Element> linkTags = Lists.newArrayList(Iterables.filter(elementList,
new Predicate<Element>() {
public boolean apply(Element element) {
return element.getNodeName().equalsIgnoreCase("link") &&
("stylesheet".equalsIgnoreCase(element.getAttribute("rel")) ||
element.getAttribute("type").toLowerCase().contains("css"));
}
}));
String concatBase = getConcatBase(gadgetUri.toJavaUri(), feature, "text/css", container);
concatenateTags(feature, linkTags, concatBase, contentBase, "href");
return mutated;
}
@SuppressWarnings("unchecked")
protected LinkRewriter createLinkRewriter(Uri gadgetUri, ContentRewriterFeature feature, String container) {
Object object = config.getProperty(container, CONTENT_REWRITE_KEY);
String proxyBaseNoGadget = null;
if(object instanceof JSONObject) {
try {
proxyBaseNoGadget = ((JSONObject)object).getString(PROXY_URL_KEY);
} catch (JSONException e) {
return null;
}
} else {
proxyBaseNoGadget = (String)((ImmutableMap)object).get(PROXY_URL_KEY);
}
return new ProxyingLinkRewriter(gadgetUri, feature, proxyBaseNoGadget);
}
@SuppressWarnings("unchecked")
protected String getConcatBase(URI gadgetUri, ContentRewriterFeature feature, String mimeType, String container) {
Object contentRewrite = config.getProperty(container, CONTENT_REWRITE_KEY);
String concatBaseNoGadget = DEFAULT_CONCAT_URL_BASE;
if(contentRewrite instanceof JSONObject) {
try {
concatBaseNoGadget = ((JSONObject)contentRewrite).getString(CONCAT_URL_KEY);
} catch (JSONException e) {
System.out.println("\n\n\nTungnd>>>>>>>>>>>>> go here");
e.printStackTrace();
System.out.println("\n\n\n");
}
} else {
concatBaseNoGadget = (String)((ImmutableMap)contentRewrite).get(CONCAT_URL_KEY);
}
//String concatBaseNoGadget = rewriterUris.getConcatBase(container);
return concatBaseNoGadget +
ProxyBase.REWRITE_MIME_TYPE_PARAM +
'=' + mimeType +
((gadgetUri == null) ? "" : "&gadget=" + Utf8UrlCoder.encode(gadgetUri.toString())) +
"&fp=" + feature.getFingerprint() +'&';
}
protected boolean rewriteJsTags(List<Element> elementList, ContentRewriterFeature feature,
Uri gadgetUri, Uri contentBase, String container) {
if (!feature.getIncludedTags().contains("script")) {
return false;
}
boolean mutated = false;
// Filter to just script tags
List<Element> scriptTags = Lists.newArrayList(Iterables.filter(elementList,
new Predicate<Element>() {
public boolean apply(Element node) {
if (node.getNodeName().equalsIgnoreCase("script")) {
String type = node.getAttribute("type");
return type == null || type.length() == 0 || type.equalsIgnoreCase(JS_MIME_TYPE);
}
return false;
}
}));
String concatBase = getConcatBase(gadgetUri.toJavaUri(), feature, JS_MIME_TYPE, container);
List<Element> concatenateable = Lists.newArrayList();
for (int i = 0; i < scriptTags.size(); i++) {
Element scriptTag = scriptTags.get(i);
Element nextSciptTag = null;
if (i + 1 < scriptTags.size()) {
nextSciptTag = scriptTags.get(i+1);
}
if (scriptTag.hasAttribute("src") &&
feature.shouldRewriteURL(scriptTag.getAttribute("src"))) {
mutated = true;
concatenateable.add(scriptTag);
if (nextSciptTag == null ||
!nextSciptTag.equals(getNextSiblingElement(scriptTag))) {
// Next tag is not concatenateable
concatenateTags(feature, concatenateable, concatBase, contentBase, "src");
concatenateable.clear();
}
} else {
concatenateTags(feature, concatenateable, concatBase, contentBase, "src");
concatenateable.clear();
}
}
concatenateTags(feature, concatenateable, concatBase, contentBase, "src");
return mutated;
}
protected boolean rewriteContentReferences(List<Element> elementList,
ContentRewriterFeature feature, Uri gadgetUri, Uri contentBase, String container) {
boolean mutated = false;
LinkRewriter rewriter = createLinkRewriter(gadgetUri, feature, container);
final Set<String> tagNames = Sets.intersection(LINKING_TAG_ATTRS.keySet(), feature.getIncludedTags());
// Filter to just style tags
Iterable<Element> tags = Iterables.filter(elementList, new Predicate<Element>() {
public boolean apply(Element node) {
return tagNames.contains(node.getNodeName().toLowerCase());
}
});
for (Element node : tags) {
NamedNodeMap attributes = node.getAttributes();
Set<String> rewriteable = LINKING_TAG_ATTRS.get(node.getNodeName().toLowerCase());
for (int i = 0; i < attributes.getLength(); i++) {
Node attr = attributes.item(i);
if (rewriteable.contains(attr.getNodeName().toLowerCase())) {
mutated = true;
attr.setNodeValue(rewriter.rewrite(attr.getNodeValue(), contentBase));
}
}
}
return mutated;
}
private static void concatenateTags(final ContentRewriterFeature feature,
List<Element> tags, String concatBase, Uri contentBase,
final String attr) {
// Filter out excluded URLs
tags = Lists.newArrayList(Iterables.filter(tags, new Predicate<Element>() {
public boolean apply(Element element) {
return (element.hasAttribute(attr) && feature.shouldRewriteURL(element.getAttribute(attr)));
}
}));
// Eliminate duplicates while maintaining order
LinkedHashSet<Uri> nodeRefList = Sets.newLinkedHashSet();
for (Element tag : tags) {
try {
nodeRefList.add(contentBase.resolve(Uri.parse(tag.getAttribute(attr))));
} catch (IllegalArgumentException e) {
// Same behavior as JavascriptTagMerger
// Perhaps switch to ignoring script src instead?
throw new RuntimeException(e);
}
}
List<Uri> concatented = getConcatenatedUris(concatBase, nodeRefList);
for (int i = 0; i < tags.size(); i++) {
if (i < concatented.size()) {
// Set new URLs into existing tags
tags.get(i).setAttribute(attr, concatented.get(i).toString());
} else {
// Remove remainder
tags.get(i).getParentNode().removeChild(tags.get(i));
}
}
}
private static List<Uri> getConcatenatedUris(String concatBase, LinkedHashSet<Uri> uris) {
List<Uri> concatUris = Lists.newLinkedList();
int paramIndex = 1;
StringBuilder builder = null;
int maxUriLen = MAX_URL_LENGTH + concatBase.length();
try {
int uriIx = 0, lastUriIx = (uris.size() - 1);
//
for (Uri uri : uris) {
if (paramIndex == 1) {
builder = new StringBuilder(concatBase);
} else {
builder.append('&');
}
builder.append(paramIndex).append('=')
.append(URLEncoder.encode(uri.toString(), "UTF-8"));
if (builder.length() > maxUriLen ||
uriIx == lastUriIx) {
// Went over URI length warning limit or on the last uri
concatUris.add(Uri.parse(builder.toString()));
builder = null;
paramIndex = 0;
}
++paramIndex;
++uriIx;
}
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
return concatUris;
}
private Element getNextSiblingElement(Element elem) {
Node n = elem;
n = n.getNextSibling();
while (n != null && n.getNodeType() != Node.ELEMENT_NODE) {
n = n.getNextSibling();
}
return (Element)n;
}
}