Package com.canoo.webtest.steps.verify

Source Code of com.canoo.webtest.steps.verify.VerifyLinks

// Copyright © 2002-2007 Canoo Engineering AG, Switzerland.
package com.canoo.webtest.steps.verify;

import com.canoo.webtest.boundary.HtmlUnitBoundary;
import com.canoo.webtest.engine.Context;
import com.canoo.webtest.engine.StepFailedException;
import com.canoo.webtest.engine.RegExStringVerifier;
import com.canoo.webtest.interfaces.IVerificationStep;
import com.canoo.webtest.steps.Step;
import com.canoo.webtest.util.ConversionUtil;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.xml.sax.SAXException;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
* @author Dierk Koenig, Urs-Peter Häss
* @author Marc Guillemot, Paul King, Brian Hubbard
* @webtest.step category="Core"
* name="verifyLinks"
* alias="verifylinks"
* description="This step checks the validity of all links on the current page. Non-<key>HTML</key> pages (CSS, <key>javascript</key>, <key>XML</key> files) are not checked for internal links. Non-<key>HTTP</key> links (mail addresses, ftp etc.) are not checked or followed."
*/
public class VerifyLinks extends Step implements IVerificationStep
{
    private static final Logger LOG = Logger.getLogger(VerifyLinks.class);
    private String fBaseHost;
    private int fMaxDepth;
    private String fMaxDepthStr;
    private int fCurrentDepth;
    private boolean fOnsiteonly;
    private String fExcludes;
    private String fIncludes;
    private final Set fFailedVisits = new HashSet();
    private final Set fVisitedUrls = new HashSet();
    private int fValidLinks;
    private boolean fIgnoreForeignJSErrors;

    protected Set getFailedVisits() {
        return fFailedVisits;
    }

    public String getDepth() {
        return fMaxDepthStr;
    }

    /**
     * @webtest.parameter required="no"
     * default="0"
     * description="The <em>depth</em> parameter defines the depth of the recursive search for broken links on sub-pages."
     */
    public void setDepth(String depth) {
        fMaxDepthStr = depth;
    }

    /**
     * @webtest.parameter required="no"
     * default="&lt;empty&gt;"
     * description="If <em>excludes</em> is set then each link found is compared to the defined string (via regexp), if it matches then the link is not followed."
     */
    public void setExcludes(String regex) {
        fExcludes = regex;
    }

    public String getExcludes() {
        return fExcludes;
    }

    /**
     * @webtest.parameter required="no"
     * default="&lt;all&gt;"
     * description="If <em>includes</em> is set then each link found is compared to the defined string (via regexp), if it matches then the link is processed, others are ignored."
     */
    public void setIncludes(String regex) {
        fIncludes = regex;
    }

    public String getIncludes() {
        return fIncludes;
    }

    /**
     * @webtest.parameter required="no"
     * default="false"
     * description="If <em>onsiteonly</em> is set to <em>true</em>, the recursive search for invalid links is limited to the local host.
     * Only the initial link to a foreign host is checked, but no deeper search is performed."
     */
    public void setOnsiteonly(final boolean onsiteonly) {
        fOnsiteonly = onsiteonly;
    }
   
    /**
     *
     * @webtest.parameter required="no"
     * default="false"
     * description="Indicates if JavaScript errors should be ignored on visited pages from a different host
     * than the current page."
     */
    public void setIgnoreForeignJSErrors(final boolean b)
    {
      fIgnoreForeignJSErrors = b;
    }

    public void doExecute() throws SAXException, MalformedURLException {
        verifyProperties();
        nullResponseCheck();
        final Context context = getContext();
        final HtmlPage htmlPage = context.getCurrentHtmlResponse(this);
        LOG.info("Examining page with title=" + htmlPage.getTitleText());
        if (!StringUtils.isEmpty(getIncludes())) {
            LOG.info("Only including links which match '" + getIncludes() + "'");
        }
        if (!StringUtils.isEmpty(getExcludes())) {
            LOG.info("Excluding links which match '" + getExcludes() + "'");
        }
        fBaseHost = htmlPage.getWebResponse().getRequestUrl().getHost();
        final WebClient client = context.getWebClient();
        checkVisits(client, htmlPage);
        if (!fFailedVisits.isEmpty()) {
            throw new StepFailedException(fFailedVisits.size() + " broken link(s): " + brokenLinksToString(), this);
        }
    }

  protected void addComputedParameters(final Map map)
    {
      map.put("-> valid links", String.valueOf(fValidLinks));
    }

    protected void checkVisits(final WebClient webClient, final HtmlPage response) {
        final Set urls = getGoodLinks(response);
        final RegExStringVerifier verifier = new RegExStringVerifier();
        for (final Iterator iter = urls.iterator(); iter.hasNext();) {
            final URL url = (URL) iter.next();
            if (fVisitedUrls.contains(url)) {
                LOG.debug("Skipped already visited: " + url);
                fValidLinks++;
                continue;
            }
            if (!StringUtils.isEmpty(getIncludes()) && (!verifier.verifyStrings(getIncludes(), url.toString()))) {
                LOG.info("Skipped link as it doesn't match the includes list: " + url);
                continue;
            }
            if (!StringUtils.isEmpty(getExcludes()) && (verifier.verifyStrings(getExcludes(), url.toString()))) {
                LOG.info("Skipped link as matched the excludes list: " + url);
                continue;
            }
            visit(response, url, webClient);
        }
    }

    protected void visit(final HtmlPage referingPage, final URL url, final WebClient webClient) {
      final boolean ignoreJSErrorsOriginal = webClient.isThrowExceptionOnScriptError();
      if (fIgnoreForeignJSErrors && isForeignHost(url))
      {
        LOG.info("Ignore JS errors (if any) for " + url);
        webClient.setThrowExceptionOnScriptError(false);
      }
        final Page response = HtmlUnitBoundary.tryGetPageNoFail(url, webClient);
        webClient.setThrowExceptionOnScriptError(ignoreJSErrorsOriginal);

    fVisitedUrls.add(url);
        if (response == null) {
            fFailedVisits.add(new ZFailedLink(url, referingPage.getWebResponse().getRequestUrl()));
        }
        else {
            fValidLinks++;

          if (response instanceof HtmlPage) {
                followRecursively((HtmlPage) response, webClient);
          }
        }
    }

    protected void followRecursively(final HtmlPage htmlPage, final WebClient webClient) {
        LOG.debug("fMaxDepth = " + fMaxDepth);
        if (fCurrentDepth < fMaxDepth && !stopHunting(htmlPage)) {
            ++fCurrentDepth;
            checkVisits(webClient, htmlPage);
            --fCurrentDepth;
        }
    }

    protected String brokenLinksToString() {
        StringBuffer sb = new StringBuffer();
        for (Iterator iter = fFailedVisits.iterator(); iter.hasNext();) {
            ZFailedLink failedLink = (ZFailedLink) iter.next();
            sb.append(failedLink.getFailedUrl()).append(" on ").append(failedLink.getReferingUrl()).append("; ");
        }
        return sb.toString();
    }

    static int getLinkCount(final HtmlPage response) {
        return getGoodLinks(response).size();
    }

    /**
     * Gets all HTTP links in the response
     *
     * @param response
     * @return a set of {@link URL}
     */
    static Set getGoodLinks(final HtmlPage response) {
        LOG.info("Looking for links in " + response);
        final Set urls = new HashSet();

        for (final Iterator iter = response.getAnchors().iterator(); iter.hasNext();) {
            processLink(response, (HtmlAnchor) iter.next(), urls);
        }

        LOG.info(urls.size() + " different links found in page " + response.getWebResponse().getRequestUrl());
        return urls;
    }

    private static void processLink(final HtmlPage response, final HtmlAnchor link, final Set urls) {
        try {
            final URL url = response.getFullyQualifiedUrl(link.getHrefAttribute());
            final String protocol = url.getProtocol();
            if ("http".equals(protocol) || "https".equals(protocol)) {
                LOG.info("Adding url to check: " + url);
                urls.add(url);
            }
            else {
                LOG.info("Skipped link due to protocol: " + url);
            }
        }
        catch (final MalformedURLException e) {
            LOG.info("Skipped link due to bad url: " + link.getHrefAttribute());
        }
    }

    protected boolean stopHunting(final HtmlPage htmlPage) {
        return fOnsiteonly && isForeignHost(htmlPage.getWebResponse().getRequestUrl());
    }

    protected boolean isForeignHost(final URL url) {
        return !fBaseHost.equals(url.getHost());
    }

    protected void verifyProperties() {
        fMaxDepth = ConversionUtil.convertToInt(getDepth(), 0);
        optionalIntegerParamCheck(getDepth(), "depth", true);
    }
}

/**
 * Immutable pair describing a broken link: the url that could not be retrieved
 * and the url of the page on which the link was found.
 */
class ZFailedLink
{
    private final URL fFailedUrl;
    private final URL fReferingUrl;

    /**
     * @param failedUrl the url that could not be retrieved
     * @param referingUrl the url of the page containing the link
     */
    ZFailedLink(final URL failedUrl, final URL referingUrl) {
        this.fFailedUrl = failedUrl;
        this.fReferingUrl = referingUrl;
    }

    /**
     * @return the url that could not be retrieved
     */
    public URL getFailedUrl() {
        return this.fFailedUrl;
    }

    /**
     * @return the url of the page containing the link
     */
    public URL getReferingUrl() {
        return this.fReferingUrl;
    }
}
TOP

Related Classes of com.canoo.webtest.steps.verify.VerifyLinks

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.