Source Code of org.xwiki.test.misc.PDFTest

/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.xwiki.test.misc;


import java.awt.geom.Rectangle2D;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


import junit.framework.TestCase;


import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDAction;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDActionURI;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageXYZDestination;
import org.apache.pdfbox.util.PDFText2HTML;
import org.apache.pdfbox.util.PDFTextStripperByArea;


public class PDFTest extends TestCase
{
    /**
     * Verify that the PDF export feature works on a single simple page by downloading the PDF and parsing it using
     * PDFBox.
     * 
     * @see "XWIKI-7048: PDF export templates can display properties of other objects if the XWiki.PDFClass object is
     *      missing"
     */
    public void testExportSingleSimplePageAsPDF() throws Exception
    {
        // We're using Dashboard.WebHome page because it has objects of type XWiki.GadgetClass and they have a title
        // property which was mistaken with the title property of XWiki.PDFClass before XWIKI-7048 was fixed. The gadget
        // title contains Velocity code that isn't wrapped in a Velocity macro so it is printed as is if not rendered in
        // the right context.
        String text = getPDFContent(new URL("http://localhost:8080/xwiki/bin/export/Dashboard/WebHome?format=pdf"));
        assertTrue("Invalid content", text.contains("Welcome to your wiki"));
        assertFalse("Invalid content", text.contains("$services.localization.render("));
    }


    /**
     * Verify that we can export content having links to attachments.
     * 
     * @see "XWIKI-8978: PDF Export does not handle XWiki links to attached files properly"
     */
    public void testExportContentWithAttachmentLink() throws Exception
    {
        URL pdfExportURL = new URL("http://localhost:8080/xwiki/bin/export/Sandbox/WebHome?format=pdf");
        Map<String, String> urls = extractURLs(pdfExportURL);
        assertTrue(urls.containsKey("XWikiLogo.png"));
        assertEquals("http://localhost:8080/xwiki/bin/download/Sandbox/WebHome/XWikiLogo.png",
            urls.get("XWikiLogo.png"));


        // Ideally we should be asserting for a value of 1 (for the embedded XWikiLogo.png image) but it seems the PDF
        // contains 2 image objects (for some reason I don't understand ATM - they seem to be variations of the same
        // image - the logo - in color, in black and white, etc).
        assertEquals(2, getImages(pdfExportURL).size());
    }


    /**
     * Verify the PDF export with table of contents.
     * 
     * @see "XWIKI-9370: PDF Export doesn't list the Table of Contents under certain circumstances"
     */
    public void testTableOfContents() throws Exception
    {
        Map<String, String> internalLinks =
            extractToLinks(new URL("http://localhost:8080/xwiki/bin/export/Sandbox/WebHome"
                + "?format=pdf&pdftoc=1&attachments=1&pdfcover=0"), 0);
        // Make sure we have a Table of Contents.
        assertTrue(internalLinks.containsKey("Mixed list"));
        // Make sure the Table of Contents links point to their corresponding heading.
        for (Map.Entry<String, String> entry : internalLinks.entrySet()) {
            assertTrue(entry.getValue().contains(entry.getKey()));
        }
    }


    private String getPDFContent(URL url) throws Exception
    {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        InputStream is = connection.getInputStream();
        PDDocument pdd = PDDocument.load(is);
        PDFText2HTML stripper = new PDFText2HTML("UTF-8");
        String text = stripper.getText(pdd);
        pdd.close();
        is.close();
        return text;
    }


    private Map<String, PDXObjectImage> getImages(URL url) throws Exception
    {
        Map<String, PDXObjectImage> results = new HashMap<>();


        PDDocument document = PDDocument.load(url);
        List<PDPage> list = document.getDocumentCatalog().getAllPages();
        for (PDPage page : list) {
            PDResources pdResources = page.getResources();
            Map pageImages = pdResources.getImages();
            if (pageImages != null) {
                Iterator imageIter = pageImages.keySet().iterator();
                while (imageIter.hasNext()) {
                    String key = (String) imageIter.next();
                    PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key);
                    results.put(key, pdxObjectImage);
                }
            }
        }


        return results;
    }


    private Map<String, String> extractURLs(URL url) throws Exception
    {
        Map<String, String> urls = new HashMap<String, String>();
        PDDocument document = null;
        try {
            document = PDDocument.load(url);
            for (Map.Entry<String, PDAction> entry : extractLinks(document).entrySet()) {
                if (entry.getValue() instanceof PDActionURI) {
                    PDActionURI uri = (PDActionURI) entry.getValue();
                    urls.put(entry.getKey(), uri.getURI());
                }
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
        return urls;
    }


    private Map<String, String> extractToLinks(URL url, int tocPageIndex) throws Exception
    {
        Map<String, String> internalLinks = new HashMap<String, String>();
        PDDocument document = null;
        try {
            document = PDDocument.load(url);
            PDPage tocPage = (PDPage) document.getDocumentCatalog().getAllPages().get(tocPageIndex);
            for (Map.Entry<String, PDAction> entry : extractLinks(tocPage).entrySet()) {
                if (entry.getValue() instanceof PDActionGoTo) {
                    PDActionGoTo anchor = (PDActionGoTo) entry.getValue();
                    internalLinks.put(entry.getKey(), getDestinationText(anchor.getDestination()));
                }
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
        return internalLinks;
    }


    @SuppressWarnings("unchecked")
    private Map<String, PDAction> extractLinks(PDDocument document) throws Exception
    {
        Map<String, PDAction> links = new HashMap<String, PDAction>();
        for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) {
            links.putAll(extractLinks(page));
        }
        return links;
    }


    /**
     * Code adapted from http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/PrintURLs.java.html
     */
    private Map<String, PDAction> extractLinks(PDPage page) throws Exception
    {
        Map<String, PDAction> links = new HashMap<String, PDAction>();
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        List<PDAnnotation> annotations = page.getAnnotations();
        // First setup the text extraction regions.
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annotation = annotations.get(j);
            if (annotation instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annotation;
                PDRectangle rect = link.getRectangle();
                // Need to reposition link rectangle to match text space.
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.findRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.findMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    // Do nothing.
                }


                Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                stripper.addRegion(String.valueOf(j), awtRect);
            }
        }


        stripper.extractRegions(page);


        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annotation = annotations.get(j);
            if (annotation instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annotation;
                String label = stripper.getTextForRegion(String.valueOf(j)).trim();
                links.put(label, link.getAction());
            }
        }


        return links;
    }


    private String getDestinationText(PDDestination destination) throws Exception
    {
        if (destination instanceof PDPageXYZDestination) {
            return getDestinationText((PDPageXYZDestination) destination);
        } else if (destination instanceof PDPageDestination) {
            return "Page " + ((PDPageDestination) destination).getPageNumber();
        }
        return destination.toString();
    }


    private String getDestinationText(PDPageXYZDestination destination) throws Exception
    {
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.addRegion("destination", getRectangleBelowDestination(destination));
        stripper.extractRegions(destination.getPage());
        return stripper.getTextForRegion("destination").trim();
    }


    private Rectangle2D getRectangleBelowDestination(PDPageXYZDestination destination)
    {
        PDPage page = destination.getPage();
        PDRectangle pageSize = page.findMediaBox();
        float x = destination.getLeft();
        float y = pageSize.getHeight() - destination.getTop();
        float width = pageSize.getWidth();
        float height = destination.getTop();
        return new Rectangle2D.Float(x, y, width, height);
    }
}
Source Code of org.xwiki.test.misc.PDFTest

Related Classes of org.xwiki.test.misc.PDFTest