Package org.apache.pdfbox.preflight.process

Source Code of org.apache.pdfbox.preflight.process.TrailerValidationProcess

/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/

package org.apache.pdfbox.preflight.process;

import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_E;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_H;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_L;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_N;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_O;
import static org.apache.pdfbox.preflight.PreflightConstants.DICTIONARY_KEY_LINEARIZED_T;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_ENCRYPT;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_ID;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_INFO;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_PREV;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_ROOT;
import static org.apache.pdfbox.preflight.PreflightConstants.TRAILER_DICTIONARY_KEY_SIZE;

import java.io.IOException;
import java.util.List;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.persistence.util.COSObjectKey;
import org.apache.pdfbox.preflight.PreflightConstants;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.COSUtils;

public class TrailerValidationProcess extends AbstractProcess
{

    public void validate(PreflightContext ctx) throws ValidationException
    {
        PDDocument pdfDoc = ctx.getDocument();

        COSDictionary linearizedDict = getLinearizedDictionary(pdfDoc);
        if (linearizedDict != null)
        {
            // it is a linearized PDF, check the linearized dictionary
            checkLinearizedDictionnary(ctx, linearizedDict);

            // if the pdf is a linearized pdf. the first trailer must be checked
            // and it must have the same ID than the last trailer.
            // According to the PDF version, trailers are available by the trailer key word (pdf <= 1.4)
            // or in the dictionary of the XRef stream ( PDF >= 1.5)
            String pdfVersion = pdfDoc.getDocument().getHeaderString();
            if (pdfVersion != null && pdfVersion.matches("%PDF-1\\.[1-4]"))
            {
                checkTrailersForLinearizedPDF14(ctx);
            }
            else
            {
                checkTrailersForLinearizedPDF15(ctx);
            }

        }
        else
        {
            // If the PDF isn't a linearized one, only the last trailer must be checked
            checkMainTrailer(ctx, pdfDoc.getDocument().getTrailer());
        }
    }

    /**
     * Extracts and compares first and last trailers for PDF version between 1.1 and 1.4
     *
     * @param ctx the preflight context.
     */
    protected void checkTrailersForLinearizedPDF14(PreflightContext ctx)
    {
        COSDictionary first = ctx.getXrefTableResolver().getFirstTrailer();
        if (first == null)
        {
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER, "There are no trailer in the PDF file"));
        }
        else
        {
            COSDictionary last = ctx.getXrefTableResolver().getLastTrailer();
            COSDocument cosDoc = new COSDocument();
            checkMainTrailer(ctx, first);
            if (!compareIds(first, last, cosDoc))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                        "ID is different in the first and the last trailer"));
            }
            COSUtils.closeDocumentQuietly(cosDoc);
        }
    }

    /**
     * Accesses and compares First and Last trailers for a PDF version higher than 1.4.
     *
     * @param ctx the preflight context.
     */
    protected void checkTrailersForLinearizedPDF15(PreflightContext ctx)
    {
        PDDocument pdfDoc = ctx.getDocument();
        try
        {
            COSDocument cosDocument = pdfDoc.getDocument();
            List<COSObject> xrefs = cosDocument.getObjectsByType(COSName.XREF);

            if (xrefs.isEmpty())
            {
                // no XRef CosObject, may by this pdf file used the PDF 1.4 syntaxe
                checkTrailersForLinearizedPDF14(ctx);

            }
            else
            {

                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                COSDictionary firstTrailer = null;
                COSDictionary lastTrailer = null;

                // Search First and Last trailers according to offset position.
                for (COSObject co : xrefs)
                {
                    long offset = cosDocument.getXrefTable().get(new COSObjectKey(co));
                    if (offset < min)
                    {
                        min = offset;
                        firstTrailer = (COSDictionary) co.getObject();
                    }

                    if (offset > max)
                    {
                        max = offset;
                        lastTrailer = (COSDictionary) co.getObject();
                    }

                }

                checkMainTrailer(ctx, firstTrailer);
                if (!compareIds(firstTrailer, lastTrailer, cosDocument))
                {
                    addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                            "ID is different in the first and the last trailer"));
                }
            }
        }
        catch (IOException e)
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to check PDF Trailers due to : " + e.getMessage()));
        }
    }

    /**
     * Return true if the ID of the first dictionary is the same as the id of the last dictionary Return false
     * otherwise.
     *
     * @param first the first dictionary for comparison.
     * @param last the last dictionary for comparison.
     * @return true if the IDs of the first and last dictionary are the same.
     */
    protected boolean compareIds(COSDictionary first, COSDictionary last, COSDocument cosDocument)
    {
        COSBase idFirst = first.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID));
        COSBase idLast = last.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID));

        if (idFirst == null || idLast == null)
        {
            return false;
        }

        // ---- cast two COSBase to COSArray.
        COSArray af = COSUtils.getAsArray(idFirst, cosDocument);
        COSArray al = COSUtils.getAsArray(idLast, cosDocument);

        // ---- if one COSArray is null, the PDF/A isn't valid
        if ((af == null) || (al == null))
        {
            return false;
        }

        // ---- compare both arrays
        boolean isEqual = true;
        for (Object of : af.toList())
        {
            boolean oneIsEquals = false;
            for (Object ol : al.toList())
            {
                // ---- according to PDF Reference 1-4, ID is an array containing two
                // strings
                if (!oneIsEquals)
                    oneIsEquals = ((COSString) ol).getString().equals(((COSString) of).getString());
                else
                    break;
            }
            isEqual = isEqual && oneIsEquals;
            if (!isEqual)
            {
                break;
            }
        }
        return isEqual;
    }

    /**
     * check if all keys are authorized in a trailer dictionary and if the type is valid.
     *
     * @param ctx the preflight context.
     * @param trailer the trailer dictionary.
     */
    protected void checkMainTrailer(PreflightContext ctx, COSDictionary trailer)
    {
        boolean id = false;
        boolean root = false;
        boolean size = false;
        boolean prev = false;
        boolean info = false;
        boolean encrypt = false;

        for (Object key : trailer.keySet())
        {
            if (!(key instanceof COSName))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The trailer dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ENCRYPT))
            {
                encrypt = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_SIZE))
            {
                size = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_PREV))
            {
                prev = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ROOT))
            {
                root = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_INFO))
            {
                info = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ID))
            {
                id = true;
            }
        }

        COSDocument cosDocument = ctx.getDocument().getDocument();
        // PDF/A Trailer dictionary must contain the ID key
        if (!id)
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_MISSING_ID,
                    "The trailer dictionary doesn't contain ID"));
        }
        else
        {
            COSBase trailerId = trailer.getItem(TRAILER_DICTIONARY_KEY_ID);
            if (!COSUtils.isArray(trailerId, cosDocument))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                        "The trailer dictionary contains an id but it isn't an array"));
            }
        }
        // PDF/A Trailer dictionary mustn't contain the Encrypt key
        if (encrypt)
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_ENCRYPT,
                    "The trailer dictionary contains Encrypt"));
        }
        // PDF Trailer dictionary must contain the Size key
        if (!size)
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_MISSING_SIZE,
                    "The trailer dictionary doesn't contain Size"));
        }
        else
        {
            COSBase trailerSize = trailer.getItem(TRAILER_DICTIONARY_KEY_SIZE);
            if (!COSUtils.isInteger(trailerSize, cosDocument))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                        "The trailer dictionary contains a size but it isn't an integer"));
            }
        }

        // PDF Trailer dictionary must contain the Root key
        if (!root)
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_MISSING_ROOT,
                    "The trailer dictionary doesn't contain Root"));
        }
        else
        {
            COSBase trailerRoot = trailer.getItem(TRAILER_DICTIONARY_KEY_ROOT);
            if (!COSUtils.isDictionary(trailerRoot, cosDocument))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                        "The trailer dictionary contains a root but it isn't a dictionary"));
            }
        }
        // PDF Trailer dictionary may contain the Prev key
        if (prev)
        {
            COSBase trailerPrev = trailer.getItem(TRAILER_DICTIONARY_KEY_PREV);
            if (!COSUtils.isInteger(trailerPrev, cosDocument))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                        "The trailer dictionary contains a prev but it isn't an integer"));
            }
        }
        // PDF Trailer dictionary may contain the Info key
        if (info)
        {
            COSBase trailerInfo = trailer.getItem(TRAILER_DICTIONARY_KEY_INFO);
            if (!COSUtils.isDictionary(trailerInfo, cosDocument))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                        "The trailer dictionary contains an info but it isn't a dictionary"));
            }
        }
    }

    /**
     * According to the PDF Reference, A linearized PDF contain a dictionary as first object (linearized dictionary) and
     * only this one in the first section.
     *
     * @param document the document to validate.
     * @return the linearization dictionary or null.
     */
    protected COSDictionary getLinearizedDictionary(PDDocument document)
    {
        // ---- Get Ref to obj
        COSDocument cDoc = document.getDocument();
        List<?> lObj = cDoc.getObjects();
        for (Object object : lObj)
        {
            COSBase curObj = ((COSObject) object).getObject();
            if (curObj instanceof COSDictionary
                    && ((COSDictionary) curObj).keySet().contains(COSName.getPDFName(DICTIONARY_KEY_LINEARIZED)))
            {
                return (COSDictionary) curObj;
            }
        }
        return null;
    }

    /**
     * Check if mandatory keys of linearized dictionary are present.
     *
     * @param ctx the preflight context.
     * @param linearizedDict the linearization dictionary.
     */
    protected void checkLinearizedDictionnary(PreflightContext ctx, COSDictionary linearizedDict)
    {
        // ---- check if all keys are authorized in a linearized dictionary
        // ---- Linearized dictionary must contain the lhoent keys
        boolean l = false;
        boolean h = false;
        boolean o = false;
        boolean e = false;
        boolean n = false;
        boolean t = false;

        for (Object key : linearizedDict.keySet())
        {
            if (!(key instanceof COSName))
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The Linearized dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_L))
            {
                l = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_H))
            {
                h = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_O))
            {
                o = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_E))
            {
                e = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_N))
            {
                n = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_T))
            {
                t = true;
            }
        }

        if (!(l && h && o && e && t && n))
        {
            addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_SYNTAX_DICT_INVALID,
                    "Invalid key in The Linearized dictionary"));
        }

        return;
    }

}
TOP

Related Classes of org.apache.pdfbox.preflight.process.TrailerValidationProcess

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.