Package org.pdfbox.util

Source Code of org.pdfbox.util.PDFStreamEngine$StreamResources

/**
* Copyright (c) 2003-2006, www.pdfbox.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
*    this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of pdfbox; nor the names of its
*    contributors may be used to endorse or promote products derived from this
*    software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* http://www.pdfbox.org
*
*/
package org.pdfbox.util;

import java.io.IOException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;

import org.pdfbox.cos.COSObject;
import org.pdfbox.cos.COSStream;
import org.pdfbox.exceptions.WrappedIOException;

import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.pdmodel.PDResources;

import org.pdfbox.pdmodel.font.PDFont;

import org.pdfbox.pdmodel.graphics.PDGraphicsState;

import org.pdfbox.util.operator.OperatorProcessor;

/**
* This class will run through a PDF content stream and execute certain operations
* and provide a callback interface for clients that want to do things with the stream.
* See the PDFTextStripper class for an example of how to use this class.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.35 $
*/
public class PDFStreamEngine
{
    private static final byte[] SPACE_BYTES = { (byte)32 };

    private PDGraphicsState graphicsState = null;

    private Matrix textMatrix = null;
    private Matrix textLineMatrix = null;
    private Stack graphicsStack = new Stack();
    //private PDResources resources = null;
   
    private Map operators = new HashMap();
   
    private Stack streamResourcesStack = new Stack();
   
    private PDPage page;
   
    private Map documentFontCache = new HashMap();
   
    /**
     * This is a simple internal class used by the Stream engine to handle the
     * resources stack.
     */
    private static class StreamResources
    {
        private Map fonts;
        private Map colorSpaces;
        private Map xobjects;
        private Map graphicsStates;
        private PDResources resources;
    }

    /**
     * Constructor.
     */
    public PDFStreamEngine()
    {
        //default constructor
    }
   
    /**
     * Constructor with engine properties.  The property keys are all
     * PDF operators, the values are class names used to execute those
     * operators.
     *
     * @param properties The engine properties.
     *
     * @throws IOException If there is an error setting the engine properties.
     */
    public PDFStreamEngine( Properties properties ) throws IOException
    {
        try
        {
            Iterator keys = properties.keySet().iterator();
            while( keys.hasNext() )
            {
                String operator = (String)keys.next();
                String operatorClass = properties.getProperty( operator );
                OperatorProcessor op = (OperatorProcessor)Class.forName( operatorClass ).newInstance();
                registerOperatorProcessor(operator, op);
            }
        }
        catch( Exception e )
        {
            throw new WrappedIOException( e );
        }
    }
   
    /**
     * Register a custom operator processor with the engine.
     *
     * @param operator The operator as a string.
     * @param op Processor instance.
     */
    public void registerOperatorProcessor( String operator, OperatorProcessor op )
    {
        op.setContext( this );
        operators.put( operator, op );
    }
   
    /**
     * This method must be called between processing documents.  The
     * PDFStreamEngine caches information for the document between pages
     * and this will release the cached information.  This only needs
     * to be called if processing a new document.
     *
     */
    public void resetEngine()
    {
        documentFontCache.clear();
    }

    /**
     * This will process the contents of the stream.
     *
     * @param aPage The page.
     * @param resources The location to retrieve resources.
     * @param cosStream the Stream to execute.
     *
     *
     * @throws IOException if there is an error accessing the stream.
     */
    public void processStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException
    {
        graphicsState = new PDGraphicsState();
        textMatrix = null;
        textLineMatrix = null;
        graphicsStack.clear();
        streamResourcesStack.clear();
       
        processSubStream( aPage, resources, cosStream );
    }
   
    /**
     * Process a sub stream of the current stream.
     *
     * @param aPage The page used for drawing.
     * @param resources The resources used when processing the stream.
     * @param cosStream The stream to process.
     *
     * @throws IOException If there is an exception while processing the stream.
     */
    public void processSubStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException
    {
        page = aPage;
        if( resources != null )
        {
            StreamResources sr = new StreamResources();
            sr.fonts = resources.getFonts( documentFontCache );
            sr.colorSpaces = resources.getColorSpaces();
            sr.xobjects = resources.getXObjects();
            sr.graphicsStates = resources.getGraphicsStates();
            sr.resources = resources;
            streamResourcesStack.push(sr);
        }
        try
        {
            List arguments = new ArrayList();
            List tokens = cosStream.getStreamTokens();
            if( tokens != null )
            {
                Iterator iter = tokens.iterator();
                while( iter.hasNext() )
                {
                    Object next = iter.next();
                    if( next instanceof COSObject )
                    {
                        arguments.add( ((COSObject)next).getObject() );
                    }
                    else if( next instanceof PDFOperator )
                    {
                        processOperator( (PDFOperator)next, arguments );
                        arguments = new ArrayList();
                    }
                    else
                    {
                        arguments.add( next );
                    }
                }
            }
        }
        finally
        {
            if( resources != null )
            {
                streamResourcesStack.pop();
            }
        }
       
    }

    /**
     * A method provided as an event interface to allow a subclass to perform
     * some specific functionality when a character needs to be displayed.
     *
     * @param text The character to be displayed.
     */
    protected void showCharacter( TextPosition text )
    {
        //subclasses can override to provide specific functionality.
    }

    /**
     * You should override this method if you want to perform an action when a
     * string is being shown.
     *
     * @param string The string to display.
     *
     * @throws IOException If there is an error showing the string
     */
    public void showString( byte[] string ) throws IOException
    {
        float spaceWidth = 0;
        float spacing = 0;
        StringBuffer stringResult = new StringBuffer(string.length);
       
        float characterHorizontalDisplacement = 0;
        float characterVerticalDisplacement = 0;
        float spaceDisplacement = 0;
        float fontSize = graphicsState.getTextState().getFontSize();
        float horizontalScaling = graphicsState.getTextState().getHorizontalScalingPercent()/100f;
        float verticalScaling = horizontalScaling;//not sure if this is right but what else to do???
        float rise = graphicsState.getTextState().getRise();
        final float wordSpacing = graphicsState.getTextState().getWordSpacing();
        final float characterSpacing = graphicsState.getTextState().getCharacterSpacing();
        float wordSpacingDisplacement = 0;
       
        PDFont font = graphicsState.getTextState().getFont();
       
        //This will typically be 1000 but in the case of a type3 font
        //this might be a different number
        float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 );
        float averageWidth = font.getAverageFontWidth();

        Matrix initialMatrix = new Matrix();
        initialMatrix.setValue(0,0,1);
        initialMatrix.setValue(0,1,0);
        initialMatrix.setValue(0,2,0);
        initialMatrix.setValue(1,0,0);
        initialMatrix.setValue(1,1,1);
        initialMatrix.setValue(1,2,0);
        initialMatrix.setValue(2,0,0);
        initialMatrix.setValue(2,1,rise);
        initialMatrix.setValue(2,2,1);


        //this
        int codeLength = 1;
        Matrix ctm = graphicsState.getCurrentTransformationMatrix();
       
        //lets see what the space displacement should be
        spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 )/glyphSpaceToTextSpaceFactor);
        if( spaceDisplacement == 0 )
        {
            spaceDisplacement = (averageWidth/glyphSpaceToTextSpaceFactor);
            //The average space width appears to be higher than necessary
            //so lets make it a little bit smaller.
            spaceDisplacement *= .80f;
        }
        int pageRotation = page.findRotation();
        Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm );
        float x = trm.getValue(2,0);
        float y = trm.getValue(2,1);
        if( pageRotation == 0 )
        {
            trm.setValue( 2,1, -y + page.findMediaBox().getHeight() );
        }
        else if( pageRotation == 90 )
        {
            trm.setValue( 2,0, y );
            trm.setValue( 2,1, x );
        }
        else if( pageRotation == 270 )
        {
            trm.setValue( 2,0, -y  + page.findMediaBox().getHeight() );
            trm.setValue( 2,1, x );
        }
        for( int i=0; i<string.length; i+=codeLength )
        {
            codeLength = 1;

            String c = font.encode( string, i, codeLength );
            if( c == null && i+1<string.length)
            {
                //maybe a multibyte encoding
                codeLength++;
                c = font.encode( string, i, codeLength );
            }
            stringResult.append( c );

            //todo, handle horizontal displacement
            characterHorizontalDisplacement += (font.getFontWidth( string, i, codeLength )/glyphSpaceToTextSpaceFactor);
            characterVerticalDisplacement =
                Math.max(
                    characterVerticalDisplacement,
                    font.getFontHeight( string, i, codeLength)/glyphSpaceToTextSpaceFactor);


            // PDF Spec - 5.5.2 Word Spacing
            //
            // Word spacing works the same was as character spacing, but applies
            // only to the space character, code 32.
            //
            // Note: Word spacing is applied to every occurrence of the single-byte
            // character code 32 in a string.  This can occur when using a simple
            // font or a composite font that defines code 32 as a single-byte code.
            // It does not apply to occurrences of the byte value 32 in multiple-byte
            // codes.
            //
            // RDD - My interpretation of this is that only character code 32's that
            // encode to spaces should have word spacing applied.  Cases have been
            // observed where a font has a space character with a character code
            // other than 32, and where word spacing (Tw) was used.  In these cases,
            // applying word spacing to either the non-32 space or to the character
            // code 32 non-space resulted in errors consistent with this interpretation.
            //
            if( (string[i] == 0x20) && c.equals( " " ) )
            {
                spacing += wordSpacing + characterSpacing;
            }
            else
            {
                spacing += characterSpacing;
            }
            // We want to update the textMatrix using the width, in text space units.
            //
           
        }
       
        //The adjustment will always be zero.  The adjustment as shown in the
        //TJ operator will be handled separately.
        float adjustment=0;
        //todo, need to compute the vertical displacement
        float ty = 0;
        float tx = ((characterHorizontalDisplacement-adjustment/glyphSpaceToTextSpaceFactor)*fontSize + spacing)
                   *horizontalScaling;
       
        float xScale = trm.getXScale();
        float yScale = trm.getYScale();
        float xPos = trm.getXPosition();
        float yPos = trm.getYPosition();
        spaceWidth = spaceDisplacement * xScale * fontSize;
        wordSpacingDisplacement = wordSpacing*xScale * fontSize;
        Matrix td = new Matrix();
        td.setValue( 2, 0, tx );
        td.setValue( 2, 1, ty );           
       
        float xPosBefore = textMatrix.getXPosition();
        float yPosBefore = textMatrix.getYPosition();
        textMatrix = td.multiply( textMatrix );

        float totalStringWidth = 0;
        float totalStringHeight = characterVerticalDisplacement * fontSize * yScale;
        if( pageRotation == 0 )
        {
            totalStringWidth = (textMatrix.getXPosition() - xPosBefore);
        }
        else if( pageRotation == 90 )
        {
            totalStringWidth = (textMatrix.getYPosition() - yPosBefore);
        }
        else if( pageRotation == 270 )
        {
            totalStringWidth = (yPosBefore - textMatrix.getYPosition());
        }
        showCharacter(
                new TextPosition(
                    xPos,
                    yPos,
                    xScale,
                    yScale,
                    totalStringWidth,
                    totalStringHeight,
                    spaceWidth,
                    stringResult.toString(),
                    font,
                    fontSize,
                    wordSpacingDisplacement ));
    }
   
    /**
     * This is used to handle an operation.
     *
     * @param operation The operation to perform.
     * @param arguments The list of arguments.
     *
     * @throws IOException If there is an error processing the operation.
     */
    public void processOperator( String operation, List arguments ) throws IOException
    {
        PDFOperator oper = PDFOperator.getOperator( operation );
        processOperator( oper, arguments );
    }

    /**
     * This is used to handle an operation.
     *
     * @param operator The operation to perform.
     * @param arguments The list of arguments.
     *
     * @throws IOException If there is an error processing the operation.
     */
    protected void processOperator( PDFOperator operator, List arguments ) throws IOException
    {
        String operation = operator.getOperation();
        OperatorProcessor processor = (OperatorProcessor)operators.get( operation );
        if( processor != null )
        {
            processor.process( operator, arguments );
        }
    }
  
    /**
     * @return Returns the colorSpaces.
     */
    public Map getColorSpaces()
    {
        return ((StreamResources) streamResourcesStack.peek()).colorSpaces;
    }
   
    /**
     * @return Returns the colorSpaces.
     */
    public Map getXObjects()
    {
        return ((StreamResources) streamResourcesStack.peek()).xobjects;
    }
   
    /**
     * @param value The colorSpaces to set.
     */
    public void setColorSpaces(Map value)
    {
        ((StreamResources) streamResourcesStack.peek()).colorSpaces = value;
    }
    /**
     * @return Returns the fonts.
     */
    public Map getFonts()
    {
        return ((StreamResources) streamResourcesStack.peek()).fonts;
    }
    /**
     * @param value The fonts to set.
     */
    public void setFonts(Map value)
    {
        ((StreamResources) streamResourcesStack.peek()).fonts = value;
    }
    /**
     * @return Returns the graphicsStack.
     */
    public Stack getGraphicsStack()
    {
        return graphicsStack;
    }
    /**
     * @param value The graphicsStack to set.
     */
    public void setGraphicsStack(Stack value)
    {
        graphicsStack = value;
    }
    /**
     * @return Returns the graphicsState.
     */
    public PDGraphicsState getGraphicsState()
    {
        return graphicsState;
    }
    /**
     * @param value The graphicsState to set.
     */
    public void setGraphicsState(PDGraphicsState value)
    {
        graphicsState = value;
    }
    /**
     * @return Returns the graphicsStates.
     */
    public Map getGraphicsStates()
    {
        return ((StreamResources) streamResourcesStack.peek()).graphicsStates;
    }
    /**
     * @param value The graphicsStates to set.
     */
    public void setGraphicsStates(Map value)
    {
        ((StreamResources) streamResourcesStack.peek()).graphicsStates = value;
    }
    /**
     * @return Returns the textLineMatrix.
     */
    public Matrix getTextLineMatrix()
    {
        return textLineMatrix;
    }
    /**
     * @param value The textLineMatrix to set.
     */
    public void setTextLineMatrix(Matrix value)
    {
        textLineMatrix = value;
    }
    /**
     * @return Returns the textMatrix.
     */
    public Matrix getTextMatrix()
    {
        return textMatrix;
    }
    /**
     * @param value The textMatrix to set.
     */
    public void setTextMatrix(Matrix value)
    {
        textMatrix = value;
    }
    /**
     * @return Returns the resources.
     */
    public PDResources getResources()
    {
        return ((StreamResources) streamResourcesStack.peek()).resources;
    }
   
    /**
     * Get the current page that is being processed.
     *
     * @return The page being processed.
     */
    public PDPage getCurrentPage()
    {
        return page;
    }
}
TOP

Related Classes of org.pdfbox.util.PDFStreamEngine$StreamResources

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.