/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
package org.apache.pdfbox.preflight.font.util;
import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC;
import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP;
import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_VALUE_ENCODING_PDFDOC;
import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_VALUE_ENCODING_WIN;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cff.IndexData;
import org.apache.fontbox.cff.Type1CharStringParser;
import org.apache.fontbox.cff.Type1FontUtil;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.MacRomanEncoding;
import org.apache.pdfbox.encoding.PdfDocEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;
/**
 * Lightweight parser for a Type 1 font program.
 *
 * The parser works in two steps:
 * <ol>
 * <li>the clear-text part is tokenized up to the "eexec" key word and the "/Encoding" definition, if any, is
 * recorded;</li>
 * <li>the eexec part is decrypted and tokenized in order to record the "/lenIV" value and the glyph
 * descriptions of the "/CharStrings" dictionary.</li>
 * </ol>
 * Everything that is collected is stored in the {@link Type1} object returned by {@link #parse()}. An instance
 * wraps a single font stream which is consumed by {@link #parse()}.
 */
public final class Type1Parser
{
    public static final Log LOGGER = LogFactory.getLog(Type1Parser.class);

    protected static final char NAME_START = '/';
    protected static final String NOTDEF = NAME_START + ".notdef";
    /**
     * Value of lenIV used as long as no "/lenIV" entry has been found in the eexec part.
     */
    protected static final int DEFAULT_LEN_IV = 4;

    private static final String PS_STANDARD_ENCODING = "StandardEncoding";
    private static final String PS_ISOLATIN_ENCODING = "ISOLatin1Encoding";
    private static final String TOKEN_ENCODING = "US-ASCII";
    private static final String UNEXPECTED_EOF = "Unexpected End Of File";
    /**
     * Size of the chunks used to copy the eexec part out of the font stream.
     */
    private static final int EEXEC_BUFFER_SIZE = 1024;

    /**
     * The PostScript font stream.
     */
    private final PeekInputStream fontProgram;
    /**
     * The length in bytes of the clear-text portion of the Type1 font program.
     */
    private final int clearTextSize;
    /**
     * The length in bytes of the eexec encrypted portion of the type1 font program.
     */
    private final int eexecSize;
    /**
     * This counter is used to know how many bytes have been read one by one. It is used to find the end of the
     * clear part of the font. The counter is still updated while the eexec part is tokenized but its value is
     * not used any more at that point.
     */
    private int numberOfReadBytes = 0;
    /**
     * Object which contains information coming from the parsing.
     */
    private final Type1 type1Font;

    private Type1Parser(InputStream type1, int length1, int length2, Encoding enc) throws IOException
    {
        this.fontProgram = new PeekInputStream(type1);
        this.clearTextSize = length1;
        this.eexecSize = length2;
        // ---- Instantiate the Encoding Map
        if (enc != null)
        {
            this.type1Font = new Type1(enc);
        }
        else
        {
            this.type1Font = new Type1(new StandardEncoding());
        }
        this.type1Font.addCidWithLabel(-1, NOTDEF);
    }

    /**
     * Create a parser which uses the StandardEncoding.
     *
     * @param fontProgram
     *            the stream of the font program extracted from the PDF file.
     * @param clearTextLength
     *            the length in bytes of the clear part of the font program.
     * @param eexecLength
     *            the length in bytes of the encoded part.
     * @return a parser ready to process the given font program.
     * @throws IOException
     *             if the parser can't be created from the given stream.
     */
    public static Type1Parser createParser(InputStream fontProgram, int clearTextLength, int eexecLength)
            throws IOException
    {
        Encoding encoding = getEncodingObject("");
        return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);
    }

    /**
     * Create a parser which uses the encoding identified by the given name. An unknown name falls back to the
     * StandardEncoding.
     *
     * @param fontProgram
     *            the stream of the font program extracted from the PDF file.
     * @param clearTextLength
     *            the length in bytes of the clear part of the font program.
     * @param eexecLength
     *            the length in bytes of the encoded part.
     * @param encodingName
     *            The name of encoding which is used by this font program.
     * @return a parser ready to process the given font program.
     * @throws IOException
     *             if the parser can't be created from the given stream.
     */
    public static Type1Parser createParserWithEncodingName(InputStream fontProgram, int clearTextLength,
            int eexecLength, String encodingName) throws IOException
    {
        Encoding encoding = getEncodingObject(encodingName);
        return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);
    }

    /**
     * Map an encoding name coming from the font dictionary to an Encoding object.
     *
     * @param encodingName
     *            the name of the encoding, may be null or unknown.
     * @return the matching encoding, a StandardEncoding if the name isn't recognized.
     */
    private static Encoding getEncodingObject(String encodingName)
    {
        // the "Mac expert" name is mapped on the MacRomanEncoding too
        if (FONT_DICTIONARY_VALUE_ENCODING_MAC.equals(encodingName)
                || FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP.equals(encodingName))
        {
            return new MacRomanEncoding();
        }
        if (FONT_DICTIONARY_VALUE_ENCODING_WIN.equals(encodingName))
        {
            return new WinAnsiEncoding();
        }
        if (FONT_DICTIONARY_VALUE_ENCODING_PDFDOC.equals(encodingName))
        {
            return new PdfDocEncoding();
        }
        return new StandardEncoding();
    }

    /**
     * Create a parser which uses the given encoding object.
     *
     * @param fontProgram
     *            the stream of the font program extracted from the PDF file.
     * @param clearTextLength
     *            the length in bytes of the clear part of the font program.
     * @param eexecLength
     *            the length in bytes of the encoded part.
     * @param encoding
     *            The encoding object which is used by this font program, a StandardEncoding is used if this
     *            parameter is null.
     * @return a parser ready to process the given font program.
     * @throws IOException
     *             if the parser can't be created from the given stream.
     */
    public static Type1Parser createParserWithEncodingObject(InputStream fontProgram, int clearTextLength,
            int eexecLength, Encoding encoding) throws IOException
    {
        return new Type1Parser(fontProgram, clearTextLength, eexecLength, encoding);
    }

    /**
     * Parse the clear-text part then the eexec part of the font program.
     *
     * @return the object filled with the information found in the font program.
     * @throws IOException
     *             if the font program is truncated or doesn't have the expected structure.
     */
    public Type1 parse() throws IOException
    {
        parseClearPartOfFontProgram(this.fontProgram);
        decodeAndParseEExecPart(this.fontProgram);
        return this.type1Font;
    }

    private void parseClearPartOfFontProgram(PeekInputStream stream) throws IOException
    {
        skipComments(stream);
        parseFontInformationUntilEncodingPart(stream);
    }

    private void decodeAndParseEExecPart(PeekInputStream stream) throws IOException
    {
        byte[] decodedEExecPart = decodeEexec(readEexec(stream));
        parseEExecPart(new PeekInputStream(new ByteArrayInputStream(decodedEExecPart)));
    }

    /**
     * Skip the leading lines which start with the '%' character.
     */
    private void skipComments(PeekInputStream stream) throws IOException
    {
        // the end of stream is detected by readLine
        while (stream.peek() == '%')
        {
            readLine(stream);
        }
    }

    /**
     * Tokenize the clear-text part until the "eexec" key word, then consume the remaining bytes of the clear
     * part so that the stream is positioned on the first byte of the eexec part.
     */
    private void parseFontInformationUntilEncodingPart(PeekInputStream stream) throws IOException
    {
        byte[] token = readToken(stream);
        while (!isEExecKeyWord(token))
        {
            // add here specific operation to memorize useful information
            if (isEncodingKeyWord(token))
            {
                parseEncodingDefinition(stream);
            }
            token = readToken(stream);
        }

        // If the declared length has already been exceeded, the equality test below can never succeed :
        // fail immediately instead of consuming the stream until it breaks.
        if (this.numberOfReadBytes > this.clearTextSize)
        {
            throw new IOException("Invalid length of the clear text part : " + this.clearTextSize
                    + " bytes expected but " + this.numberOfReadBytes + " bytes read to reach the eexec key word");
        }
        while (!isStartOfEExecReached())
        {
            if (readNextCharacter(stream) < 0)
            {
                throw new IOException(UNEXPECTED_EOF);
            }
        }
    }

    /**
     * Parse the value which follows the "/Encoding" key word : either the name of a predefined encoding or an
     * array definition made of "dup cid label put" sequences.
     */
    private void parseEncodingDefinition(PeekInputStream stream) throws IOException
    {
        String readableToken = asString(readToken(stream));
        if (PS_ISOLATIN_ENCODING.equals(readableToken))
        {
            this.type1Font.initEncodingWithISOLatin1Encoding();
        }
        else if (PS_STANDARD_ENCODING.equals(readableToken))
        {
            this.type1Font.initEncodingWithStandardEncoding();
        }
        else
        {
            // only the fact that the token is an integer matters, its value isn't used
            parseInteger(readableToken, "Invalid encoding : Expected int value before \"array\" "
                    + "key word if the Encoding isn't Standard or ISOLatin");
            throwExceptionIfUnexpectedToken("array", readToken(stream));
            readEndSetEncodingValues(stream);
        }
    }

    /**
     * Tokenize the decrypted eexec part. The lenIV value and the content of the CharStrings dictionary are
     * recorded, the other binary parts (announced by "RD" or "-|") are skipped.
     */
    private void parseEExecPart(PeekInputStream stream) throws IOException
    {
        int lenIV = DEFAULT_LEN_IV;
        byte[] previousToken = new byte[0];
        while (!isEndOfStream(stream))
        {
            byte[] token = readToken(stream);
            if (isLenIVKeyWord(token))
            {
                // lenIV belong to Private Dictionary.
                // If you create a method to parse PrivateDict, please update this function
                lenIV = parseInteger(asString(readToken(stream)), "Invalid lenIV : an integer value is expected");
            }
            else if (isBeginOfBinaryPart(token))
            {
                // the length of the binary part is the token which precedes "RD" or "-|"
                int lengthOfBinaryPart = parseInteger(asString(previousToken),
                        "Binary part found but previous token wasn't an integer");
                skipSingleBlankSeparator(stream);
                readBinaryData(stream, lengthOfBinaryPart);
                token = readToken(stream); // read the end of binary part
            }
            else if (isCharStringKeyWord(token))
            {
                parseCharStringArray(stream, lenIV);
            }
            previousToken = token;
        }
    }

    /**
     * Read the elements of the CharStrings dictionary. Each element is expected to look like
     * "label size RD binary ND", its decrypted description is added to the Type1 object.
     */
    private void parseCharStringArray(PeekInputStream stream, int lenIV) throws IOException
    {
        int numberOfElements = readNumberOfCharStrings(stream);
        goToBeginOfCharStringElements(stream);

        for (int remaining = numberOfElements; remaining > 0; --remaining)
        {
            String label = asString(readToken(stream));
            if ("end".equals(label))
            {
                // TODO thrown exception ? add an error/warning in the PreflightContext ??
                LOGGER.warn("[Type 1] Invalid number of elements in the CharString");
                break;
            }

            int sizeOfCharString = parseInteger(asString(readToken(stream)),
                    "Invalid CharString : an integer size is expected after the label " + label);
            readToken(stream); // skip "RD" or "-|" token
            skipSingleBlankSeparator(stream); // "RD" or "-|" are followed by a space

            byte[] descBinary = readBinaryData(stream, sizeOfCharString);
            byte[] description = Type1FontUtil.charstringDecrypt(descBinary, lenIV);
            Type1CharStringParser t1p = new Type1CharStringParser();
            // TODO provide the local subroutine indexes
            List<Object> operations = t1p.parse(description, new IndexData(0));
            type1Font.addGlyphDescription(label, new GlyphDescription(operations));

            readToken(stream); // skip "ND" or "|-" token
        }
    }

    /**
     * Consume the tokens up to and including the "begin" key word.
     */
    private void goToBeginOfCharStringElements(PeekInputStream stream) throws IOException
    {
        while (isNotBeginKeyWord(readToken(stream)))
        {
            // nothing to do, the token is ignored
        }
    }

    private boolean isNotBeginKeyWord(byte[] token) throws IOException
    {
        return !tokenEquals("begin", token);
    }

    private boolean isBeginOfBinaryPart(byte[] token) throws IOException
    {
        String word = asString(token);
        return "RD".equals(word) || "-|".equals(word);
    }

    private boolean isLenIVKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("/lenIV", token);
    }

    private boolean isCharStringKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("/CharStrings", token);
    }

    private int readNumberOfCharStrings(PeekInputStream stream) throws IOException
    {
        return parseInteger(asString(readToken(stream)), "Number of CharStrings elements is expected.");
    }

    private void throwExceptionIfUnexpectedToken(String expectedValue, byte[] token) throws IOException
    {
        String valueToCheck = asString(token);
        if (!expectedValue.equals(valueToCheck))
        {
            throw new IOException(expectedValue + " was expected but we received " + valueToCheck);
        }
    }

    /**
     * Read the content of an encoding array. Each "dup" key word must be followed by a character code and a
     * glyph label which are added to the Type1 object. The array ends with the "readonly def" sequence.
     */
    private void readEndSetEncodingValues(PeekInputStream stream) throws IOException
    {
        boolean lastTokenWasReadOnly = false;
        byte[] token = readToken(stream);
        while (!(lastTokenWasReadOnly && isDefKeyWord(token)))
        {
            if (isDupKeyWord(token))
            {
                String cid = asString(readToken(stream));
                String label = asString(readToken(stream));
                try
                {
                    this.type1Font.addCidWithLabel(Integer.parseInt(cid), label);
                }
                catch (NumberFormatException e)
                {
                    throw ioException("Invalid encoding : Expected CID value before \"" + label + "\" label", e);
                }
            }
            else
            {
                lastTokenWasReadOnly = isReadOnlyKeyWord(token);
            }
            token = readToken(stream);
        }
    }

    /**
     * Copy at most eexecSize bytes of the font stream. Fewer bytes are returned if the stream ends before.
     */
    private byte[] readEexec(PeekInputStream stream) throws IOException
    {
        if (eexecSize < 0)
        {
            throw new IOException("Invalid length of the eexec part : " + eexecSize);
        }

        byte[] buffer = new byte[EEXEC_BUFFER_SIZE];
        ByteArrayOutputStream eexecPart = new ByteArrayOutputStream();
        int total = 0;
        int lr;
        do
        {
            lr = stream.read(buffer, 0, EEXEC_BUFFER_SIZE);
            if (lr > 0)
            {
                // never keep more than the declared size of the eexec part
                int toCopy = Math.min(lr, eexecSize - total);
                eexecPart.write(buffer, 0, toCopy);
                total += toCopy;
            }
        } while (eexecSize > total && lr > 0);
        IOUtils.closeQuietly(eexecPart);
        return eexecPart.toByteArray();
    }

    private byte[] decodeEexec(byte[] eexec)
    {
        return Type1FontUtil.eexecDecrypt(eexec);
    }

    /**
     * Read the bytes up to and including the end of line marker ("\n", "\r" or "\r\n").
     *
     * @throws IOException
     *             if the end of the stream is reached before the end of the line.
     */
    private byte[] readLine(PeekInputStream stream) throws IOException
    {
        ByteArrayOutputStream line = new ByteArrayOutputStream();
        int currentCharacter;
        do
        {
            currentCharacter = readNextCharacter(stream);
            if (currentCharacter < 0)
            {
                // without this check the loop would never end on a truncated stream
                throw new IOException("Unexpected End Of File during a comment parsing");
            }
            line.write(currentCharacter);
        } while (currentCharacter != '\n' && currentCharacter != '\r');

        if (currentCharacter == '\r' && stream.peek() == '\n')
        {
            line.write(readNextCharacter(stream));
        }
        return line.toByteArray();
    }

    /**
     * Read the next token after the leading blank characters. A token is a string literal "(...)", an array
     * "[...]", a procedure "{...}" or a name/argument.
     */
    private byte[] readToken(PeekInputStream stream) throws IOException
    {
        skipBlankSeparators(stream);
        int nextByte = stream.peek();
        if (nextByte < 0)
        {
            throw new IOException(UNEXPECTED_EOF);
        }

        switch (nextByte)
        {
        case '(':
            return readDelimitedToken(stream, '(', ')');
        case '[':
            return readDelimitedToken(stream, '[', ']');
        case '{':
            return readDelimitedToken(stream, '{', '}');
        default:
            return readNameOrArgument(stream);
        }
    }

    /**
     * Read a token enclosed by the given pair of delimiters, nested pairs are part of the token. The stream
     * must be positioned on the opening delimiter. Escape sequences aren't interpreted.
     */
    private byte[] readDelimitedToken(PeekInputStream stream, int opening, int closing) throws IOException
    {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        int depth = 0;
        do
        {
            int currentByte = readNextCharacter(stream);
            if (currentByte < 0)
            {
                throw new IOException(UNEXPECTED_EOF);
            }

            if (currentByte == opening)
            {
                depth++;
            }
            else if (currentByte == closing)
            {
                depth--;
            }
            buffer.write(currentByte);
        } while (depth != 0);
        return buffer.toByteArray();
    }

    /**
     * Read bytes until the next one is a blank character, the start of a name ('/') or one of the array and
     * procedure delimiters.
     */
    private byte[] readNameOrArgument(PeekInputStream stream) throws IOException
    {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        int nextByte;
        do
        {
            int currentByte = readNextCharacter(stream);
            if (currentByte < 0)
            {
                throw new IOException(UNEXPECTED_EOF);
            }
            buffer.write(currentByte);
            nextByte = stream.peek();
        } while (isNotBlankSeparator(nextByte) && isNotBeginOfName(nextByte) && isNotSeparator(nextByte));
        return buffer.toByteArray();
    }

    /**
     * Read exactly the given number of bytes.
     *
     * @throws IOException
     *             if the length is negative or if the stream ends before all bytes are available.
     */
    private byte[] readBinaryData(PeekInputStream stream, int length) throws IOException
    {
        if (length < 0)
        {
            throw new IOException("Invalid length of binary part : " + length);
        }

        byte[] data = new byte[length];
        int offset = 0;
        // a single call to read may return less bytes than requested
        while (offset < length)
        {
            int read = stream.read(data, offset, length - offset);
            if (read <= 0)
            {
                throw new IOException(UNEXPECTED_EOF);
            }
            offset += read;
        }
        return data;
    }

    private boolean isNotBeginOfName(int character)
    {
        return NAME_START != character;
    }

    private boolean isNotSeparator(int character)
    {
        return '{' != character && '}' != character && '[' != character && ']' != character;
    }

    /**
     * Read a single byte and update the counter of read bytes.
     */
    private int readNextCharacter(PeekInputStream stream) throws IOException
    {
        int currentByte = stream.read();
        this.numberOfReadBytes++;
        return currentByte;
    }

    private void skipBlankSeparators(PeekInputStream stream) throws IOException
    {
        while (isBlankSeparator(stream.peek()))
        {
            readNextCharacter(stream);
        }
    }

    private void skipSingleBlankSeparator(PeekInputStream stream) throws IOException
    {
        if (isBlankSeparator(stream.peek()))
        {
            readNextCharacter(stream);
        }
    }

    private boolean isBlankSeparator(int character)
    {
        return character == ' ' || character == '\n' || character == '\r';
    }

    private boolean isNotBlankSeparator(int character)
    {
        return !isBlankSeparator(character);
    }

    private boolean isEExecKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("eexec", token);
    }

    private boolean isDefKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("def", token);
    }

    private boolean isReadOnlyKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("readonly", token);
    }

    private boolean isEncodingKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("/Encoding", token);
    }

    private boolean isDupKeyWord(byte[] token) throws IOException
    {
        return tokenEquals("dup", token);
    }

    private boolean isStartOfEExecReached()
    {
        return this.numberOfReadBytes == this.clearTextSize;
    }

    /**
     * Skip the blank characters and check if some content remains.
     *
     * @return true if nothing else than blank characters remains in the stream.
     */
    private boolean isEndOfStream(PeekInputStream stream)
    {
        try
        {
            skipBlankSeparators(stream);
            // a negative value is handled as the end of the stream, like in readToken
            return stream.peek() < 0;
        }
        catch (IOException e)
        {
            // NOTE(review): presumably the way PeekInputStream reports that it is exhausted - to confirm
            return true;
        }
    }

    /**
     * Convert the bytes of a token to a String using the token encoding.
     */
    private static String asString(byte[] token) throws IOException
    {
        return new String(token, TOKEN_ENCODING);
    }

    private static boolean tokenEquals(String expected, byte[] token) throws IOException
    {
        return expected.equals(asString(token));
    }

    /**
     * Convert the given text to an int.
     *
     * @throws IOException
     *             with the given message if the text isn't an integer.
     */
    private static int parseInteger(String text, String errorMessage) throws IOException
    {
        try
        {
            return Integer.parseInt(text);
        }
        catch (NumberFormatException e)
        {
            throw ioException(errorMessage, e);
        }
    }

    /**
     * Create an IOException which keeps track of its cause.
     */
    private static IOException ioException(String message, Throwable cause)
    {
        IOException exception = new IOException(message);
        exception.initCause(cause);
        return exception;
    }
}