// Source Code of org.apache.padaf.preflight.font.type1.Type1Parser

/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/


package org.apache.padaf.preflight.font.type1;


import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_PDFDOC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_WIN;


import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;


import org.apache.commons.io.IOUtils;
import org.apache.fontbox.cff.Type1CharStringParser;
import org.apache.fontbox.cff.Type1FontUtil;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.MacRomanEncoding;
import org.apache.pdfbox.encoding.PdfDocEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;


public class Type1Parser {
  protected static final char NAME_START = '/';
  protected static final String NOTDEF = NAME_START + ".notdef";
  protected static final int DEFAULT_LEN_IV = 4;
  
  private static final String PS_STANDARD_ENCODING = "StandardEncoding";
  private static final String PS_ISOLATIN_ENCODING = "ISOLatin1Encoding";


  private static final String TOKEN_ENCODING = "US-ASCII";


  /**
   * The PostScript font stream.
   */
  private PeekInputStream fontProgram = null;
  /**
   * The length in bytes of the clear-text portion of the Type1 font program.
   */
  private int clearTextSize = 0;
  /**
   * The length in bytes of the eexec encrypted portion of the type1 font
   * program.
   */
  private int eexecSize = 0;


  /**
   * This counter is used to know how many byte have been read.
   * It is used to read the clear part of the font. this computer is
   * updated during the parsing of the encoding part too but it is not 
   * used.
   */
  private int numberOfReadBytes = 0;
  
  /**
   * Object which contains information coming from the parsing.
   */
  private Type1 type1Font = null;
  
  private Type1Parser(InputStream type1, int length1, int length2, Encoding enc) throws IOException {
    super();
    this.fontProgram = new PeekInputStream(type1);
    this.clearTextSize = length1;
    this.eexecSize = length2;
    // ---- Instantiate the Encoding Map
    if (enc != null) {
      this.type1Font = new Type1(enc);
    } else {
      this.type1Font = new Type1(new StandardEncoding());
    }
    this.type1Font.addCidWithLabel(-1, NOTDEF);
  }


  /**
   * 
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParser(
      InputStream fontProgram, 
      int clearTextLength, 
      int eexecLength) throws IOException {


    Encoding encoding = getEncodingObject("");
    return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);    
  }


  /**
   * 
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encodingName The name of encoding which is used by this font program.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParserWithEncodingName(
      InputStream fontProgram, 
      int clearTextLength, 
      int eexecLength, 
      String encodingName) throws IOException {


    Encoding encoding = getEncodingObject(encodingName);
    return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);
  }


  private static Encoding getEncodingObject(String encodingName) {
    Encoding encoding = new StandardEncoding();
    if (FONT_DICTIONARY_VALUE_ENCODING_MAC.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_WIN.equals(encodingName)) {
      encoding = new WinAnsiEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_PDFDOC.equals(encodingName)) {
      encoding = new PdfDocEncoding();
    }
    return encoding;
  }


  /**
   * 
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encodingName The encoding object which is used by this font program.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParserWithEncodingObject(
      InputStream fontProgram, 
      int clearTextLength, 
      int eexecLength, 
      Encoding encoding)  throws IOException {


    return new Type1Parser(fontProgram, clearTextLength, eexecLength, encoding);
  }


  public Type1 parse() throws IOException {
    parseClearPartOfFontProgram(this.fontProgram);
    decodeAndParseEExecPart(this.fontProgram);
    return this.type1Font;
  }


  private void parseClearPartOfFontProgram(PeekInputStream stream) throws IOException {
    skipComments(stream);
    parseFontInformationUntilEncodingPart(stream);
  }


  private void decodeAndParseEExecPart(PeekInputStream stream) throws IOException {
    byte[] eexecPart = readEexec(stream);
    byte[] decodedEExecPart = decodeEexec(eexecPart);
    PeekInputStream eexecStream = new PeekInputStream(new ByteArrayInputStream(decodedEExecPart));
    parseEExecPart(eexecStream);
  }


  private void skipComments(PeekInputStream stream) throws IOException {
    int nextChar = stream.peek();
    while (nextChar == '%') {
      if (nextChar == -1) {
        throw new IOException("Unexpected End Of File during a comment parsing");
      }
      readLine(stream);
      nextChar = stream.peek();
    }
  }


  private void parseFontInformationUntilEncodingPart(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    while (!isEExecKeyWord(token)) {
      // add here specific operation to memorize useful information
      if (isEncodingKeyWord(token)) {
        parseEncodingDefinition(stream);
      }
      token = readToken(stream);
    }


    while (!isStartOfEExecReached()) {
      readNextCharacter(stream);
    }
  }


  private void parseEncodingDefinition(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    String readableToken = new String(token, TOKEN_ENCODING);
    if (PS_ISOLATIN_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithISOLatin1Encoding();
    } else if (PS_STANDARD_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithStandardEncoding();
    } else {
      try {
        Integer.parseInt(readableToken);
        throwExceptionIfUnexpectedToken("array", readToken(stream));
        readEndSetEncodingValues(stream);
      } catch (NumberFormatException e) {
        throw new IOException("Invalid encoding : Expected int value before \"array\" " 
            + "key word if the Encoding isn't Standard or ISOLatin");
      }
    }
  }


  private void parseEExecPart(PeekInputStream stream) throws IOException {
    int lenIV = DEFAULT_LEN_IV;
    byte[] previousToken = new byte[0];
    while(!isEndOfStream(stream)) {
      byte[] token = readToken(stream);
      if (isLenIVKeyWord(token)) {
        // lenIV belong to Private Dictionary. 
        // If you create a method to parse PrivateDict, please update this function
        byte[] l = readToken(stream);
        lenIV = Integer.parseInt(new String(l, TOKEN_ENCODING));
      } else if (isBeginOfBinaryPart(token)) {
        try {
          int lengthOfBinaryPart = Integer.parseInt(new String(previousToken, TOKEN_ENCODING));
          skipSingleBlankSeparator(stream);
          stream.read(new byte[lengthOfBinaryPart], 0, lengthOfBinaryPart);
          token = readToken(stream); // read the end of binary part
        } catch (NumberFormatException e) {
          throw new IOException("Binary part found but previous token wasn't an integer");
        }
      } else if (isCharStringKeyWord(token)) {
        parseCharStringArray(stream, lenIV);
      }
      previousToken = token;
    }
  }


  private void parseCharStringArray(PeekInputStream stream, int lenIV) throws IOException {
    int numberOfElements = readNumberOfCharStrings(stream);
    goToBeginOfCharStringElements(stream);
    
    while (numberOfElements > 0) {
      readCharStringElement(stream, lenIV);
      --numberOfElements;
    }
  }


  private void goToBeginOfCharStringElements(PeekInputStream stream) throws IOException {
    byte[] token = new byte[0];
    do {
      token = readToken(stream);
    } while(isNotBeginKeyWord(token));
  }


  private void readCharStringElement(PeekInputStream stream, int lenIV) throws IOException {
    byte[] labelToken = readToken(stream);
    String label = new String(labelToken, TOKEN_ENCODING);


    byte[] sizeOfCharStringToken = readToken(stream);
    int sizeOfCharString = Integer.parseInt(new String(sizeOfCharStringToken,TOKEN_ENCODING));


    readToken(stream); // skip "RD" or "-|" token
    skipSingleBlankSeparator(stream); // "RD" or "-|" are followed by a space


    byte[] descBinary = new byte[sizeOfCharString];
    stream.read(descBinary, 0, sizeOfCharString);
    byte[] description = Type1FontUtil.charstringDecrypt(descBinary, lenIV);
    Type1CharStringParser t1p = new Type1CharStringParser();
    List<Object> operations = t1p.parse(description);
    type1Font.addGlyphDescription(label, new GlyphDescription(operations));


    readToken(stream); // skip "ND" or "|-" token
  }
  
  private boolean isNotBeginKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return !"begin".equals(word);    
  }


  private boolean isBeginOfBinaryPart(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return ("RD".equals(word) || "-|".equals(word));
  }


  private boolean isLenIVKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/lenIV".equals(word);
  }


  private boolean isCharStringKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/CharStrings".equals(word);
  }


  private int readNumberOfCharStrings(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    String word = new String(token, TOKEN_ENCODING);
    try {
      return Integer.parseInt(word);
    } catch (NumberFormatException e) {
      throw new IOException("Number of CharStrings elements is expected.");
    }
  }
  
  private void throwExceptionIfUnexpectedToken(String expectedValue, byte[] token) throws IOException {
    String valueToCheck = new String(token, TOKEN_ENCODING);
    if (!expectedValue.equals(valueToCheck)) {
      throw new IOException(expectedValue + " was expected but we received " + valueToCheck);
    }
  }


  private void readEndSetEncodingValues(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    boolean lastTokenWasReadOnly = false;
    while ( !(lastTokenWasReadOnly && isDefKeyWord(token)) ) {
      if (isDupKeyWord(token)) {
        byte[] cidToken = readToken(stream);
        byte[] labelToken = readToken(stream);
        String cid = new String(cidToken, TOKEN_ENCODING);
        String label = new String(labelToken, TOKEN_ENCODING);
        try {  
          this.type1Font.addCidWithLabel(Integer.parseInt(cid), label);
        } catch (NumberFormatException e) {
          throw new IOException("Invalid encoding : Expected CID value before \"" + label + "\" label");
        }
      } else {
        lastTokenWasReadOnly = isReadOnlyKeyWord(token);
      }
      token = readToken(stream);
    }
  }


  private byte[] readEexec(PeekInputStream stream) throws IOException {
    int BUFFER_SIZE = 1024;
    byte[] buffer = new byte[BUFFER_SIZE];
    ByteArrayOutputStream eexecPart = new ByteArrayOutputStream();
    int lr = 0;
    int total = 0;
    do {
      lr = stream.read(buffer, 0, BUFFER_SIZE);
      if (lr == BUFFER_SIZE && (total + BUFFER_SIZE < eexecSize)) {
        eexecPart.write(buffer, 0, BUFFER_SIZE);
        total += BUFFER_SIZE;
      } else if (lr > 0 && (total + lr < eexecSize)) {
        eexecPart.write(buffer, 0, lr);
        total += lr;
      } else if (lr > 0 && (total + lr >= eexecSize)) {
        eexecPart.write(buffer, 0, eexecSize - total);
        total += (eexecSize - total);
      }
    } while (eexecSize > total && lr > 0);
    IOUtils.closeQuietly(eexecPart);
    return eexecPart.toByteArray();
  }


  private byte[] decodeEexec(byte[] eexec) {
    return Type1FontUtil.eexecDecrypt(eexec);
  }


  private byte[] readLine(PeekInputStream stream) throws IOException {
    ArrayList<Byte> bytes = new ArrayList<Byte>();
    int currentCharacter = 0;


    do {
      currentCharacter = readNextCharacter(stream);
      bytes.add((byte)(currentCharacter & 0xFF));
    } while ( !('\n' == currentCharacter || '\r' == currentCharacter)) ;


    if ('\r' == currentCharacter && '\n' == stream.peek()) {
      currentCharacter = readNextCharacter(stream);
      bytes.add((byte)(currentCharacter & 0xFF));
    }


    byte[] result = new byte[bytes.size()];
    for (int i = 0 ; i < bytes.size(); ++i) {
      result[i] = bytes.get(i);
    }
    return result;
  }


  private byte[] readToken(PeekInputStream stream) throws IOException {
    byte[] token = new byte[0];
    skipBlankSeparators(stream);


    int nextByte = stream.peek();
    if (nextByte < 0) {
      throw new IOException("Unexpected End Of File");
    } 


    if (nextByte == '(') {
      token = readStringLiteral(stream);
    } else if (nextByte == '[') {
      token = readArray(stream);
    } else if (nextByte == '{') {
      token = readProcedure(stream);
    } else {
      token = readNameOrArgument(stream); 
    }


    return token;
  }
  
  private byte[] readStringLiteral(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
    
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }


      if (currentByte == '(') {
        opened++;
      } else if (currentByte == ')') {
        opened--;
      }


      buffer.add(currentByte);
    } while (opened != 0);


    return convertListOfIntToByteArray(buffer);
  }


  private byte[] readArray(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
    
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }


      if (currentByte == '[') {
        opened++;
      } else if (currentByte == ']') {
        opened--;
      }


      buffer.add(currentByte);
    } while (opened != 0);


    return convertListOfIntToByteArray(buffer);
  }


  private byte[] readProcedure(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
    
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }


      if (currentByte == '{') {
        opened++;
      } else if (currentByte == '}') {
        opened--;
      }


      buffer.add(currentByte);
    } while (opened != 0);


    return convertListOfIntToByteArray(buffer);
  }
  
  private byte[] readNameOrArgument(PeekInputStream stream) throws IOException {
    List<Integer> buffer = new ArrayList<Integer>();
    int nextByte = 0;
    do {
      int currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }
      buffer.add(currentByte);
      nextByte = stream.peek();
    } while (isNotBlankSperator(nextByte) && isNotBeginOfName(nextByte) && isNotSeparator(nextByte));


    return convertListOfIntToByteArray(buffer);
  }
  
  private boolean isNotBeginOfName(int character) {
    return ('/' != character);
  }
  
  private boolean isNotSeparator(int character) {
    return !('{' == character || '}' == character || '[' == character || ']' == character);
  }
  
  private byte[] convertListOfIntToByteArray(List<Integer> input) {
    byte[] res = new byte[input.size()];
    for (int i = 0; i < res.length; ++i) {
      res[i] = input.get(i).byteValue();
    }
    return res;
  }


  private int readNextCharacter(PeekInputStream stream) throws IOException {
    int currentByte = stream.read();
    this.numberOfReadBytes++;
    return currentByte;
  }


  private void skipBlankSeparators(PeekInputStream stream) throws IOException {
    int nextByte = stream.peek();
    while (isBlankSperator(nextByte)) {
      readNextCharacter(stream);
      nextByte = stream.peek();
    }
  }


  private void skipSingleBlankSeparator(PeekInputStream stream) throws IOException {
    int nextByte = stream.peek();
    if(isBlankSperator(nextByte)) {
      readNextCharacter(stream);
    }
  }
  private boolean isBlankSperator(int character) {
    return (character == ' ' || character == '\n' || character == '\r');
  }


  private boolean isNotBlankSperator(int character) {
    return !isBlankSperator(character);
  }
  
  private boolean isEExecKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "eexec".equals(word);
  }


  private boolean isDefKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "def".equals(word);
  }
  
  private boolean isReadOnlyKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "readonly".equals(word);
  }
  
  private boolean isEncodingKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/Encoding".equals(word);
  }


  private boolean isDupKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/Encoding".equals(word);
  }


  private boolean isStartOfEExecReached() {
    return (this.numberOfReadBytes == this.clearTextSize);
  }
  
  private boolean isEndOfStream(PeekInputStream stream) {
    try {
      skipBlankSeparators(stream);
      return false;
    } catch (IOException e) {
      return true;
    }
  }
}
// Source Code of org.apache.padaf.preflight.font.type1.Type1Parser

// Related Classes of org.apache.padaf.preflight.font.type1.Type1Parser