Package org.apache.padaf.preflight.font.type1

Source Code of org.apache.padaf.preflight.font.type1.Type1Parser

/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/

package org.apache.padaf.preflight.font.type1;

import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_PDFDOC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_WIN;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.fontbox.cff.Type1CharStringParser;
import org.apache.fontbox.cff.Type1FontUtil;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.MacRomanEncoding;
import org.apache.pdfbox.encoding.PdfDocEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;

public class Type1Parser {
  protected static final char NAME_START = '/';
  protected static final String NOTDEF = NAME_START + ".notdef";
  protected static final int DEFAULT_LEN_IV = 4;
 
  private static final String PS_STANDARD_ENCODING = "StandardEncoding";
  private static final String PS_ISOLATIN_ENCODING = "ISOLatin1Encoding";

  private static final String TOKEN_ENCODING = "US-ASCII";

  /**
   * The PostScript font stream.
   */
  private PeekInputStream fontProgram = null;
  /**
   * The length in bytes of the clear-text portion of the Type1 font program.
   */
  private int clearTextSize = 0;
  /**
   * The length in bytes of the eexec encrypted portion of the type1 font
   * program.
   */
  private int eexecSize = 0;

  /**
   * This counter is used to know how many byte have been read.
   * It is used to read the clear part of the font. this computer is
   * updated during the parsing of the encoding part too but it is not
   * used.
   */
  private int numberOfReadBytes = 0;
 
  /**
   * Object which contains information coming from the parsing.
   */
  private Type1 type1Font = null;
 
  private Type1Parser(InputStream type1, int length1, int length2, Encoding enc) throws IOException {
    super();
    this.fontProgram = new PeekInputStream(type1);
    this.clearTextSize = length1;
    this.eexecSize = length2;
    // ---- Instantiate the Encoding Map
    if (enc != null) {
      this.type1Font = new Type1(enc);
    } else {
      this.type1Font = new Type1(new StandardEncoding());
    }
    this.type1Font.addCidWithLabel(-1, NOTDEF);
  }

  /**
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParser(
      InputStream fontProgram,
      int clearTextLength,
      int eexecLength) throws IOException {

    Encoding encoding = getEncodingObject("");
    return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);   
  }

  /**
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encodingName The name of encoding which is used by this font program.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParserWithEncodingName(
      InputStream fontProgram,
      int clearTextLength,
      int eexecLength,
      String encodingName) throws IOException {

    Encoding encoding = getEncodingObject(encodingName);
    return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);
  }

  private static Encoding getEncodingObject(String encodingName) {
    Encoding encoding = new StandardEncoding();
    if (FONT_DICTIONARY_VALUE_ENCODING_MAC.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_WIN.equals(encodingName)) {
      encoding = new WinAnsiEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_PDFDOC.equals(encodingName)) {
      encoding = new PdfDocEncoding();
    }
    return encoding;
  }

  /**
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encodingName The encoding object which is used by this font program.
   * @return
   * @throws IOException
   */
  public static Type1Parser createParserWithEncodingObject(
      InputStream fontProgram,
      int clearTextLength,
      int eexecLength,
      Encoding encodingthrows IOException {

    return new Type1Parser(fontProgram, clearTextLength, eexecLength, encoding);
  }

  public Type1 parse() throws IOException {
    parseClearPartOfFontProgram(this.fontProgram);
    decodeAndParseEExecPart(this.fontProgram);
    return this.type1Font;
  }

  private void parseClearPartOfFontProgram(PeekInputStream stream) throws IOException {
    skipComments(stream);
    parseFontInformationUntilEncodingPart(stream);
  }

  private void decodeAndParseEExecPart(PeekInputStream stream) throws IOException {
    byte[] eexecPart = readEexec(stream);
    byte[] decodedEExecPart = decodeEexec(eexecPart);
    PeekInputStream eexecStream = new PeekInputStream(new ByteArrayInputStream(decodedEExecPart));
    parseEExecPart(eexecStream);
  }

  private void skipComments(PeekInputStream stream) throws IOException {
    int nextChar = stream.peek();
    while (nextChar == '%') {
      if (nextChar == -1) {
        throw new IOException("Unexpected End Of File during a comment parsing");
      }
      readLine(stream);
      nextChar = stream.peek();
    }
  }

  private void parseFontInformationUntilEncodingPart(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    while (!isEExecKeyWord(token)) {
      // add here specific operation to memorize useful information
      if (isEncodingKeyWord(token)) {
        parseEncodingDefinition(stream);
      }
      token = readToken(stream);
    }

    while (!isStartOfEExecReached()) {
      readNextCharacter(stream);
    }
  }

  private void parseEncodingDefinition(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    String readableToken = new String(token, TOKEN_ENCODING);
    if (PS_ISOLATIN_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithISOLatin1Encoding();
    } else if (PS_STANDARD_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithStandardEncoding();
    } else {
      try {
        Integer.parseInt(readableToken);
        throwExceptionIfUnexpectedToken("array", readToken(stream));
        readEndSetEncodingValues(stream);
      } catch (NumberFormatException e) {
        throw new IOException("Invalid encoding : Expected int value before \"array\" "
            + "key word if the Encoding isn't Standard or ISOLatin");
      }
    }
  }

  private void parseEExecPart(PeekInputStream stream) throws IOException {
    int lenIV = DEFAULT_LEN_IV;
    byte[] previousToken = new byte[0];
    while(!isEndOfStream(stream)) {
      byte[] token = readToken(stream);
      if (isLenIVKeyWord(token)) {
        // lenIV belong to Private Dictionary.
        // If you create a method to parse PrivateDict, please update this function
        byte[] l = readToken(stream);
        lenIV = Integer.parseInt(new String(l, TOKEN_ENCODING));
      } else if (isBeginOfBinaryPart(token)) {
        try {
          int lengthOfBinaryPart = Integer.parseInt(new String(previousToken, TOKEN_ENCODING));
          skipSingleBlankSeparator(stream);
          stream.read(new byte[lengthOfBinaryPart], 0, lengthOfBinaryPart);
          token = readToken(stream); // read the end of binary part
        } catch (NumberFormatException e) {
          throw new IOException("Binary part found but previous token wasn't an integer");
        }
      } else if (isCharStringKeyWord(token)) {
        parseCharStringArray(stream, lenIV);
      }
      previousToken = token;
    }
  }

  private void parseCharStringArray(PeekInputStream stream, int lenIV) throws IOException {
    int numberOfElements = readNumberOfCharStrings(stream);
    goToBeginOfCharStringElements(stream);
   
    while (numberOfElements > 0) {
      readCharStringElement(stream, lenIV);
      --numberOfElements;
    }
  }

  private void goToBeginOfCharStringElements(PeekInputStream stream) throws IOException {
    byte[] token = new byte[0];
    do {
      token = readToken(stream);
    } while(isNotBeginKeyWord(token));
  }

  private void readCharStringElement(PeekInputStream stream, int lenIV) throws IOException {
    byte[] labelToken = readToken(stream);
    String label = new String(labelToken, TOKEN_ENCODING);

    byte[] sizeOfCharStringToken = readToken(stream);
    int sizeOfCharString = Integer.parseInt(new String(sizeOfCharStringToken,TOKEN_ENCODING));

    readToken(stream); // skip "RD" or "-|" token
    skipSingleBlankSeparator(stream); // "RD" or "-|" are followed by a space

    byte[] descBinary = new byte[sizeOfCharString];
    stream.read(descBinary, 0, sizeOfCharString);
    byte[] description = Type1FontUtil.charstringDecrypt(descBinary, lenIV);
    Type1CharStringParser t1p = new Type1CharStringParser();
    List<Object> operations = t1p.parse(description);
    type1Font.addGlyphDescription(label, new GlyphDescription(operations));

    readToken(stream); // skip "ND" or "|-" token
  }
 
  private boolean isNotBeginKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return !"begin".equals(word);   
  }

  private boolean isBeginOfBinaryPart(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return ("RD".equals(word) || "-|".equals(word));
  }

  private boolean isLenIVKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/lenIV".equals(word);
  }

  private boolean isCharStringKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/CharStrings".equals(word);
  }

  private int readNumberOfCharStrings(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    String word = new String(token, TOKEN_ENCODING);
    try {
      return Integer.parseInt(word);
    } catch (NumberFormatException e) {
      throw new IOException("Number of CharStrings elements is expected.");
    }
  }
 
  private void throwExceptionIfUnexpectedToken(String expectedValue, byte[] token) throws IOException {
    String valueToCheck = new String(token, TOKEN_ENCODING);
    if (!expectedValue.equals(valueToCheck)) {
      throw new IOException(expectedValue + " was expected but we received " + valueToCheck);
    }
  }

  private void readEndSetEncodingValues(PeekInputStream stream) throws IOException {
    byte[] token = readToken(stream);
    boolean lastTokenWasReadOnly = false;
    while ( !(lastTokenWasReadOnly && isDefKeyWord(token)) ) {
      if (isDupKeyWord(token)) {
        byte[] cidToken = readToken(stream);
        byte[] labelToken = readToken(stream);
        String cid = new String(cidToken, TOKEN_ENCODING);
        String label = new String(labelToken, TOKEN_ENCODING);
        try
          this.type1Font.addCidWithLabel(Integer.parseInt(cid), label);
        } catch (NumberFormatException e) {
          throw new IOException("Invalid encoding : Expected CID value before \"" + label + "\" label");
        }
      } else {
        lastTokenWasReadOnly = isReadOnlyKeyWord(token);
      }
      token = readToken(stream);
    }
  }

  private byte[] readEexec(PeekInputStream stream) throws IOException {
    int BUFFER_SIZE = 1024;
    byte[] buffer = new byte[BUFFER_SIZE];
    ByteArrayOutputStream eexecPart = new ByteArrayOutputStream();
    int lr = 0;
    int total = 0;
    do {
      lr = stream.read(buffer, 0, BUFFER_SIZE);
      if (lr == BUFFER_SIZE && (total + BUFFER_SIZE < eexecSize)) {
        eexecPart.write(buffer, 0, BUFFER_SIZE);
        total += BUFFER_SIZE;
      } else if (lr > 0 && (total + lr < eexecSize)) {
        eexecPart.write(buffer, 0, lr);
        total += lr;
      } else if (lr > 0 && (total + lr >= eexecSize)) {
        eexecPart.write(buffer, 0, eexecSize - total);
        total += (eexecSize - total);
      }
    } while (eexecSize > total && lr > 0);
    IOUtils.closeQuietly(eexecPart);
    return eexecPart.toByteArray();
  }

  private byte[] decodeEexec(byte[] eexec) {
    return Type1FontUtil.eexecDecrypt(eexec);
  }

  private byte[] readLine(PeekInputStream stream) throws IOException {
    ArrayList<Byte> bytes = new ArrayList<Byte>();
    int currentCharacter = 0;

    do {
      currentCharacter = readNextCharacter(stream);
      bytes.add((byte)(currentCharacter & 0xFF));
    } while ( !('\n' == currentCharacter || '\r' == currentCharacter)) ;

    if ('\r' == currentCharacter && '\n' == stream.peek()) {
      currentCharacter = readNextCharacter(stream);
      bytes.add((byte)(currentCharacter & 0xFF));
    }

    byte[] result = new byte[bytes.size()];
    for (int i = 0 ; i < bytes.size(); ++i) {
      result[i] = bytes.get(i);
    }
    return result;
  }

  private byte[] readToken(PeekInputStream stream) throws IOException {
    byte[] token = new byte[0];
    skipBlankSeparators(stream);

    int nextByte = stream.peek();
    if (nextByte < 0) {
      throw new IOException("Unexpected End Of File");
    }

    if (nextByte == '(') {
      token = readStringLiteral(stream);
    } else if (nextByte == '[') {
      token = readArray(stream);
    } else if (nextByte == '{') {
      token = readProcedure(stream);
    } else {
      token = readNameOrArgument(stream);
    }

    return token;
  }
 
  private byte[] readStringLiteral(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
   
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }

      if (currentByte == '(') {
        opened++;
      } else if (currentByte == ')') {
        opened--;
      }

      buffer.add(currentByte);
    } while (opened != 0);

    return convertListOfIntToByteArray(buffer);
  }

  private byte[] readArray(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
   
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }

      if (currentByte == '[') {
        opened++;
      } else if (currentByte == ']') {
        opened--;
      }

      buffer.add(currentByte);
    } while (opened != 0);

    return convertListOfIntToByteArray(buffer);
  }

  private byte[] readProcedure(PeekInputStream stream) throws IOException {
    int opened = 0;
    List<Integer> buffer = new ArrayList<Integer>();
   
    int currentByte = 0;
    do {
      currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }

      if (currentByte == '{') {
        opened++;
      } else if (currentByte == '}') {
        opened--;
      }

      buffer.add(currentByte);
    } while (opened != 0);

    return convertListOfIntToByteArray(buffer);
  }
 
  private byte[] readNameOrArgument(PeekInputStream stream) throws IOException {
    List<Integer> buffer = new ArrayList<Integer>();
    int nextByte = 0;
    do {
      int currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException("Unexpected End Of File");
      }
      buffer.add(currentByte);
      nextByte = stream.peek();
    } while (isNotBlankSperator(nextByte) && isNotBeginOfName(nextByte) && isNotSeparator(nextByte));

    return convertListOfIntToByteArray(buffer);
  }
 
  private boolean isNotBeginOfName(int character) {
    return ('/' != character);
  }
 
  private boolean isNotSeparator(int character) {
    return !('{' == character || '}' == character || '[' == character || ']' == character);
  }
 
  private byte[] convertListOfIntToByteArray(List<Integer> input) {
    byte[] res = new byte[input.size()];
    for (int i = 0; i < res.length; ++i) {
      res[i] = input.get(i).byteValue();
    }
    return res;
  }

  private int readNextCharacter(PeekInputStream stream) throws IOException {
    int currentByte = stream.read();
    this.numberOfReadBytes++;
    return currentByte;
  }

  private void skipBlankSeparators(PeekInputStream stream) throws IOException {
    int nextByte = stream.peek();
    while (isBlankSperator(nextByte)) {
      readNextCharacter(stream);
      nextByte = stream.peek();
    }
  }

  private void skipSingleBlankSeparator(PeekInputStream stream) throws IOException {
    int nextByte = stream.peek();
    if(isBlankSperator(nextByte)) {
      readNextCharacter(stream);
    }
  }
  private boolean isBlankSperator(int character) {
    return (character == ' ' || character == '\n' || character == '\r');
  }

  private boolean isNotBlankSperator(int character) {
    return !isBlankSperator(character);
  }
 
  private boolean isEExecKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "eexec".equals(word);
  }

  private boolean isDefKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "def".equals(word);
  }
 
  private boolean isReadOnlyKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "readonly".equals(word);
  }
 
  private boolean isEncodingKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/Encoding".equals(word);
  }

  private boolean isDupKeyWord(byte[] token) throws IOException {
    String word = new String(token, TOKEN_ENCODING);
    return "/Encoding".equals(word);
  }

  private boolean isStartOfEExecReached() {
    return (this.numberOfReadBytes == this.clearTextSize);
  }
 
  private boolean isEndOfStream(PeekInputStream stream) {
    try {
      skipBlankSeparators(stream);
      return false;
    } catch (IOException e) {
      return true;
    }
  }
}
TOP

Related Classes of org.apache.padaf.preflight.font.type1.Type1Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.