482 lines
13 KiB
PHP
482 lines
13 KiB
PHP
|
<?php
|
||
|
//
|
||
|
// FPDI - Version 1.1
|
||
|
//
|
||
|
// Copyright 2004,2005 Setasign - Jan Slabon
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
//
|
||
|
|
||
|
// Numeric PDF_TYPE_* identifiers.
// Each constant is created only when it has not been defined before,
// so a definition made earlier (e.g. by another included file) is kept.
defined('PDF_TYPE_NULL')       || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC')    || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN')      || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX')        || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING')     || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY')      || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC')     || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF')     || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT')     || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM')     || define('PDF_TYPE_STREAM', 10);
|
||
|
|
||
|
|
||
|
require_once("wrapper_functions.php");
|
||
|
require_once("pdf_parser.php");
|
||
|
|
||
|
class fpdi_pdf_parser extends pdf_parser {
|
||
|
|
||
|
/**
 * Pages collected by read_pages().
 * Index begins at 0.
 *
 * Starts as an empty array so that count() always receives an array,
 * even when no page is collected.
 *
 * @var array
 */
var $pages = array();

/**
 * Page count (number of entries in $pages, set by the constructor)
 * @var integer
 */
var $page_count = 0;

/**
 * Current page number as a zero-based index into $pages
 * (setPageno() stores the given number minus one)
 * @var integer
 */
var $pageno;

/**
 * PDF Version of imported Document
 * @var string
 */
var $pdfVersion;

/**
 * FPDI Reference
 * @var object
 */
var $fpdi;
|
||
|
|
||
|
/**
 * Constructor
 *
 * Stores the FPDI reference and the file name, runs the parent parser's
 * constructor, reads the document information (getInfo()) and collects
 * all pages into $this->pages / $this->page_count.
 *
 * @param string $filename  Source-Filename
 * @param object $fpdi      Object of type fpdi
 */
function __construct($filename, &$fpdi) {
    $this->fpdi =& $fpdi;
    $this->filename = $filename;

    // The parent class lives in another file; call whichever constructor
    // form it provides (modern __construct or the PHP 4 style name).
    if (method_exists('pdf_parser', '__construct')) {
        parent::__construct($filename);
    } else {
        parent::pdf_parser($filename);
    }

    // Get Info
    $this->getInfo();

    // Resolve the /Pages dictionary referenced by the document root
    // ($this->c and $this->root are presumably set up by the parent
    // constructor - they are not assigned in this class).
    $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

    // Read pages; start from an empty list so that count() below always
    // receives an array, even when no page is found.
    $this->pages = array();
    $this->read_pages($this->c, $pages, $this->pages);

    // Count pages
    $this->page_count = count($this->pages);
}

/**
 * PHP 4 style constructor name.
 *
 * Kept as a plain delegate so that code calling
 * parent::fpdi_pdf_parser(...) or $obj->fpdi_pdf_parser(...) keeps working
 * on PHP versions that no longer treat this name as a constructor.
 *
 * @param string $filename  Source-Filename
 * @param object $fpdi      Object of type fpdi
 */
function fpdi_pdf_parser($filename, &$fpdi) {
    $this->__construct($filename, $fpdi);
}
|
||
|
|
||
|
/**
 * Overwrite parent::error()
 *
 * Forwards the message to the error() method of the FPDI object
 * stored in $this->fpdi.
 *
 * @param string $msg  Error-Message
 */
function error($msg) {
    $reporter = $this->fpdi;
    $reporter->error($msg);
}
|
||
|
|
||
|
/**
 * Get pagecount from sourcefile
 *
 * Returns the value stored in $this->page_count.
 *
 * @return int
 */
function getPageCount() {
    $count = $this->page_count;

    return $count;
}
|
||
|
|
||
|
|
||
|
/**
 * Set pageno
 *
 * The given number is reduced by one and stored as a zero-based index.
 * An index outside 0 .. getPageCount()-1 is reported through
 * $this->fpdi->error(); the index is stored afterwards in either case.
 *
 * @param int $pageno  Pagenumber to use
 */
function setPageno($pageno) {
    $index = $pageno - 1;

    $inRange = ($index >= 0 && $index < $this->getPageCount());
    if (!$inRange) {
        $this->fpdi->error("Pagenumber is wrong!");
    }

    $this->pageno = $index;
}
|
||
|
|
||
|
/**
 * Get page-resources from current page
 *
 * Looks up the page selected via setPageno() in $this->pages and
 * delegates to _getPageResources().
 *
 * @return array
 */
function getPageResources() {
    $currentPage = $this->pages[$this->pageno];

    return $this->_getPageResources($currentPage);
}
|
||
|
|
||
|
/**
 * Get page-resources from /Page
 *
 * Uses the /Resources entry of the given object when it has one;
 * otherwise the lookup continues with the object's /Parent.
 *
 * @param array $obj  Array of pdf-data (a /Page or one of its ancestors)
 * @return array|false  The resolved resources (unwrapped when the resolved
 *                      value is of type PDF_TYPE_OBJECT), or false when
 *                      neither the object nor any ancestor has /Resources
 */
function _getPageResources ($obj) { // $obj = /Page
    $obj = $this->pdf_resolve_object($this->c, $obj);

    if (isset($obj[1][1]['/Resources'])) {
        // The current object carries its own resources dictionary.
        $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
    } else if (isset($obj[1][1]['/Parent'])) {
        // No own resources: move back to the parent object.
        $res = $this->_getPageResources($obj[1][1]['/Parent']);
    } else {
        return false;
    }

    // The recursive call may have returned false; only index into real arrays.
    if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
        return $res[1];
    }

    return $res;
}
|
||
|
|
||
|
|
||
|
/**
 * Read the document information dictionary
 *
 * Resolves the /Info entry of the trailer and copies the known keys into
 * $this->infos (key name without the leading slash). Literal strings are
 * passed through deescapeString(), hex strings through hex2String();
 * entries of any other type are skipped.
 *
 * A trailer without /Info results in an empty $this->infos.
 */
function getInfo() {
    $avail_infos = array("Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped");

    $infos = array();

    // /Info is optional, so only resolve it when the trailer has one.
    if (isset($this->xref['trailer'][1]['/Info'])) {
        $_infos = $this->pdf_resolve_object($this->c, $this->xref['trailer'][1]['/Info']);

        foreach ($avail_infos as $info) {
            $key = "/" . $info;
            if (!isset($_infos[1][1][$key])) {
                continue;
            }

            $entry = $_infos[1][1][$key];
            if ($entry[0] == PDF_TYPE_STRING) {
                $infos[$info] = $this->deescapeString($entry[1]);
            } else if ($entry[0] == PDF_TYPE_HEX) {
                $infos[$info] = $this->hex2String($entry[1]);
            }
        }
    }

    $this->infos = $infos;
}
|
||
|
|
||
|
/**
 * Rebuilds a hexstring to string
 *
 * Without a byte order mark every pair of hex digits becomes one byte; a
 * trailing single digit is padded with "0".
 *
 * With a leading FEFF (big endian) or FFFE (little endian) mark - matched
 * case-insensitively - the data is read as 16-bit units. A unit whose high
 * byte is 00 yields the character of its low byte, every other unit
 * yields "?". An incomplete unit at the end is ignored.
 *
 * Whitespace inside the hex data is removed before decoding.
 *
 * @param string $hex  hexstring
 * @return string
 */
function hex2String($hex) {
    $hex = preg_replace('/\s+/', '', (string) $hex);
    $length = strlen($hex);
    $mark = strtoupper(substr($hex, 0, 4));
    $s = "";

    if ($mark !== "FEFF" && $mark !== "FFFE") {
        // Plain byte string: two hex digits per byte.
        for ($i = 0; $i < $length; $i += 2) {
            $second = ($i + 1 < $length) ? $hex[$i + 1] : '0';
            $s .= chr(hexdec($hex[$i] . $second));
        }

        return $s;
    }

    $bigEndian = ($mark === "FEFF");

    // 16-bit units: four hex digits each, starting after the mark.
    // The loop condition guarantees that a complete unit is available.
    for ($i = 4; $i + 4 <= $length; $i += 4) {
        $first = substr($hex, $i, 2);
        $last = substr($hex, $i + 2, 2);

        $high = $bigEndian ? $first : $last;
        $low = $bigEndian ? $last : $first;

        // Only units with a zero high byte are mapped to a single byte.
        $s .= ($high === "00") ? chr(hexdec($low)) : "?";
    }

    return $s;
}
|
||
|
|
||
|
/**
 * Resolves the backslash escapes of a PDF literal string
 *
 * Handles, in a single pass:
 *  - octal character codes of one to three digits (\ddd),
 *  - the named escapes \n \r \t \b \f,
 *  - escaped parentheses and backslashes,
 *  - a backslash directly before a line break (the line break is dropped).
 * For any other escaped character only the backslash is removed.
 *
 * Implemented with preg_replace_callback() because the /e pattern
 * modifier is no longer available in PHP 7 and later.
 *
 * @param string $s  raw string content
 * @return string
 */
function deescapeString($s) {
    $named = array(
        'n'  => "\n",
        'r'  => "\r",
        't'  => "\t",
        'b'  => "\x08",
        'f'  => "\x0C",
        '('  => '(',
        ')'  => ')',
        '\\' => '\\',
    );

    return preg_replace_callback(
        '/\\\\([0-7]{1,3}|\r\n|[\s\S])/',
        function ($match) use ($named) {
            $escaped = $match[1];

            if (isset($named[$escaped])) {
                return $named[$escaped];
            }

            // Octal character code; only the low eight bits are used.
            if (strspn($escaped, '01234567') === strlen($escaped)) {
                return chr(octdec($escaped) & 0xFF);
            }

            // Backslash at the end of a line: the string continues on
            // the next line without a line break.
            if ($escaped === "\r\n" || $escaped === "\r" || $escaped === "\n") {
                return '';
            }

            // Unknown escape: drop the backslash, keep the character.
            return $escaped;
        },
        $s
    );
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
 * Get content of current page
 *
 * If /Contents is an array, the decoded streams are concatenated in
 * order, separated by a single space so that the last token of one stream
 * cannot fuse with the first token of the next.
 * A page without /Contents yields an empty string.
 *
 * @return string
 */
function getContent() {
    $page = $this->pages[$this->pageno];

    // /Contents is optional; without it there is nothing to rebuild.
    if (!isset($page[1][1]['/Contents'])) {
        return "";
    }

    $parts = array();
    foreach ($this->getPageContent($page[1][1]['/Contents']) as $tmp_content) {
        $parts[] = $this->rebuildContentStream($tmp_content);
    }

    return implode(" ", $parts);
}
|
||
|
|
||
|
|
||
|
/**
 * Resolve all content-objects
 *
 * An object reference is resolved; when the resolved value is an array the
 * lookup recurses into it, otherwise the resolved object is the single
 * result. An array is walked entry by entry and the results are appended
 * in order. Any other type yields an empty list.
 *
 * @param array $content_ref
 * @return array
 */
function getPageContent($content_ref) {
    $type = $content_ref[0];

    if ($type == PDF_TYPE_OBJREF) {
        $resolved = $this->pdf_resolve_object($this->c, $content_ref);

        if ($resolved[1][0] == PDF_TYPE_ARRAY) {
            return $this->getPageContent($resolved[1]);
        }

        return array($resolved);
    }

    $collected = array();

    if ($type == PDF_TYPE_ARRAY) {
        foreach ($content_ref[1] as $entry) {
            foreach ($this->getPageContent($entry) as $found) {
                $collected[] = $found;
            }
        }
    }

    return $collected;
}
|
||
|
|
||
|
|
||
|
/**
 * Rebuild content-streams
 *
 * Applies the filters named in the object's /Filter entry (a single name
 * or an array of names) to the stream data, in the listed order.
 * /FlateDecode is handled with gzuncompress(); /LZWDecode and
 * /ASCII85Decode need the classes from the optional decoders/ files.
 * Missing support and unknown filter names are reported through
 * $this->fpdi->error().
 *
 * @param array $obj
 * @return string
 */
function rebuildContentStream($obj) {
    // Normalise /Filter into a list of filter entries.
    $filterList = array();
    if (isset($obj[1][1]['/Filter'])) {
        $declared = $obj[1][1]['/Filter'];

        if ($declared[0] == PDF_TYPE_TOKEN) {
            $filterList = array($declared);
        } else if ($declared[0] == PDF_TYPE_ARRAY) {
            $filterList = $declared[1];
        }
    }

    $data = $obj[2][1];

    foreach ($filterList as $entry) {
        $filterName = $entry[1];

        if ($filterName == "/FlateDecode") {
            if (!function_exists('gzuncompress')) {
                $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$filterName));
            } else {
                $data = @gzuncompress($data);
            }

            if ($data === false) {
                $this->fpdi->error("Error while decompressing string.");
            }
        } else if ($filterName == "/LZWDecode") {
            @include_once("decoders/lzw.php");

            if (!class_exists("LZWDecode")) {
                $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
            } else {
                $lzwDecoder = new LZWDecode($this->fpdi);
                $data = $lzwDecoder->decode($data);
            }
        } else if ($filterName == "/ASCII85Decode") {
            @include_once("decoders/ascii85.php");

            if (!class_exists("ASCII85Decode")) {
                $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
            } else {
                $asciiDecoder = new ASCII85Decode($this->fpdi);
                $data = $asciiDecoder->decode(trim($data));
            }
        } else if ($filterName == null) {
            // No filter name: the data is used as it is.
        } else {
            $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
        }
    }

    return $data;
}
|
||
|
|
||
|
/**
 * Get MediaBox
 *
 * Gets an array that describes the size of a page. The page number is
 * reduced by one to index $this->pages.
 *
 * @param integer $pageno
 * @return array @see getPageBox()
 */
function getPageMediaBox($pageno) {
    $index = $pageno - 1;
    $page = $this->pages[$index];

    return $this->getPageBox($page, "/MediaBox");
}
|
||
|
|
||
|
|
||
|
/**
 * Get a Box from a page
 * Arrayformat is same as used by fpdf_tpl
 *
 * The box is taken from the page itself; when the page has no usable
 * entry the lookup continues with its /Parent. The result holds the first
 * four array entries as "x", "y", "w" and "h", each divided by
 * $this->fpdi->k.
 *
 * @param array $page a /Page
 * @param string $box_index Type of Box @see getPageBoxes()
 * @return array  or false when neither the page nor an ancestor has the box
 */
function getPageBox($page, $box_index) {
    $page = $this->pdf_resolve_object($this->c, $page);

    $box = isset($page[1][1][$box_index]) ? $page[1][1][$box_index] : null;

    // An indirect box is replaced by the value it points to.
    if ($box !== null && $box[0] == PDF_TYPE_OBJREF) {
        $resolved = $this->pdf_resolve_object($this->c, $box);
        $box = $resolved[1];
    }

    if ($box !== null && $box[0] == PDF_TYPE_ARRAY) {
        $values = $box[1];
        $scale = $this->fpdi->k;

        return array(
            "x" => $values[0][1] / $scale,
            "y" => $values[1][1] / $scale,
            "w" => $values[2][1] / $scale,
            "h" => $values[3][1] / $scale
        );
    }

    if (!isset($page[1][1]['/Parent'])) {
        return false;
    }

    $parent = $this->pdf_resolve_object($this->c, $page[1][1]['/Parent']);

    return $this->getPageBox($parent, $box_index);
}
|
||
|
|
||
|
/**
 * Get all Boxes from /Page
 *
 * Asks getPageBox() for each of /MediaBox, /CropBox, /BleedBox, /TrimBox
 * and /ArtBox and keeps every non-empty result under the box name.
 *
 * @param array a /Page
 * @return array
 */
function getPageBoxes($page) {
    $found = array();

    foreach (array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox") as $boxName) {
        $dimensions = $this->getPageBox($page, $boxName);

        if ($dimensions) {
            $found[$boxName] = $dimensions;
        }
    }

    return $found;
}
|
||
|
|
||
|
|
||
|
/**
 * Read all /Page(es)
 *
 * Walks the /Kids of the given /Pages node. Kids whose /Type is /Pages
 * are read recursively; every other kid is appended to the result in
 * document order.
 *
 * @param object pdf_context
 * @param array /Pages
 * @param array the result-array (made an empty array if it is not one yet)
 */
function read_pages (&$c, &$pages, &$result) {
    // Callers may pass an unset variable; make sure they get an array back
    // even when no page is found.
    if (!is_array($result)) {
        $result = array();
    }

    // Get the kids dictionary
    $kids = $this->pdf_resolve_object($c, $pages[1][1]['/Kids']);

    if (!is_array($kids)) {
        $this->fpdi->error("Cannot find /Kids in current /Page-Dictionary");
        // Nothing to iterate over if the error handler returns.
        return;
    }

    foreach ($kids[1] as $v) {
        $pg = $this->pdf_resolve_object($c, $v);

        $isPagesNode = isset($pg[1][1]['/Type'][1])
            && $pg[1][1]['/Type'][1] === '/Pages';

        if ($isPagesNode) {
            // If one of the kids is an embedded
            // /Pages array, resolve it as well.
            $this->read_pages($c, $pg, $result);
        } else {
            $result[] = $pg;
        }
    }
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
 * Get PDF-Version
 *
 * Runs parent::getPDFVersion() and then raises $this->fpdi->importVersion
 * to this document's $this->pdfVersion when importVersion is set and
 * lower than it.
 */
function getPDFVersion() {
    parent::getPDFVersion();

    if (!isset($this->fpdi->importVersion)) {
        return;
    }

    if ($this->pdfVersion > $this->fpdi->importVersion) {
        $this->fpdi->importVersion = $this->pdfVersion;
    }
}
|
||
|
|
||
|
}
|
||
|
|
||
|
?>
|