%    Copyright (C) 1994 Aladdin Enterprises.  All rights reserved.

% pdf_base.ps
% Basic parser for PDF reader.

% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% and streams; it doesn't include any facilities for making marks on
% the page.

% Switch to language level 2 when the interpreter provides the switch.
/.setlanguagelevel where { pop 2 .setlanguagelevel } if
% Remember the current allocation mode (restored by the .setglobal at the
% very end of the file) and allocate everything below in global VM.
.currentglobal true .setglobal
% Create pdfdict only if it does not exist yet, then make it current.
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin

% We rebind # and #dsc later if we're writing out PostScript.
% As defined here, # discards its top operand, then makes the next operand
% executable and executes it.
% NOTE(review): the discarded operand is presumably an argument count that
% only the rebound version needs -- confirm against the code that rebinds #.
/#              % ... <opname> <n> # -
 { pop cvx exec
 } bind def
% As defined here, #dsc simply discards everything down to the mark.
/#dsc           % mark ... #dsc -
 { cleartomark
 } bind def

% Define the name interpretation dictionary for reading values.
% Each key is a name that may appear in the input; pdfrun (below) executes
% the associated value when it reads that name.
/valueopdict mark
  % The value must be a procedure that pushes a mark when executed;
  % a bare mark here would land in the key/value list being collected.
  (<<) cvn { mark } bind        % don't push an actual mark!
  (>>) cvn /.dicttomark load
  /[ { mark } bind              % ditto
  /] /] load
  /true true
  /false false
  /null null
  % The next three map the name to its own executable form, so the
  % definition is looked up on the dictionary stack at run time.
  /F dup cvx                    % see Objects section below
  /R dup cvx                    % see Objects section below
  /stream dup cvx               % see Streams section below
.dicttomark readonly def

% ------ Utilities ------ %

% Define a scratch string.  The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
/pdfstring 255 string def

% Read the previous line of a file.  If we aren't at a line boundary,
% read the line containing the current position.
% Works on PDFfile.  On return, startpos is the file position at which the
% returned line began, and the file is positioned just after that line.
/prevline               % - prevline <startpos> <substring>
 { PDFfile fileposition pdfstring
   % Back up 257 bytes (clamped at the start of the file) and read
   % forward line by line from there.
   1 index 257 sub 0 .max PDFfile exch setfileposition
    { PDFfile fileposition
      PDFfile 2 index readline pop
                % Stack: initpos string startpos substring
      % Stop at the first line that ends at or beyond the initial position.
      PDFfile fileposition 4 index ge { exit } if
      pop pop
    }
   loop
   % Drop initpos and the scratch string, keeping startpos and substring.
   4 2 roll pop pop
 } bind def

% Execute a file, interpreting its executable names in a given
% dictionary.  The name procedures may do whatever they want
% to the operand stack.
% While the tokens are being interpreted, PDFsource is set to the file
% being run; its previous value is stored back afterwards, whether the
% loop was left by exit or by stop, and a stop is then re-raised.
/pdfrun                 % <file> <opdict> pdfrun -
 {      % Construct a procedure with the file and opdict bound into it.
   % After the roll the stack is: file mark mark opdict file(literal),
   % so the loop body packed below begins  <opdict> <file> token ...
   1 index cvlit mark mark 4 2 roll
    { % When token reports no more input, substitute the executable
      % name %%EOF so that it goes through the dictionary lookup below.
      token not { (%%EOF) cvn cvx } if
      dup xcheck
       { DEBUG { dup == flush } if
         % Executable token: look it up in opdict and run its definition.
         2 copy .knownget
          { exch pop exch pop exec }
          { % Not in opdict: report it on stderr and keep going.
            .stderr dup (****************Unknown operator: ) writestring
            exch .writecvs
            .stderr dup (\n) writestring flushfile
            pop
          }
         ifelse
       }
       { % Literal token: discard opdict, leave the token as an operand.
         exch pop DEBUG { dup ==only ( ) print flush } if
       }
      ifelse
    }
   aload pop .packtomark cvx
   % Wrap the body as  { {body} loop } ...
   /loop cvx 2 packedarray cvx
   % ... and build  { {{body} loop} stopped /PDFsource <old PDFsource>
   %                  store { stop } if }
    { stopped /PDFsource } aload pop
   PDFsource
    { store { stop } if } aload pop .packtomark cvx
   % Make the file the current PDFsource, then run the constructed procedure.
   /PDFsource 3 -1 roll store exec
 } bind def

% ------ File reading ------ %

% Read the cross-reference entry for an (unresolved) object.
% The caller must save and restore the PDFfile position if desired.
% Objects[obj#] must still hold the file offset of the object's xref entry.
% Stores the entry's generation number into Generations[obj#] and returns
% the object position read from the entry.  Signals syntaxerror if the
% entry's third token is not n.
/readxrefentry          % <obj#> readxrefentry <objpos>
 { dup Objects exch get
   PDFfile exch setfileposition
   PDFfile token pop            % object position
   PDFfile token pop            % generation #
   PDFfile token pop            % n or f
   /n ne
    { /readxrefentry cvx /syntaxerror signalerror
    }
   if
   % A generation number above 255 does not fit in a string element.
   dup 255 gt
    { Generations type /stringtype eq
       {        % Convert Generations from a string to an array.
         Generations length array dup
         0 1 2 index length 1 sub
          { Generations 1 index get put dup
          }
         for pop /Generations exch store
       }
      if
    }
   if
                % Stack: obj# objpos gen#
   % Generations[obj#] = gen#, leaving objpos on the stack.
   Generations 4 -1 roll 3 -1 roll put
 } bind def

% ================================ Objects ================================ %

% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
% no way to represent procedures.  Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.

% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
% True iff Objects[obj#] is an executable integer.
/unresolved?            % <obj#> unresolved? <bool>
 { Objects exch get dup xcheck exch type /integertype eq and
 } bind def
% Forcing an object is just executing it (see the note above).
/oforce /exec load def
% Fetch an element, forcing it if it is executable; the forced value is
% stored back into the container so it is not resolved again.
/oget           % <array> <index> oget <object>
                % <dict> <key> oget <object>
 { 2 copy get dup xcheck
    { exec dup 4 1 roll put }
    { exch pop exch pop }
   ifelse
 } bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget      % <dict> <key> knownoget <object> true
                % <dict> <key> knownoget false
 { 2 copy known
    { oget dup null eq { pop false } { true } ifelse }
    { pop pop false }
   ifelse
 } bind def

% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
% With at least three operands on the stack, F pops three and pushes null.
/F              % <op1> <op2> <op3> F null
 {              % Some PDF 1.1 files use F as a synonym for f!
   % With fewer than three operands, treat it as that synonym.
   % NOTE(review): setfillcolor and fs are not defined in this file;
   % confirm this sequence matches the definition of f in the drawing code.
   count 3 lt
    { setfillcolor /f fs }
    { pop pop pop null }
   ifelse
 } bind def

% We keep track of objects in a pair of arrays, Objects and Generations.
% Generations[N] is the current generation number for object number N.
% (As far as we can tell, this is needed only for error checking.)
% If object N is loaded, Objects[N] is the actual object;
% otherwise, Objects[N] is an executable integer giving the file offset
% of the object's entry in the cross-reference table.
% Verify that gen# equals Generations[obj#]; if not, print a message and
% signal rangecheck (reported against R).  On success only gen# is removed.
/checkgeneration        % <obj#> <gen#> checkgeneration <obj#>
 { Generations 2 index get 1 index ne
    { (Wrong generation: ) print 1 index =only ( ) print dup =
      /R cvx /rangecheck signalerror
    }
   if pop
 } bind def
% Object reference: yields the loaded object if there is one, otherwise
% the deferred-reference procedure {obj# gen# resolveR}.
/R              % <obj#> <gen#> R <object>
 { 1 index unresolved?
    { /resolveR cvx 3 packedarray cvx }
    { Objects 2 index get 3 1 roll checkgeneration pop }
   ifelse
 } bind def

% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
% objopdict is valueopdict plus endobj (again mapped to its executable name).
/objopdict mark
  valueopdict { } forall
  /endobj dup cvx
.dicttomark readonly def
% obj runs PDFfile under objopdict; the obj# and gen# operands are left on
% the stack for endobj to pick up.
/obj            % <obj#> <gen#> obj ...
 { PDFfile objopdict pdfrun
 } bind def
% Record a completed object definition in Objects[obj#].
% The object is also left on the operand stack.
/endobj         % <obj#> <gen#> <object> endobj <object>
 { 3 1 roll
                % Stack: object obj# gen#
                % Read the xref entry if we haven't yet done so.
                % This is only needed for generation # checking.
   1 index unresolved?
    { % Save the file position, read the entry (which sets
      % Generations[obj#]), discard the returned objpos, and restore.
      PDFfile fileposition 2 index readxrefentry pop
      PDFfile exch setfileposition
    }
   if
   checkgeneration
   Objects exch 2 index put
 } bind def

% When resolving an object reference, we stop at the endobj.
% Name dictionary used while fetching a single referenced object:
% everything in valueopdict, plus an endobj that records the object and
% then leaves the interpretation loop.
/resolveopdict
  mark
    valueopdict { } forall
    /endobj { endobj exit } bind
  .dicttomark readonly
def

% Resolve an object reference.  If the object is already loaded, it is
% fetched straight from Objects (the generation number is ignored).
% Otherwise its cross-reference entry is read, the generation is checked,
% the object is read from PDFfile at the recorded position, and the
% original PDFfile position is restored afterwards.
% Signals rangecheck if the object number or the obj keyword found at that
% position does not match.
/resolveR               % <obj#> <gen#> resolveR <object>
 { DEBUG
    { (Resolving: ) print 1 index 1 index 2 array astore == }
   if
   1 index unresolved? not
    {           % Already loaded: drop gen# and fetch the object.
      pop Objects exch get
    }
    {           % Remember where PDFfile is, underneath the operands.
      PDFfile fileposition 3 1 roll
                % Stack: savepos obj# gen#
      1 index readxrefentry
      3 1 roll checkgeneration
                % Stack: savepos objpos obj#
      PDFfile 3 -1 roll setfileposition
                % Stack: savepos obj#
                % The first token there must be this object's number.
      PDFfile token pop
      2 copy ne
       { (xref error!\n) print
         /resolveR cvx /rangecheck signalerror
       }
      if
      pop
                % Next comes the generation number (kept on the stack
                % for endobj), then the obj keyword.
      PDFfile token pop
      PDFfile token pop
      /obj ne
       { (xref error!\n) print
         /resolveR cvx /rangecheck signalerror
       }
      if
                % Stack: savepos obj# gen#
                % Read the body; the endobj entry of resolveopdict
                % stores the object and exits, leaving it on the stack.
      PDFfile resolveopdict pdfrun
                % Stack: savepos object
      PDFfile 3 -1 roll setfileposition
    }
   ifelse
 } bind def

%================================ Streams ================================ %

% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
%       /File - the file or string where the stream contents are stored;
%       /FilePosition - iff File is a file, the position in the file
%         where the contents start.
% We do the real work of constructing the data stream only when the
% contents are needed.

% Construct a stream.  The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
% Since the stream keyword may be followed by 0, 1, or more blanks,
% we have to back up in the file to find where the data actually starts.
% The stream keyword may be followed by a blank line, which we also
% must skip before reading any data.  (This is chancy if the data are
% in binary form, but such files are questionable to begin with.)
% Skip any run of LF (10) / CR (13) characters on PDFsource, leaving the
% first other character unread.  Signals syntaxerror (reported against
% stream) if the source ends first.
/streamskipeols         % - streamskipeols -
 {  { PDFsource read not { /stream cvx /syntaxerror signalerror } if
      dup 10 eq 1 index 13 eq or not
       { PDFsource exch unread exit }
      if
      pop
    }
   loop
 } bind def

% Turn the dictionary just read into a stream object: record where the
% data live (/File, plus /FilePosition when reading from PDFfile), skip
% over the data, check for the endstream keyword, and make the dictionary
% executable.
/stream                 % <dict> stream <streamdict>
 { PDFsource PDFfile eq
    { dup /File PDFfile put
                % Re-read the line containing the current position, so
                % that the file is positioned just after that line.
      prevline pop pop
      streamskipeols
      dup /FilePosition PDFfile fileposition put
                % Skip Length bytes of data.
      PDFfile fileposition 1 index /Length oget add
      PDFfile exch setfileposition
    }
    {   % We're already reading from a stream, which we can't reposition.
        % Capture the sub-stream contents in a string.
      streamskipeols
      dup /Length oget string PDFsource exch readstring
      not
       { (Unexpected EOF in stream!\n) print
         /stream cvx /rangecheck signalerror
       }
      if
                % Stack: dict string
      1 index exch /File exch put
    }
   ifelse
   PDFsource token pop
   /endstream ne { /stream cvx /syntaxerror signalerror } if
   cvx
 } bind def

% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
%       - If we are going to interpret their contents, we let endstream
%         terminate the interpretation loop;
%       - If we are just going to read data from them, we impose
%         a SubFileDecode filter that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream          % <streamdict> <readdata?> resolvestream <stream>
 { exch dup /FilePosition .knownget
    { 1 index /File get exch setfileposition }
   if
                % Stack: readdata? dict
   dup /DecodeParms .knownget not { null } if
   1 index /Filter .knownget not { {} } if
                % A single filter name is wrapped in a 1-element array,
                % and so are its parameters if there are any.
   dup type /nametype eq
    { 1 array astore
      1 index null ne { exch 1 array astore exch } if
    }
   if
                % Stack: readdata? dict parms filternames
   2 index /File get exch
                % Stack: readdata? dict parms file/string filternames
   dup length 0 eq
    {           % All the PDF filters have EOD markers, but in this case
                % there is no specified filter.
      pop exch pop
                % Stack: readdata? dict file/string
      2 index
       {        % We're going to read data; use a SubFileDecode filter.
         1 index /Length oget () /SubFileDecode filter
       }
       { dup type /filetype ne
          {     % Use a SubFileDecode filter to read from a string.
                % The filter name must be a literal name operand for
                % the filter operator; without the leading slash the
                % name would be executed here instead of being pushed.
            0 () /SubFileDecode filter
          }
         if
       }
      ifelse
    }
    { 2 index null eq
       {        % No parameters at all: apply each filter by name.
          { filter }
       }
       {        % Stack: readdata? dict parms file/string filtername
                % Pass the first remaining parameter entry to the
                % filter unless it is null, then drop that entry.
          { 2 index 0 get dup null eq { pop } { exch } ifelse
            filterpdf
            exch dup length 1 sub 1 exch getinterval exch
          }
       }
      ifelse forall exch pop
    }
   ifelse
                % Stack: readdata? dict file
   exch pop exch pop
 } bind def
% When stream contents are being interpreted, endstream leaves the
% enclosing interpretation loop.
/endstream { exit } def

% Construct a PDF filter.  These are the same as PostScript filters,
% with one exception: LZWDecode filters with Predictor=2 must insert
% a PixelDifferenceDecode filter in the pipeline.
/filterpdf              % <source> <...params...> <filtername> filterpdf <file>
 {              % Special handling applies only to LZWDecode with a
                % parameter dictionary.
   dup /LZWDecode eq 2 index type /dicttype eq and
    {           % Select by Predictor - 1 (Predictor defaults to 1).
      1 index /Predictor .knownget not { 1 } if 1 sub
       {        % Predictor 1: plain filter.
          { filter }
                % Predictor 2: keep a copy of the parameters for the
                % PixelDifferenceDecode filter stacked on the LZW one.
          { 1 index 4 1 roll filter exch /PixelDifferenceDecode filter }
       }
      exch get exec
    }
    { filter
    }
   ifelse
 } bind def

end                     % pdfdict
.setglobal