%    Copyright (C) 1991, 1995 Aladdin Enterprises.  All rights reserved.
%
% This file is part of GNU Ghostscript.
%
% GNU Ghostscript is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY.  No author or distributor accepts responsibility to
% anyone for the consequences of using it or for whether it serves any
% particular purpose or works at all, unless he says so in writing.  Refer
% to the GNU Ghostscript General Public License for full details.
%
% Extract the ASCII text from a PostScript file.  Nothing is displayed.
% Instead, ASCII information is written to stdout.  The idea is similar to
% Glenn Reid's `distillery', only a lot more simple-minded, and less robust.

% If SIMPLE is defined, just the text is written, with a guess at line
% breaks and word spacing.  If SIMPLE is not defined, lines are written
% to stdout as follows:
%
%       C <r> <g> <b>
%               Indicate the current color.  (NOT IMPLEMENTED YET)
%
%       F <height> <width> (<name>)
%               Indicate font height and width of a space.
%
%       P
%               Indicate the end of the page.
%
%       R <x> <y> <width> <height>
%               Display a rectangle.  (NOT IMPLEMENTED YET)
%
%       S <x> <y> (<string>) <width>
%               Display a string.
%
% <r>, <g>, and <b> are RGB values expressed as integers between 0 and 1000.
% <width> and <height> are integer dimensions in units of 1/720".
% <x> and <y> are integer coordinates, in units of 1/720", with the origin
%   at the lower left.
% <string> and <name> are strings represented with the standard
%   PostScript escape conventions.

% Note that future versions of this program may add other output elements,
% so programs parsing the output should be prepared to ignore elements
% that they do not recognize.

% Note that this code will only work in all cases if systemdict is writable
% and if `binding' the definitions of operators defined as procedures
% is deferred.  For this reason, it is normally invoked with
%       gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT ps2ascii.ps

% Thanks to J Greely for improvements to this code, and to Jerry Whelan
% for motivating other improvements.
/QUIET true def

% Put our definitions in systemdict when it is writable, otherwise in
% userdict.  The matching `end' is at the bottom of the file.
systemdict wcheck { systemdict } { userdict } ifelse begin

% <a> <b> .max <max(a,b)>  -- only defined here if not already available.
/.max where { pop } { /.max { 2 copy lt { exch } if pop } bind def } ifelse

% SIMPLE becomes a boolean: true iff the name was defined before this file ran.
/SIMPLE dup where { pop true } { false } ifelse def

% Switch to global VM when the interpreter has setglobal.  Whatever this
% leaves on the operand stack (either `<oldmode> setglobal-operator' or an
% empty procedure) is executed by the final `exec' at the end of the file
% to restore the previous mode.
/setglobal where
 { pop currentglobal /setglobal load true setglobal }
 { { } }
ifelse

% Define a way to store and retrieve integers that survives save/restore.
% Each "integer cell" is a string; .iput writes the decimal text of the
% number into it and .iget reads it back with cvi.
% The template must be wide enough for any integer we store: coordinates
% are in units of 1/720 inch and routinely need four or more digits, and a
% too-short cell makes `cvs' fail with rangecheck.  16 characters is room
% for a signed 32-bit value with space to spare.
/.i.string0 (0               ) def
% Scratch string of the same length (all null bytes) used to clear a cell.
/.i.string .i.string0 length string def
% <cell> .iget <int>
/.iget { cvi } bind def
% <cell> <int> .iput -
% Clear the cell by copying the blank scratch string over it, then write
% the new digits.
/.iput { exch //.i.string exch copy cvs pop } bind def
% - .inew <cell>       -- a fresh cell holding 0
/.inew { //.i.string0 dup length string copy } bind def

% Disable the display operators.
/eofill { newpath } odef
/erasepage { } odef
/fill { newpath } odef
/stroke { newpath } odef

% The image operators must read the input, but do nothing.
% They run the real operator against the null device inside gsave/grestore.
/colorimage where
 { pop /colorimage { gsave nulldevice //colorimage grestore } odef
 } if
/image { gsave nulldevice //image grestore } odef
/imagemask { gsave nulldevice //imagemask grestore } odef

% Redefine the end-of-page operators.
% A form feed is written in SIMPLE mode, otherwise a `P' line.
/copypage
 { SIMPLE { (\014) } { (P\n) } ifelse //print
 } odef
/showpage { copypage erasepage initgraphics } odef

% Output the current color if necessary.
/.color.r .inew def
  .color.r -1 .iput     % make sure we write the color at the beginning
/.color.g .inew def
/.color.b .inew def
/.showcolor
 { SIMPLE not
    { currentrgbcolor 1000 mul round cvi
      3 1 roll 1000 mul round cvi
      exch 1000 mul round cvi
                % Stack: b g r
      dup //.color.r .iget eq
      2 index //.color.g .iget eq and
      3 index //.color.b .iget eq and
       {        % Same color as last time: nothing to write.
         pop pop pop
       }
       {        % Remember and write each component: C r g b
         (C ) //print
         dup //.color.r exch .iput .show==only
         ( ) //print
         dup //.color.g exch .iput .show==only
         ( ) //print
         dup //.color.b exch .iput .show==only
         (\n) //print
       }
      ifelse
    }
   if
 } bind def
% Some programs that interpret the output of ps2ascii aren't prepared to
% handle the C command, so ****** WE DISABLE IT ******.
/.showcolor { } def

% Redefine `show'.

% Set things up so our output will be in tenths of a point, with origin at
% lower left.
% Working in a fixed coordinate system isolates us from the peculiarities
% of individual devices.
/.show.ident.matrix matrix def

% - .show.ident <matrix>
% Returns (in .show.ident.matrix) the initial matrix scaled by 0.1 in both
% directions, and as a side effect defines .show.scale as the larger
% absolute component of the device-space image of the vector (0.1, 0).
/.show.ident
% Older implementation, left commented out:
%  { //.show.ident.matrix defaultmatrix
%               % Assume the original transformation is well-behaved.
%    0.1 0 2 index dtransform abs exch abs .max /.show.scale exch def
%    0.1 dup 3 -1 roll scale
 { gsave
     initmatrix
                % Assume the original transformation is well-behaved.
     0.1 0 dtransform abs exch abs .max
     /.show.scale exch def
     0.1 dup scale
     .show.ident.matrix currentmatrix
   grestore
 } bind def

% <x> <y> .coord <ix> <iy>
% Map a user-space point into the output coordinate system and round both
% coordinates to integers.
/.coord
 { transform .show.ident itransform
   round cvi exch       % y first ...
   round cvi exch       % ... then x, restoring the x y order
 } odef

% <dx> <dy> .dcoord <iw> <ih>
/.dcoord
 {              % Transforming distances is trickier, because
                % the coordinate system might be rotated.
   .show.ident pop      % called only to (re)define .show.scale
                % Length of the device-space image of (dx, 0):
   exch 0 dtransform
%    dup 0 ne { dup mul exch dup mul add sqrt } { pop abs } ifelse
   dup mul exch dup mul add sqrt
   .show.scale div round cvi
                % Length of the device-space image of (0, dy):
   exch 0 exch dtransform
%    exch dup 0 ne { dup mul exch dup mul add sqrt } { pop abs } ifelse
   dup mul exch dup mul add sqrt
   .show.scale div round cvi
 } odef

% Remember the current X, Y, and height.
/.show.x .inew def
/.show.y .inew def
/.show.height .inew def

% Remember the last character of the previous string; if it was a
% hyphen preceded by a letter, we didn't output the hyphen.
/.show.last (\000) def

% Remember the current font.
/.font.name 130 string def
/.font.name.length .inew def
/.font.height .inew def
/.font.width .inew def

% We have to redirect stdout somehow....
/.show.stdout { (%stdout) (w) file } bind def

% Make sure writing will work even if a program uses =string.
/.show.string =string length string def
/.show.=string =string length string def

% <obj> .show==only -
% Write obj to stdout with write==only, preserving the contents of =string
% across the call.
/.show==only
 {              % Save =string.
   //=string //.show.=string copy pop
                % A readable string that fits is copied into our own
                % buffer before being written.
   dup type /stringtype eq
    { dup length //.show.string length le
       { dup rcheck { //.show.string copy } if
       }
      if
    }
   if
   .show.stdout exch write==only
                % Put =string back the way it was.
   //.show.=string //=string copy pop
 } odef

% - .showfont -
% Work out the height, width and name of the current font.  In SIMPLE mode
% only the height is remembered; otherwise an `F' line is written whenever
% any of the three differs from the values written last time.
/.showfont
 { gsave
                % Try getting the height and width of the font from the FontBBox.
     currentfont /FontBBox .knownget not { {0 0 0 0} } if
     aload pop
     exch 4 -1 roll sub         % urx - llx
     3 1 roll exch sub          % ury - lly
     2 copy .max 0 ne
      { currentfont /FontMatrix get dtransform
      }
      { pop pop
                % Fonts produced by dvips, among other applications, have
                % BuildChar procedures that bomb out when given unexpected
                % characters, and there is no way to determine whether a given
                % character will do this.  So for Type 1 fonts, we measure a
                % typical character ('X'); for others, we punt.
        currentfont /FontType get 1 eq
         { (X) stringwidth pop dup 1.3 mul
         }
         {      % No safe way to get the character size.  Punt.
           0 0
         }
        ifelse
      }
     ifelse
     .dcoord exch
     currentfont /FontName .knownget not { () } if
     dup type /stringtype ne { //.show.string cvs } if
   grestore
                % Stack: height width fontname
   SIMPLE
    { pop pop
      //.show.height exch .iput
    }
    { 2 index //.font.height .iget eq
      2 index //.font.width .iget eq and
      1 index //.font.name 0 //.font.name.length .iget getinterval eq and
       {        % Nothing changed.
         pop pop pop
       }
       {        % Remember and write: F height width (name)
         (F ) //print
         3 -1 roll dup //.font.height exch .iput .show==only
         ( ) //print
         exch dup //.font.width exch .iput .show==only
         ( ) //print
         dup length //.font.name.length exch .iput
         //.font.name cvs .show==only
         (\n) //print
       }
      ifelse
    }
   ifelse
 } odef

% Define the letters -- characters which, if they occur followed by a hyphen
% at the end of a line, cause the hyphen and line break to be ignored.
% The dictionary maps the StandardEncoding glyph name of each of A-Z and
% a-z to itself.
/.letter.chars 100 dict def
65 1 90
 {              % upper-case code, then the matching lower-case code
   dup 32 add
   2 { StandardEncoding exch get .letter.chars exch dup put } repeat
 }
for

% Define a set of characters which, if they occur at the start of a line,
% are taken as indicating a paragraph break.
/.break.chars 50 dict def
[ /bullet /dagger /daggerdbl /periodcentered /section ]
 { .break.chars exch dup put }
forall

% Define character translation to ASCII.
% We have to do this for the entire character set.
/.char.map 500 dict def
% mark <name1> <string1> ... <nameN> <stringN> .chars.def -
% Enter every name/string pair above the mark into .char.map, then
% remove the mark.
/.chars.def
 { counttomark 2 idiv
    { .char.map 3 1 roll put }
   repeat
   pop
 } def

% Encode the printable ASCII characters.
% Printable ASCII: for each code 32..126, map its StandardEncoding glyph
% name to the one-character string containing that code.
mark
32 1 126
 { dup StandardEncoding exch get        % code name
   exch 1 string dup 0 4 -1 roll put    % name string
 }
for
.chars.def

% Encode accents.
mark
  /acute        (')
  /caron        (^)
  /cedilla      (,)
  /circumflex   (^)
  /dieresis     (")
  /grave        (`)
  /ring         (*)
  /tilde        (~)
.chars.def

% Encode the ISO accented characters.
% Each ISOLatin1Encoding glyph name for codes 192..255 is split into its
% first character and the remainder (e.g. `A' + `acute').  When both parts
% already have translations, the name is mapped to the two translations
% concatenated.  Entries are stored directly, so nothing is left above
% the mark for .chars.def except the mark itself.
mark
192 1 255
 { ISOLatin1Encoding exch get =string cvs
   dup 0 1 getinterval                          % name first
   1 index dup length 1 sub 1 exch getinterval  % name first rest
   .char.map 2 index known
   .char.map 2 index known
   and
    { .char.map 3 -1 roll get                   % name rest <first>
      .char.map 3 -1 roll get                   % name <first> <rest>
      concatstrings
      .char.map 3 1 roll put
    }
    { pop pop pop
    }
   ifelse
 }
for
.chars.def

% Encode the remaining standard and ISO alphabetic characters.
mark
  /AE (AE)   /Eth (DH)   /OE (OE)   /Thorn (Th)
  /ae (ae)   /eth (dh)
  /ffi (ffi) /ffl (ffl)  /fi (fi)   /fl (fl)
  /germandbls (ss)       /oe (oe)   /thorn (th)
.chars.def

% Encode the other standard and ISO characters.
mark
  /brokenbar (|)        /bullet (*)             /copyright ((C))
  /currency (#)         /dagger (#)             /daggerdbl (##)
  /degree (o)           /divide (/)             /dotaccent (.)
  /dotlessi (i)         /ellipsis (...)         /emdash (--)
  /endash (-)           /exclamdown (!)         /florin (f)
  /fraction (/)         /guillemotleft (<<)     /guillemotright (>>)
  /guilsingleft (<)     /guilsingright (>)      /hungarumlaut ("")
  /logicalnot (~)       /macron (_)             /minus (-)
  /mu (u)               /multiply (*)           /ogonek (,)
  /onehalf (1/2)        /onequarter (1/4)       /onesuperior (1)
  /ordfeminine (-a)     /ordmasculine (-o)      /paragraph (||)
  /periodcentered (*)   /perthousand (o/oo)     /plusminus (+-)
  /questiondown (?)     /quotedblbase (")       /quotedblleft (")
  /quotedblright (")    /quotesinglbase (,)     /quotesingle (')
  /registered ((R))     /section ($)            /sterling (#)
  /threequarters (3/4)  /threesuperior (3)      /trademark ((TM))
  /twosuperior (2)      /yen (Y)
.chars.def

% Encode a few common Symbol characters.
mark
  /asteriskmath (*)
  /copyrightsans ((C))  /copyrightserif ((C))
  /greaterequal (>=)    /lessequal (<=)
  /registersans ((R))   /registerserif ((R))
  /trademarksans ((TM)) /trademarkserif ((TM))
.chars.def

% Write out a string.
% .show.write: if the string ends in a letter and a hyphen,
% don't write the hyphen, and set .show.last to a hyphen;
% otherwise, set .show.last to the character (or \000 if it was a hyphen).
% Each character is then translated through .char.map and written to stdout.
/.show.write            % <string> .show.write -
 { dup length 1 ge
    { dup dup length 1 sub get dup 45 eq                % hyphen
       {        % Find the character before the hyphen: in this string
                % if there is one, else the last one of the previous string.
         1 index length 1 gt
          { 1 index dup length 2 sub get }
          { //.show.last 0 get }
         ifelse
         currentfont /Encoding get exch get
         //.letter.chars exch known
          {     % Remove the hyphen
            exch dup length 1 sub 0 exch getinterval exch
          }
          { pop 0
          }
         ifelse
       }
      if
                % Stack: string lastcode
      //.show.last 0 3 -1 roll put
    }
   if
    {           % Stack: code
      dup currentfont /Encoding get exch get
      dup //.char.map exch .knownget
       {        % Stack: code glyphname translation
         .show.stdout exch writestring pop pop
       }
       {        % Stack: code glyphname
                % Fetch the font's encoding array itself.  Testing the
                % literal name /Encoding instead can never match, and
                % leaves an extra object on the stack so the character
                % code is never consumed.
         currentfont /Encoding get
         dup StandardEncoding eq exch ISOLatin1Encoding eq or
          {     % Untranslated character in standard encoding
            pop .show.stdout exch write
          }
          {     % Character in non-standard encoding, substitute
            pop pop .show.stdout (*) writestring
          }
         ifelse
       }
      ifelse
    }
   forall
 } odef

% <string> .showstring1 -
% Advance the current point by the string's width and emit the string:
% as running text in SIMPLE mode, otherwise as an `S' line.
/.showstring1
 { currentpoint .coord
   3 -1 roll dup stringwidth 1 index 0 rmoveto
   .dcoord pop
                % Stack: x y string width
   SIMPLE
    { 2 index //.show.y .iget ne
       {        % Try to figure out whether we have a line break
                % or a paragraph break.
         2 index //.show.y .iget sub abs
         //.show.height .iget 1.3 mul ge
         2 index length 0 gt
          {     % A leading break character also forces a paragraph.
            2 index 0 get currentfont /Encoding get exch get
            //.break.chars exch known { pop true } if
          }
         if
          { (\n\n)      % Paragraph
          }
          { ( )         % Line
          }
         ifelse
         //print
         //.show.y 3 index .iput
         //.show.x 4 index .iput
       }
       {        % If the word processor split a hyphenated word within
                % the same line, put out the hyphen now.
         //.show.last 0 get 45 eq { (-) //print } if
       }
      ifelse
                % A horizontal gap of more than 10 units is taken as a
                % word space.
      3 index //.show.x .iget 10 add gt
       { ( ) //print
       }
      if
      4 1 roll .show.write
                % Stack: width x y -- remember where this string ended.
      pop add //.show.x exch .iput
    }
    {           % Write: S x y (string) width
      (S ) //print
      4 -1 roll .show==only
      ( ) //print
      3 -1 roll .show==only
      ( ) //print
      exch .show==only
      ( ) //print
      .show==only
      (\n) //print
    }
   ifelse
 } odef

% <string> .showstring -
% Empty strings produce no output at all.
/.showstring
 { dup () eq { pop } { .showstring1 } ifelse
 } bind def

% Redefine all the string display operators.
% <string> show -
/show { .showfont .showcolor .showstring } odef

% We define all the other operators in terms of .show1.
/.show1.string ( ) def
% <charcode> .show1 -   -- show a single character
/.show1
 { //.show1.string exch 0 exch put
   //.show1.string .showstring
 } odef

% <ax> <ay> <string> ashow -
/ashow
 { .showfont .showcolor
    { .show1 2 copy rmoveto }
   forall
   pop pop
 } odef

% <cx> <cy> <char> <ax> <ay> <string> awidthshow -
/awidthshow
 { .showfont .showcolor
    {           % Stack: cx cy char ax ay code
      dup .show1
                % Compare the code just shown with <char>, which is the
                % 4th element from the top (3 index), not <cy> (4 index).
      3 index eq { 4 index 4 index rmoveto } if
      2 copy rmoveto
    }
   forall
   pop pop pop pop pop
 } odef

% <cx> <cy> <char> <string> widthshow -
/widthshow
 { .showfont .showcolor
                % Put <char> into the one-character search string.
   //.show1.string 0 4 -1 roll put
    {           % Stack: cx cy string
      //.show1.string search not { exit } if
                % Stack: cx cy post match pre
      .showstring .showstring
      2 index 2 index rmoveto
    }
   loop
   .showstring pop pop
 } odef

% <proc> <string> kshow -
% Note that proc is simply executed after every character; no character
% codes are pushed for it.
/kshow
 { .showfont .showcolor
    { .show1 dup exec }
   forall
   pop
 } odef

% We don't really do the right thing with the Level 2 show operators,
% but we do something semi-reasonable.
/xshow where
 { pop /xshow { pop show } bind odef
 } if
/yshow where
 { pop /yshow { pop show } bind odef
 } if
/xyshow where
 { pop /xyshow { pop show } bind odef
 } if
% <glyphname> glyphshow -
% Look the glyph up in the current font's Encoding; if it is there, show
% the corresponding character code, otherwise do nothing.
/glyphshow where
 { pop /glyphshow
    { currentfont /Encoding .knownget not { {} } if
      0 1 2 index length 1 sub
       {        % Stack: glyph encoding index
         2 copy get 3 index eq
          {     % Found: leave `index null' and stop searching.
            exch pop exch pop null exit
          }
         if
                % No match: discard this index before the next iteration
                % pushes a new one.
         pop
       }
      for
                % Stack: index null (found), or glyph encoding (not found)
      null eq
       { (X) dup 0 4 -1 roll put show }
       { pop }
      ifelse
    } bind odef
 } if

% Redirect the printing operators.
% Output printed by the file being processed goes to scratch files; the
% procedures above captured the original `print' with //print before this
% redefinition.
/.stdout (_temp_.out) (w) file def
/.stderr (_temp_.err) (w) file def
/print { //.stdout exch writestring } odef
/=only { //.stdout exch write=only } odef
/==only { //.stdout exch write==only } odef

% Close the dictionary opened by `begin' at the top of the file.
end

% Bind the operators we just defined, and all the others if we didn't
% do it before.  Also reenable 'bind' for future files.
.bindoperators
NOBIND currentdict systemdict ne and
 { systemdict begin .bindoperators end }
if
NOBIND
 { /bind /.bind load def }
if

% Make systemdict read-only if it wasn't already.
systemdict wcheck { systemdict readonly pop } if

% Restore the current local/global VM mode.
% This executes the object(s) left on the operand stack by the setglobal
% test near the top of the file.
exec