Commit 388990f7bcddec0583fd5a84f62d4aa0eba88d39

Authored by Jay Berkenbilt
1 parent a44b5a34

Rewrite fix-qdf in C++

ChangeLog
1 2020-01-14 Jay Berkenbilt <ejb@ql.org> 1 2020-01-14 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Rewrite fix-qdf in C++. This means fix-qdf is a proper
  4 + executable now, and there is no longer a runtime requirement on
  5 + perl.
  6 +
3 * Add QUtil::call_main_from_wmain, a helper function that can be 7 * Add QUtil::call_main_from_wmain, a helper function that can be
4 called in the body of wmain to convert UTF-16 arguments to UTF-8 8 called in the body of wmain to convert UTF-16 arguments to UTF-8
5 arguments and then call another main function. 9 arguments and then call another main function.
make/installwin.mk
@@ -14,7 +14,7 @@ installwin: all @@ -14,7 +14,7 @@ installwin: all
14 perl copy_dlls libqpdf/$(OUTPUT_DIR)/qpdf*.dll $(DEST)/bin $(OBJDUMP) $(WINDOWS_WORDSIZE) 14 perl copy_dlls libqpdf/$(OUTPUT_DIR)/qpdf*.dll $(DEST)/bin $(OBJDUMP) $(WINDOWS_WORDSIZE)
15 cp qpdf/$(OUTPUT_DIR)/qpdf.exe $(DEST)/bin 15 cp qpdf/$(OUTPUT_DIR)/qpdf.exe $(DEST)/bin
16 cp zlib-flate/$(OUTPUT_DIR)/zlib-flate.exe $(DEST)/bin 16 cp zlib-flate/$(OUTPUT_DIR)/zlib-flate.exe $(DEST)/bin
17 - cp qpdf/fix-qdf $(DEST)/bin 17 + cp qpdf/$(OUTPUT_DIR)/fix-qdf.exe $(DEST)/bin
18 cp include/qpdf/*.h $(DEST)/include/qpdf 18 cp include/qpdf/*.h $(DEST)/include/qpdf
19 cp include/qpdf/*.hh $(DEST)/include/qpdf 19 cp include/qpdf/*.hh $(DEST)/include/qpdf
20 cp doc/stylesheet.css $(DEST)/doc 20 cp doc/stylesheet.css $(DEST)/doc
make/libtool.mk
@@ -122,7 +122,9 @@ install: all @@ -122,7 +122,9 @@ install: all
122 $(LIBTOOL) --mode=install ./install-sh \ 122 $(LIBTOOL) --mode=install ./install-sh \
123 zlib-flate/$(OUTPUT_DIR)/zlib-flate \ 123 zlib-flate/$(OUTPUT_DIR)/zlib-flate \
124 $(DESTDIR)$(bindir)/zlib-flate 124 $(DESTDIR)$(bindir)/zlib-flate
125 - ./install-sh -m 0755 qpdf/fix-qdf $(DESTDIR)$(bindir) 125 + $(LIBTOOL) --mode=install ./install-sh \
  126 + qpdf/$(OUTPUT_DIR)/fix-qdf \
  127 + $(DESTDIR)$(bindir)/fix-qdf
126 ./install-sh -m 0644 include/qpdf/*.h $(DESTDIR)$(includedir)/qpdf 128 ./install-sh -m 0644 include/qpdf/*.h $(DESTDIR)$(includedir)/qpdf
127 ./install-sh -m 0644 include/qpdf/*.hh $(DESTDIR)$(includedir)/qpdf 129 ./install-sh -m 0644 include/qpdf/*.hh $(DESTDIR)$(includedir)/qpdf
128 ./install-sh -m 0644 doc/stylesheet.css $(DESTDIR)$(docdir) 130 ./install-sh -m 0644 doc/stylesheet.css $(DESTDIR)$(docdir)
manual/qpdf-manual.xml
@@ -150,7 +150,8 @@ @@ -150,7 +150,8 @@
150 <para> 150 <para>
151 perl version 5.8 or newer: 151 perl version 5.8 or newer:
152 <ulink url="http://www.perl.org/">http://www.perl.org/</ulink>; 152 <ulink url="http://www.perl.org/">http://www.perl.org/</ulink>;
153 - required for <command>fix-qdf</command> and the test suite. 153 + required for running the test suite. Starting with qpdf version
  154 + 9.1.1, perl is no longer required at runtime.
154 </para> 155 </para>
155 </listitem> 156 </listitem>
156 <listitem> 157 <listitem>
@@ -473,6 +474,13 @@ make @@ -473,6 +474,13 @@ make
473 <itemizedlist> 474 <itemizedlist>
474 <listitem> 475 <listitem>
475 <para> 476 <para>
  477 + Starting in qpdf version 9.1.1, qpdf no longer has a runtime
  478 + dependency on perl. This is because fix-qdf was rewritten in
  479 + C++. However, qpdf still has a build-time dependency on perl.
  480 + </para>
  481 + </listitem>
  482 + <listitem>
  483 + <para>
476 Make sure you are getting the intended behavior with regard to 484 Make sure you are getting the intended behavior with regard to
477 crypto providers. Read <xref linkend="ref.crypto.build"/> for 485 crypto providers. Read <xref linkend="ref.crypto.build"/> for
478 details. 486 details.
qpdf/build.mk
1 BINS_qpdf = \ 1 BINS_qpdf = \
2 qpdf \ 2 qpdf \
  3 + fix-qdf \
3 pdf_from_scratch \ 4 pdf_from_scratch \
4 test_driver \ 5 test_driver \
5 test_large_file \ 6 test_large_file \
@@ -24,10 +25,13 @@ TC_SRCS_qpdf = $(wildcard libqpdf/*.cc) $(wildcard qpdf/*.cc) @@ -24,10 +25,13 @@ TC_SRCS_qpdf = $(wildcard libqpdf/*.cc) $(wildcard qpdf/*.cc)
24 25
25 XCXXFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_COMPILE) 26 XCXXFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_COMPILE)
26 XLDFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_LINK) 27 XLDFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_LINK)
  28 +XLINK_FLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_XLINK_FLAGS)
27 XCXXFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_COMPILE) 29 XCXXFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_COMPILE)
28 XLDFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_LINK) 30 XLDFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_LINK)
29 -XLINK_FLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_XLINK_FLAGS)  
30 XLINK_FLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_XLINK_FLAGS) 31 XLINK_FLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_XLINK_FLAGS)
  32 +XCXXFLAGS_qpdf_fix-qdf := $(WINDOWS_WMAIN_COMPILE)
  33 +XLDFLAGS_qpdf_fix-qdf := $(WINDOWS_WMAIN_LINK)
  34 +XLINK_FLAGS_qpdf_fix-qdf := $(WINDOWS_WMAIN_XLINK_FLAGS)
31 35
32 $(foreach B,$(BINS_qpdf),$(eval \ 36 $(foreach B,$(BINS_qpdf),$(eval \
33 OBJS_$(B) = $(call src_to_obj,qpdf/$(B).cc))) 37 OBJS_$(B) = $(call src_to_obj,qpdf/$(B).cc)))
qpdf/fix-qdf deleted
1 -#!/usr/bin/env perl  
2 -  
3 -require 5.008_001;  
4 -use warnings;  
5 -use strict;  
6 -use File::Basename;  
7 -  
8 -my $whoami = basename($0);  
9 -my $dirname = dirname($0);  
10 -  
11 -if ((@ARGV == 1) && ($ARGV[0] eq '--version'))  
12 -{  
13 - exec "$dirname/qpdf", '--version';  
14 - exit 2;  
15 -}  
16 -  
17 -my $offset = 0;  
18 -my $last_offset = 0;  
19 -  
20 -my $file = shift(@ARGV);  
21 -if (defined $file)  
22 -{  
23 - open(F, "<$file") or die "$whoami: can't open $file: $!\n";  
24 -}  
25 -else  
26 -{  
27 - $file = 'stdin';  
28 - open(F, "<&STDIN") or die "$whoami: can't dup stdin: $!\n";  
29 -}  
30 -binmode F;  
31 -binmode STDOUT;  
32 -  
33 -my $line = get_line();  
34 -if (! ((defined $line) && ($line =~ m/^%PDF-1\.\d+\b/)))  
35 -{  
36 - die "$whoami: $file: not a pdf file\n";  
37 -}  
38 -print $line;  
39 -$line = get_line();  
40 -die "$whoami: $file: premature EOF\n" unless defined $line;  
41 -print $line;  
42 -$line = get_line();  
43 -if (! ((defined $line) && ($line =~ m/^%QDF-1.\d+\b/)))  
44 -{  
45 - die "$whoami: $file: not a qdf file\n";  
46 -}  
47 -print $line;  
48 -  
49 -my $last_obj = 0;  
50 -my @xref = ();  
51 -  
52 -my $stream_start = 0;  
53 -my $stream_length = 0;  
54 -my $xref_offset = 0;  
55 -my $xref_f1_nbytes = 0;  
56 -my $xref_f2_nbytes = 0;  
57 -my $xref_size = 0;  
58 -  
59 -my $cur_state = 0;  
60 -my $st_top = ++$cur_state;  
61 -my $st_in_obj = ++$cur_state;  
62 -my $st_in_stream = ++$cur_state;  
63 -my $st_after_stream = ++$cur_state;  
64 -my $st_in_ostream_dict = ++$cur_state;  
65 -my $st_in_ostream_offsets = ++$cur_state;  
66 -my $st_in_ostream_outer = ++$cur_state;  
67 -my $st_in_ostream_obj = ++$cur_state;  
68 -my $st_in_xref_stream_dict = ++$cur_state;  
69 -my $st_in_length = ++$cur_state;  
70 -my $st_at_xref = ++$cur_state;  
71 -my $st_before_trailer = ++$cur_state;  
72 -my $st_in_trailer = ++$cur_state;  
73 -my $st_done = ++$cur_state;  
74 -  
75 -my @ostream = ();  
76 -my @ostream_offsets = ();  
77 -my @ostream_discarded = ();  
78 -my $ostream_idx = 0;  
79 -my $ostream_id = 0;  
80 -my $ostream_extends = "";  
81 -  
82 -my $state = $st_top;  
83 -while (defined($line = get_line()))  
84 -{  
85 - if ($state == $st_top)  
86 - {  
87 - if ($line =~ m/^(\d+) 0 obj$/)  
88 - {  
89 - check_obj_id($1);  
90 - $state = $st_in_obj;  
91 - }  
92 - elsif ($line =~ m/^xref$/)  
93 - {  
94 - $xref_offset = $last_offset;  
95 - $state = $st_at_xref;  
96 - }  
97 - print $line;  
98 - }  
99 - elsif ($state == $st_in_obj)  
100 - {  
101 - print $line;  
102 - if ($line =~ m/^stream$/)  
103 - {  
104 - $state = $st_in_stream;  
105 - $stream_start = $offset;  
106 - }  
107 - elsif ($line =~ m/^endobj$/)  
108 - {  
109 - $state = $st_top;  
110 - }  
111 - elsif ($line =~ m,/Type /ObjStm,)  
112 - {  
113 - $state = $st_in_ostream_dict;  
114 - $ostream_id = $last_obj;  
115 - }  
116 - elsif ($line =~ m,/Type /XRef,)  
117 - {  
118 - $xref_offset = $xref[-1][1];  
119 - $xref_f1_nbytes = 0;  
120 - my $t = $xref_offset;  
121 - while ($t)  
122 - {  
123 - $t >>= 8;  
124 - ++$xref_f1_nbytes;  
125 - }  
126 - # Figure out how many bytes we need for ostream index.  
127 - # Make sure we get at least 1 byte even if there are no  
128 - # object streams.  
129 - my $max_objects = 1;  
130 - foreach my $e (@xref)  
131 - {  
132 - my ($type, $f1, $f2) = @$e;  
133 - if ((defined $f2) && ($f2 > $max_objects))  
134 - {  
135 - $max_objects = $f2;  
136 - }  
137 - }  
138 - while ($max_objects)  
139 - {  
140 - $max_objects >>=8;  
141 - ++$xref_f2_nbytes;  
142 - }  
143 - my $esize = 1 + $xref_f1_nbytes + $xref_f2_nbytes;  
144 - $xref_size = 1 + @xref;  
145 - my $length = $xref_size * $esize;  
146 - print " /Length $length\n";  
147 - print " /W [ 1 $xref_f1_nbytes $xref_f2_nbytes ]\n";  
148 - $state = $st_in_xref_stream_dict;  
149 - }  
150 - }  
151 - elsif ($state == $st_in_ostream_dict)  
152 - {  
153 - if ($line =~ m/^stream/)  
154 - {  
155 - $state = $st_in_ostream_offsets;  
156 - }  
157 - else  
158 - {  
159 - push(@ostream_discarded, $line);  
160 - if ($line =~ m,/Extends (\d+ 0 R),)  
161 - {  
162 - $ostream_extends = $1;  
163 - }  
164 - }  
165 - # discard line  
166 - }  
167 - elsif ($state == $st_in_ostream_offsets)  
168 - {  
169 - if ($line =~ m/^\%\% Object stream: object (\d+)/)  
170 - {  
171 - check_obj_id($1);  
172 - $stream_start = $last_offset;  
173 - $state = $st_in_ostream_outer;  
174 - push(@ostream, $line);  
175 - }  
176 - else  
177 - {  
178 - push(@ostream_discarded, $line);  
179 - }  
180 - # discard line  
181 - }  
182 - elsif ($state == $st_in_ostream_outer)  
183 - {  
184 - adjust_ostream_xref();  
185 - push(@ostream_offsets, $last_offset - $stream_start);  
186 - $state = $st_in_ostream_obj;  
187 - push(@ostream, $line);  
188 - }  
189 - elsif ($state == $st_in_ostream_obj)  
190 - {  
191 - push(@ostream, $line);  
192 - if ($line =~ m/^\%\% Object stream: object (\d+)/)  
193 - {  
194 - check_obj_id($1);  
195 - $state = $st_in_ostream_outer;  
196 - }  
197 - elsif ($line =~ m/^endstream/)  
198 - {  
199 - $stream_length = $last_offset - $stream_start;  
200 - write_ostream();  
201 - $state = $st_in_obj;  
202 - }  
203 - }  
204 - elsif ($state == $st_in_xref_stream_dict)  
205 - {  
206 - if ($line =~ m,/(Length|W) ,)  
207 - {  
208 - # already printed  
209 - }  
210 - elsif ($line =~ m,/Size ,)  
211 - {  
212 - my $size = 1 + @xref;  
213 - print " /Size $xref_size\n";  
214 - }  
215 - else  
216 - {  
217 - print $line;  
218 - }  
219 - if ($line =~ m/^stream\n/)  
220 - {  
221 - my $pack = "(C C$xref_f1_nbytes C$xref_f2_nbytes)";  
222 - print pack($pack, 0, 0, 0);  
223 - foreach my $x (@xref)  
224 - {  
225 - my ($type, $f1, $f2) = @$x;  
226 - $f2 = 0 unless defined $f2;  
227 - my @f1 = ();  
228 - my @f2 = ();  
229 - foreach my $d ([\@f1, $f1, $xref_f1_nbytes],  
230 - [\@f2, $f2, $xref_f2_nbytes])  
231 - {  
232 - my ($fa, $f, $nbytes) = @$d;  
233 - for (my $i = 0; $i < $nbytes; ++$i)  
234 - {  
235 - unshift(@$fa, $f & 0xff);  
236 - $f >>= 8;  
237 - }  
238 - }  
239 - print pack($pack, $type, @f1, @f2);  
240 - }  
241 - print "\nendstream\nendobj\n\n";  
242 - print "startxref\n$xref_offset\n\%\%EOF\n";  
243 - $state = $st_done;  
244 - }  
245 - }  
246 - elsif ($state == $st_in_stream)  
247 - {  
248 - if ($line =~ m/^endstream$/)  
249 - {  
250 - $stream_length = $last_offset - $stream_start;  
251 - $state = $st_after_stream;  
252 - }  
253 - print $line;  
254 - }  
255 - elsif ($state == $st_after_stream)  
256 - {  
257 - if ($line =~ m/^\%QDF: ignore_newline$/)  
258 - {  
259 - --$stream_length;  
260 - }  
261 - elsif ($line =~ m/^(\d+) 0 obj$/)  
262 - {  
263 - check_obj_id($1);  
264 - $state = $st_in_length;  
265 - }  
266 - print $line;  
267 - }  
268 - elsif ($state == $st_in_length)  
269 - {  
270 - if ($line !~ m/^\d+$/)  
271 - {  
272 - die "$file:$.: expected integer\n";  
273 - }  
274 - my $new = "$stream_length\n";  
275 - $offset -= length($line);  
276 - $offset += length($new);  
277 - print $new;  
278 - $state = $st_top;  
279 - }  
280 - elsif ($state == $st_at_xref)  
281 - {  
282 - my $n = scalar(@xref);  
283 - print "0 ", $n+1, "\n0000000000 65535 f \n";  
284 - for (@xref)  
285 - {  
286 - my ($type, $f1, $f2) = @$_;  
287 - printf("%010d 00000 n \n", $f1);  
288 - }  
289 - $state = $st_before_trailer;  
290 - }  
291 - elsif ($state == $st_before_trailer)  
292 - {  
293 - if ($line =~ m/^trailer <</)  
294 - {  
295 - print $line;  
296 - $state = $st_in_trailer;  
297 - }  
298 - # no output  
299 - }  
300 - elsif ($state == $st_in_trailer)  
301 - {  
302 - if ($line =~ m/^ \/Size \d+$/)  
303 - {  
304 - print " /Size ", scalar(@xref) + 1, "\n";  
305 - }  
306 - else  
307 - {  
308 - print $line;  
309 - }  
310 - if ($line =~ m/^>>$/)  
311 - {  
312 - print "startxref\n$xref_offset\n\%\%EOF\n";  
313 - $state = $st_done;  
314 - }  
315 - }  
316 - elsif ($state == $st_done)  
317 - {  
318 - # ignore  
319 - }  
320 -}  
321 -  
322 -die "$whoami: $file: premature EOF\n" unless $state == $st_done;  
323 -  
324 -sub get_line  
325 -{  
326 - my $line = scalar(<F>);  
327 - if (defined $line)  
328 - {  
329 - $last_offset = $offset;  
330 - $offset += length($line);  
331 - }  
332 - $line;  
333 -}  
334 -  
335 -sub check_obj_id  
336 -{  
337 - my $cur_obj = shift;  
338 - if ($cur_obj != $last_obj + 1)  
339 - {  
340 - die "$file:$.: expected object ", $last_obj + 1, "\n";  
341 - }  
342 - $last_obj = $cur_obj;  
343 - push(@xref, [1, $last_offset]);  
344 -}  
345 -  
346 -sub adjust_ostream_xref  
347 -{  
348 - pop(@xref);  
349 - push(@xref, [2, $ostream_id, $ostream_idx++]);  
350 -}  
351 -  
352 -sub write_ostream  
353 -{  
354 - my $first = $ostream_offsets[0];  
355 - my $onum = $ostream_id;  
356 - my $offsets = "";  
357 - my $n = scalar(@ostream_offsets);  
358 - for (@ostream_offsets)  
359 - {  
360 - $_ -= $first;  
361 - ++$onum;  
362 - $offsets .= "$onum $_\n";  
363 - }  
364 - my $offset_adjust = length($offsets);  
365 - $first += length($offsets);  
366 - $stream_length += length($offsets);  
367 - my $dict_data = "";  
368 - $dict_data .= " /Length $stream_length\n";  
369 - $dict_data .= " /N $n\n";  
370 - $dict_data .= " /First $first\n";  
371 - if ($ostream_extends)  
372 - {  
373 - $dict_data .= " /Extends $ostream_extends\n";  
374 - }  
375 - $dict_data .= ">>\n";  
376 - $offset_adjust += length($dict_data);  
377 - print $dict_data;  
378 - print "stream\n";  
379 - print $offsets;  
380 - foreach (@ostream)  
381 - {  
382 - print $_;  
383 - }  
384 -  
385 - for (@ostream_discarded)  
386 - {  
387 - $offset -= length($_);  
388 - }  
389 - $offset += $offset_adjust;  
390 -  
391 - $ostream_idx = 0;  
392 - $ostream_id = 0;  
393 - @ostream = ();  
394 - @ostream_offsets = ();  
395 - @ostream_discarded = ();  
396 - $ostream_extends = "";  
397 -}  
qpdf/fix-qdf.cc 0 → 100644
#include <qpdf/QUtil.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFXRefEntry.hh>
#include <qpdf/QIntC.hh>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <list>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
  9 +
// Program name used in diagnostics. It starts out null and is assigned
// from argv[0] by realmain before anything is printed.
static char const* whoami = nullptr;

// Print a one-line usage summary to standard error and terminate the
// process with exit status 2. This function does not return.
static void usage()
{
    std::cerr << "Usage: " << whoami << " [filename]" << std::endl;
    exit(2);
}
  17 +
  18 +class QdfFixer
  19 +{
  20 + public:
  21 + QdfFixer(std::string const& filename);
  22 + void processLines(std::list<std::string>& lines);
  23 + private:
  24 + void fatal(std::string const&);
  25 + void checkObjId(std::string const& obj_id);
  26 + void adjustOstreamXref();
  27 + void writeOstream();
  28 + void writeBinary(unsigned long long val, size_t bytes);
  29 +
  30 + std::string filename;
  31 + enum {
  32 + st_top,
  33 + st_in_obj,
  34 + st_in_stream,
  35 + st_after_stream,
  36 + st_in_ostream_dict,
  37 + st_in_ostream_offsets,
  38 + st_in_ostream_outer,
  39 + st_in_ostream_obj,
  40 + st_in_xref_stream_dict,
  41 + st_in_length,
  42 + st_at_xref,
  43 + st_before_trailer,
  44 + st_in_trailer,
  45 + st_done,
  46 + } state;
  47 +
  48 + size_t lineno;
  49 + qpdf_offset_t offset;
  50 + qpdf_offset_t last_offset;
  51 + int last_obj;
  52 + std::vector<QPDFXRefEntry> xref;
  53 + qpdf_offset_t stream_start;
  54 + size_t stream_length;
  55 + qpdf_offset_t xref_offset;
  56 + size_t xref_f1_nbytes;
  57 + size_t xref_f2_nbytes;
  58 + size_t xref_size;
  59 + std::vector<std::string> ostream;
  60 + std::vector<qpdf_offset_t> ostream_offsets;
  61 + std::vector<std::string> ostream_discarded;
  62 + size_t ostream_idx;
  63 + int ostream_id;
  64 + std::string ostream_extends;
  65 +};
  66 +
  67 +QdfFixer::QdfFixer(std::string const& filename) :
  68 + filename(filename),
  69 + state(st_top),
  70 + lineno(0),
  71 + offset(0),
  72 + last_offset(0),
  73 + last_obj(0),
  74 + stream_start(0),
  75 + stream_length(0),
  76 + xref_offset(0),
  77 + xref_f1_nbytes(0),
  78 + xref_f2_nbytes(0),
  79 + xref_size(0),
  80 + ostream_idx(0),
  81 + ostream_id(0)
  82 +{
  83 +}
  84 +
  85 +void
  86 +QdfFixer::fatal(std::string const& msg)
  87 +{
  88 + std::cerr << msg << std::endl;
  89 + exit(2);
  90 +}
  91 +
  92 +void
  93 +QdfFixer::processLines(std::list<std::string>& lines)
  94 +{
  95 + static std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
  96 + static std::regex re_xref("^xref\n$");
  97 + static std::regex re_stream("^stream\n$");
  98 + static std::regex re_endobj("^endobj\n$");
  99 + static std::regex re_type_objstm("/Type /ObjStm");
  100 + static std::regex re_type_xref("/Type /XRef");
  101 + static std::regex re_extends("/Extends (\\d+ 0 R)");
  102 + static std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
  103 + static std::regex re_endstream("^endstream\n$");
  104 + static std::regex re_length_or_w("/(Length|W) ");
  105 + static std::regex re_size("/Size ");
  106 + static std::regex re_ignore_newline("^%QDF: ignore_newline\n$");
  107 + static std::regex re_num("^\\d+\n$");
  108 + static std::regex re_trailer("^trailer <<");
  109 + static std::regex re_size_n("^ /Size \\d+\n$");
  110 + static std::regex re_dict_end("^>>\n$");
  111 +
  112 + lineno = 0;
  113 + for (auto line: lines)
  114 + {
  115 + ++lineno;
  116 + last_offset = offset;
  117 + offset += QIntC::to_offset(line.length());
  118 + std::smatch m;
  119 + auto matches = [&m, &line](std::regex& r){
  120 + return std::regex_search(line, m, r); };
  121 + if (state == st_top)
  122 + {
  123 + if (matches(re_n_0_obj))
  124 + {
  125 + checkObjId(m[1].str());
  126 + state = st_in_obj;
  127 + }
  128 + else if (matches(re_xref))
  129 + {
  130 + xref_offset = last_offset;
  131 + state = st_at_xref;
  132 + }
  133 + std::cout << line;
  134 + }
  135 + else if (state == st_in_obj)
  136 + {
  137 + std::cout << line;
  138 + if (matches(re_stream))
  139 + {
  140 + state = st_in_stream;
  141 + stream_start = offset;
  142 + }
  143 + else if (matches(re_endobj))
  144 + {
  145 + state = st_top;
  146 + }
  147 + else if (matches(re_type_objstm))
  148 + {
  149 + state = st_in_ostream_dict;
  150 + ostream_id = last_obj;
  151 + }
  152 + else if (matches(re_type_xref))
  153 + {
  154 + xref_offset = xref.back().getOffset();
  155 + xref_f1_nbytes = 0;
  156 + auto t = xref_offset;
  157 + while (t)
  158 + {
  159 + t >>= 8;
  160 + ++xref_f1_nbytes;
  161 + }
  162 + // Figure out how many bytes we need for ostream
  163 + // index. Make sure we get at least 1 byte even if
  164 + // there are no object streams.
  165 + int max_objects = 1;
  166 + for (auto e: xref)
  167 + {
  168 + if ((e.getType() == 2) &&
  169 + (e.getObjStreamIndex() > max_objects))
  170 + {
  171 + max_objects = e.getObjStreamIndex();
  172 + }
  173 + }
  174 + while (max_objects)
  175 + {
  176 + max_objects >>=8;
  177 + ++xref_f2_nbytes;
  178 + }
  179 + auto esize = 1 + xref_f1_nbytes + xref_f2_nbytes;
  180 + xref_size = 1 + xref.size();
  181 + auto length = xref_size * esize;
  182 + std::cout << " /Length " << length << "\n"
  183 + << " /W [ 1 " << xref_f1_nbytes << " "
  184 + << xref_f2_nbytes << " ]" << "\n";
  185 + state = st_in_xref_stream_dict;
  186 + }
  187 + }
  188 + else if (state == st_in_ostream_dict)
  189 + {
  190 + if (matches(re_stream))
  191 + {
  192 + state = st_in_ostream_offsets;
  193 + }
  194 + else
  195 + {
  196 + ostream_discarded.push_back(line);
  197 + if (matches(re_extends))
  198 + {
  199 + ostream_extends = m[1].str();
  200 + }
  201 + }
  202 + // discard line
  203 + }
  204 + else if (state == st_in_ostream_offsets)
  205 + {
  206 + if (matches(re_ostream_obj))
  207 + {
  208 + checkObjId(m[1].str());
  209 + stream_start = last_offset;
  210 + state = st_in_ostream_outer;
  211 + ostream.push_back(line);
  212 + }
  213 + else
  214 + {
  215 + ostream_discarded.push_back(line);
  216 + }
  217 + // discard line
  218 + }
  219 + else if (state == st_in_ostream_outer)
  220 + {
  221 + adjustOstreamXref();
  222 + ostream_offsets.push_back(last_offset - stream_start);
  223 + state = st_in_ostream_obj;
  224 + ostream.push_back(line);
  225 + }
  226 + else if (state == st_in_ostream_obj)
  227 + {
  228 + ostream.push_back(line);
  229 + if (matches(re_ostream_obj))
  230 + {
  231 + checkObjId(m[1].str());
  232 + state = st_in_ostream_outer;
  233 + }
  234 + else if (matches(re_endstream))
  235 + {
  236 + stream_length = QIntC::to_size(last_offset - stream_start);
  237 + writeOstream();
  238 + state = st_in_obj;
  239 + }
  240 + }
  241 + else if (state == st_in_xref_stream_dict)
  242 + {
  243 + if (matches(re_length_or_w))
  244 + {
  245 + // already printed
  246 + }
  247 + else if (matches(re_size))
  248 + {
  249 + auto xref_size = 1 + xref.size();
  250 + std::cout << " /Size " << xref_size << "\n";
  251 + }
  252 + else
  253 + {
  254 + std::cout << line;
  255 + }
  256 + if (matches(re_stream))
  257 + {
  258 + writeBinary(0, 1);
  259 + writeBinary(0, xref_f1_nbytes);
  260 + writeBinary(0, xref_f2_nbytes);
  261 + for (auto x: xref)
  262 + {
  263 + unsigned long long f1 = 0;
  264 + unsigned long long f2 = 0;
  265 + unsigned int type = QIntC::to_uint(x.getType());
  266 + if (1 == type)
  267 + {
  268 + f1 = QIntC::to_ulonglong(x.getOffset());
  269 + }
  270 + else
  271 + {
  272 + f1 = QIntC::to_ulonglong(x.getObjStreamNumber());
  273 + f2 = QIntC::to_ulonglong(x.getObjStreamIndex());
  274 + }
  275 + writeBinary(type, 1);
  276 + writeBinary(f1, xref_f1_nbytes);
  277 + writeBinary(f2, xref_f2_nbytes);
  278 + }
  279 + std::cout << "\nendstream\nendobj\n\n"
  280 + << "startxref\n" << xref_offset << "\n%%EOF\n";
  281 + state = st_done;
  282 + }
  283 + }
  284 + else if (state == st_in_stream)
  285 + {
  286 + if (matches(re_endstream))
  287 + {
  288 + stream_length = QIntC::to_size(last_offset - stream_start);
  289 + state = st_after_stream;
  290 + }
  291 + std::cout << line;
  292 + }
  293 + else if (state == st_after_stream)
  294 + {
  295 + if (matches(re_ignore_newline))
  296 + {
  297 + --stream_length;
  298 + }
  299 + else if (matches(re_n_0_obj))
  300 + {
  301 + checkObjId(m[1].str());
  302 + state = st_in_length;
  303 + }
  304 + std::cout << line;
  305 + }
  306 + else if (state == st_in_length)
  307 + {
  308 + if (! matches(re_num))
  309 + {
  310 + fatal(filename + ":" + QUtil::uint_to_string(lineno) +
  311 + ": expected integer");
  312 + }
  313 + std::string new_length =
  314 + QUtil::uint_to_string(stream_length) + "\n";
  315 + offset -= QIntC::to_offset(line.length());
  316 + offset += QIntC::to_offset(new_length.length());
  317 + std::cout << new_length;
  318 + state = st_top;
  319 + }
  320 + else if (state == st_at_xref)
  321 + {
  322 + auto n = xref.size();
  323 + std::cout << "0 " << 1 + n << "\n0000000000 65535 f \n";
  324 + for (auto e: xref)
  325 + {
  326 + std::cout << QUtil::int_to_string(e.getOffset(), 10)
  327 + << " 00000 n \n";
  328 + }
  329 + state = st_before_trailer;
  330 + }
  331 + else if (state == st_before_trailer)
  332 + {
  333 + if (matches(re_trailer))
  334 + {
  335 + std::cout << line;
  336 + state = st_in_trailer;
  337 + }
  338 + // no output
  339 + }
  340 + else if (state == st_in_trailer)
  341 + {
  342 + if (matches(re_size_n))
  343 + {
  344 + std::cout << " /Size " << 1 + xref.size() << "\n";
  345 + }
  346 + else
  347 + {
  348 + std::cout << line;
  349 + }
  350 + if (matches(re_dict_end))
  351 + {
  352 + std::cout << "startxref\n" << xref_offset<< "\n%%EOF\n";
  353 + state = st_done;
  354 + }
  355 + }
  356 + else if (state == st_done)
  357 + {
  358 + // ignore
  359 + }
  360 + }
  361 +}
  362 +
  363 +void
  364 +QdfFixer::checkObjId(std::string const& cur_obj_str)
  365 +{
  366 + int cur_obj = QUtil::string_to_int(cur_obj_str.c_str());
  367 + if (cur_obj != last_obj + 1)
  368 + {
  369 + fatal(filename + ":" + QUtil::uint_to_string(lineno) +
  370 + ": expected object " + QUtil::int_to_string(last_obj + 1));
  371 + }
  372 + last_obj = cur_obj;
  373 + xref.push_back(QPDFXRefEntry(1, QIntC::to_offset(last_offset), 0));
  374 +}
  375 +
  376 +void
  377 +QdfFixer::adjustOstreamXref()
  378 +{
  379 + xref.pop_back();
  380 + xref.push_back(QPDFXRefEntry(2, ostream_id, QIntC::to_int(ostream_idx++)));
  381 +}
  382 +
  383 +void
  384 +QdfFixer::writeOstream()
  385 +{
  386 + auto first = ostream_offsets.at(0);
  387 + auto onum = ostream_id;
  388 + std::string offsets;
  389 + auto n = ostream_offsets.size();
  390 + for (auto iter = ostream_offsets.begin();
  391 + iter != ostream_offsets.end(); ++iter)
  392 + {
  393 + (*iter) -= QIntC::to_offset(first);
  394 + ++onum;
  395 + offsets += QUtil::int_to_string(onum) + " " +
  396 + QUtil::int_to_string(*iter) + "\n";
  397 + }
  398 + auto offset_adjust = QIntC::to_offset(offsets.size());
  399 + first += offset_adjust;
  400 + stream_length += QIntC::to_size(offset_adjust);
  401 + std::string dict_data = "";
  402 + dict_data += " /Length " + QUtil::uint_to_string(stream_length) + "\n";
  403 + dict_data += " /N " + QUtil::uint_to_string(n) + "\n";
  404 + dict_data += " /First " + QUtil::int_to_string(first) + "\n";
  405 + if (! ostream_extends.empty())
  406 + {
  407 + dict_data += " /Extends " + ostream_extends + "\n";
  408 + }
  409 + dict_data += ">>\n";
  410 + offset_adjust += QIntC::to_offset(dict_data.length());
  411 + std::cout << dict_data
  412 + << "stream\n"
  413 + << offsets;
  414 + for (auto o: ostream)
  415 + {
  416 + std::cout << o;
  417 + }
  418 +
  419 + for (auto o: ostream_discarded)
  420 + {
  421 + offset -= QIntC::to_offset(o.length());
  422 + }
  423 + offset += offset_adjust;
  424 +
  425 + ostream_idx = 0;
  426 + ostream_id = 0;
  427 + ostream.clear();
  428 + ostream_offsets.clear();
  429 + ostream_discarded.clear();
  430 + ostream_extends.clear();
  431 +}
  432 +
  433 +void
  434 +QdfFixer::writeBinary(unsigned long long val, size_t bytes)
  435 +{
  436 + if (bytes > sizeof(unsigned long long))
  437 + {
  438 + throw std::logic_error(
  439 + "fix-qdf::writeBinary called with too many bytes");
  440 + }
  441 + std::string data;
  442 + data.reserve(bytes);
  443 + for (size_t i = 0; i < bytes; ++i)
  444 + {
  445 + data.append(1, '\0');
  446 + }
  447 + for (size_t i = 0; i < bytes; ++i)
  448 + {
  449 + data.at(bytes - i - 1) =
  450 + static_cast<char>(QIntC::to_uchar(val & 0xff));
  451 + val >>= 8;
  452 + }
  453 + std::cout << data;
  454 +}
  455 +
  456 +static int realmain(int argc, char* argv[])
  457 +{
  458 + whoami = QUtil::getWhoami(argv[0]);
  459 + QUtil::setLineBuf(stdout);
  460 + char const* filename = 0;
  461 + if (argc > 2)
  462 + {
  463 + usage();
  464 + }
  465 + else if ((argc > 1) && (strcmp(argv[1], "--version") == 0))
  466 + {
  467 + std::cout << whoami << " from qpdf version "
  468 + << QPDF::QPDFVersion() << std::endl;
  469 + return 0;
  470 + }
  471 + else if ((argc > 1) && (strcmp(argv[1], "--help") == 0))
  472 + {
  473 + usage();
  474 + }
  475 + else if (argc == 2)
  476 + {
  477 + filename = argv[1];
  478 + }
  479 + std::list<std::string> lines;
  480 + if (filename == 0)
  481 + {
  482 + filename = "standard input";
  483 + QUtil::binary_stdin();
  484 + lines = QUtil::read_lines_from_file(stdin, true);
  485 + }
  486 + else
  487 + {
  488 + lines = QUtil::read_lines_from_file(filename, true);
  489 + }
  490 + QUtil::binary_stdout();
  491 + QdfFixer qf(filename);
  492 + qf.processLines(lines);
  493 + return 0;
  494 +}
  495 +
  496 +#ifdef WINDOWS_WMAIN
  497 +
  498 +extern "C"
  499 +int wmain(int argc, wchar_t* argv[])
  500 +{
  501 + return QUtil::call_main_from_wmain(argc, argv, realmain);
  502 +}
  503 +
  504 +#else
  505 +
  506 +int main(int argc, char* argv[])
  507 +{
  508 + return realmain(argc, argv);
  509 +}
  510 +
  511 +#endif