Commit 0ddc4abd03f4f4ea25bcf7d51d171be78d22ab62

Authored by Jay Berkenbilt
Committed by GitHub
2 parents a5cab082 a2fc5b52

Merge pull request #1345 from jberkenbilt/fix-qdf-output-file

Fix qdf output file
ChangeLog
  1 +2025-02-02 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Have fix-qdf accept a second argument, interpreted as the output
  4 + file. Fixes #1330.
  5 +
1 2024-02-01 M Holger <m.holger@qpdf.org> 6 2024-02-01 M Holger <m.holger@qpdf.org>
2 7
3 * Bug fix: in qpdf CLI / QPDFJob throw a QPDFUsage exception if a 8 * Bug fix: in qpdf CLI / QPDFJob throw a QPDFUsage exception if a
manual/fix-qdf.1.in
@@ -3,9 +3,11 @@ @@ -3,9 +3,11 @@
3 fix-qdf \- repair PDF files in QDF form after editing 3 fix-qdf \- repair PDF files in QDF form after editing
4 .SH SYNOPSIS 4 .SH SYNOPSIS
5 .B fix-qdf 5 .B fix-qdf
6 -< \fIinfilename\fR > \fIoutfilename\fR 6 +[\fIinfilename\fR [\fIoutfilename\fR]]
7 .SH DESCRIPTION 7 .SH DESCRIPTION
8 -The fix-qdf program is part of the qpdf package. 8 +The fix-qdf program is part of the qpdf package. With no arguments,
  9 +fix-qdf reads from standard input and writes to standard output. With
  10 +one argument, it reads from that file and writes to standard output. With two arguments, it reads from the first file and writes to the second.
9 .PP 11 .PP
10 The fix-qdf program reads a PDF file in QDF form and writes out 12 The fix-qdf program reads a PDF file in QDF form and writes out
11 the same file with stream lengths, cross-reference table entries, and 13 the same file with stream lengths, cross-reference table entries, and
manual/qdf.rst
@@ -23,10 +23,18 @@ files are full of offset and length information that makes it hard to @@ -23,10 +23,18 @@ files are full of offset and length information that makes it hard to
23 add or remove data. A QDF file is organized in a manner such that, if 23 add or remove data. A QDF file is organized in a manner such that, if
24 edits are kept within certain constraints, the 24 edits are kept within certain constraints, the
25 :command:`fix-qdf` program, distributed with qpdf, is 25 :command:`fix-qdf` program, distributed with qpdf, is
26 -able to restore edited files to a correct state. The  
27 -:command:`fix-qdf` program takes no command-line  
28 -arguments. It reads a possibly edited QDF file from standard input and  
29 -writes a repaired file to standard output. 26 +able to restore edited files to a correct state.
  27 +
  28 +.. code-block:: bash
  29 +
  30 + fix-qdf [infilename [outfilename]]
  31 +
  32 +With no arguments, :command:`fix-qdf` reads the possibly-edited QDF
  33 +file from standard input and writes a repaired file to standard
  34 +output. You can also specify the input and output files as
  35 +command-line arguments. With one argument, the argument is taken as an
  36 +input file. With two arguments, the first argument is an input file,
  37 +and the second is an output file.
30 38
31 For another way to work with PDF files in an editor, see :ref:`json`. 39 For another way to work with PDF files in an editor, see :ref:`json`.
32 Using qpdf JSON format allows you to edit the PDF file semantically 40 Using qpdf JSON format allows you to edit the PDF file semantically
manual/release-notes.rst
@@ -38,6 +38,14 @@ Planned changes for future 12.x (subject to change): @@ -38,6 +38,14 @@ Planned changes for future 12.x (subject to change):
38 38
39 .. x.y.z: not yet released 39 .. x.y.z: not yet released
40 40
  41 +11.10.0: not yet released
  42 + - CLI Enhancements
  43 +
  44 + - The :command:`fix-qdf` command now allows an output file to be
  45 + specified as an optional second argument. This is useful for
  46 + environments in which writing a binary file to standard output
  47 + doesn't work (such as PowerShell 5).
  48 +
41 11.9.1: June 7, 2024 49 11.9.1: June 7, 2024
42 - Bug Fixes 50 - Bug Fixes
43 51
qpdf/fix-qdf.cc
@@ -4,23 +4,27 @@ @@ -4,23 +4,27 @@
4 #include <qpdf/QUtil.hh> 4 #include <qpdf/QUtil.hh>
5 #include <cstdio> 5 #include <cstdio>
6 #include <cstring> 6 #include <cstring>
  7 +#include <fstream>
7 #include <iostream> 8 #include <iostream>
8 #include <regex> 9 #include <regex>
9 #include <string_view> 10 #include <string_view>
10 11
  12 +using namespace std::literals;
11 static char const* whoami = nullptr; 13 static char const* whoami = nullptr;
12 14
13 static void 15 static void
14 usage() 16 usage()
15 { 17 {
16 - std::cerr << "Usage: " << whoami << " [filename]" << std::endl;  
17 - exit(2); 18 + std::cerr << "Usage: " << whoami << " [infilename [outfilename]]" << std::endl
  19 + << "infilename defaults to standard input" << std::endl
  20 + << "outfilename defaults to standard output" << std::endl;
18 } 21 }
19 22
20 class QdfFixer 23 class QdfFixer
21 { 24 {
22 public: 25 public:
23 - QdfFixer(std::string const& filename); 26 + QdfFixer(std::string const& filename, std::ostream& out);
  27 + ~QdfFixer() = default;
24 void processLines(std::string const& input); 28 void processLines(std::string const& input);
25 29
26 private: 30 private:
@@ -31,6 +35,7 @@ class QdfFixer @@ -31,6 +35,7 @@ class QdfFixer
31 void writeBinary(unsigned long long val, size_t bytes); 35 void writeBinary(unsigned long long val, size_t bytes);
32 36
33 std::string filename; 37 std::string filename;
  38 + std::ostream& out;
34 enum { 39 enum {
35 st_top, 40 st_top,
36 st_in_obj, 41 st_in_obj,
@@ -67,8 +72,9 @@ class QdfFixer @@ -67,8 +72,9 @@ class QdfFixer
67 std::string ostream_extends; 72 std::string ostream_extends;
68 }; 73 };
69 74
70 -QdfFixer::QdfFixer(std::string const& filename) :  
71 - filename(filename) 75 +QdfFixer::QdfFixer(std::string const& filename, std::ostream& out) :
  76 + filename(filename),
  77 + out(out)
72 { 78 {
73 } 79 }
74 80
@@ -131,9 +137,9 @@ QdfFixer::processLines(std::string const& input) @@ -131,9 +137,9 @@ QdfFixer::processLines(std::string const& input)
131 xref_offset = last_offset; 137 xref_offset = last_offset;
132 state = st_at_xref; 138 state = st_at_xref;
133 } 139 }
134 - std::cout << line; 140 + out << line;
135 } else if (state == st_in_obj) { 141 } else if (state == st_in_obj) {
136 - std::cout << line; 142 + out << line;
137 if (line.compare("stream\n"sv) == 0) { 143 if (line.compare("stream\n"sv) == 0) {
138 state = st_in_stream; 144 state = st_in_stream;
139 stream_start = offset; 145 stream_start = offset;
@@ -166,8 +172,8 @@ QdfFixer::processLines(std::string const& input) @@ -166,8 +172,8 @@ QdfFixer::processLines(std::string const& input)
166 auto esize = 1 + xref_f1_nbytes + xref_f2_nbytes; 172 auto esize = 1 + xref_f1_nbytes + xref_f2_nbytes;
167 xref_size = 1 + xref.size(); 173 xref_size = 1 + xref.size();
168 auto length = xref_size * esize; 174 auto length = xref_size * esize;
169 - std::cout << " /Length " << length << "\n"  
170 - << " /W [ 1 " << xref_f1_nbytes << " " << xref_f2_nbytes << " ]\n"; 175 + out << " /Length " << length << "\n"
  176 + << " /W [ 1 " << xref_f1_nbytes << " " << xref_f2_nbytes << " ]\n";
171 state = st_in_xref_stream_dict; 177 state = st_in_xref_stream_dict;
172 } 178 }
173 } else if (state == st_in_ostream_dict) { 179 } else if (state == st_in_ostream_dict) {
@@ -209,10 +215,10 @@ QdfFixer::processLines(std::string const& input) @@ -209,10 +215,10 @@ QdfFixer::processLines(std::string const& input)
209 if ((line.find("/Length"sv) != line.npos) || (line.find("/W"sv) != line.npos)) { 215 if ((line.find("/Length"sv) != line.npos) || (line.find("/W"sv) != line.npos)) {
210 // already printed 216 // already printed
211 } else if (line.find("/Size"sv) != line.npos) { 217 } else if (line.find("/Size"sv) != line.npos) {
212 - auto xref_size = 1 + xref.size();  
213 - std::cout << " /Size " << xref_size << "\n"; 218 + auto size = 1 + xref.size();
  219 + out << " /Size " << size << "\n";
214 } else { 220 } else {
215 - std::cout << line; 221 + out << line;
216 } 222 }
217 if (line.compare("stream\n"sv) == 0) { 223 if (line.compare("stream\n"sv) == 0) {
218 writeBinary(0, 1); 224 writeBinary(0, 1);
@@ -232,9 +238,9 @@ QdfFixer::processLines(std::string const& input) @@ -232,9 +238,9 @@ QdfFixer::processLines(std::string const& input)
232 writeBinary(f1, xref_f1_nbytes); 238 writeBinary(f1, xref_f1_nbytes);
233 writeBinary(f2, xref_f2_nbytes); 239 writeBinary(f2, xref_f2_nbytes);
234 } 240 }
235 - std::cout << "\nendstream\nendobj\n\n"  
236 - << "startxref\n"  
237 - << xref_offset << "\n%%EOF\n"; 241 + out << "\nendstream\nendobj\n\n"
  242 + << "startxref\n"
  243 + << xref_offset << "\n%%EOF\n";
238 state = st_done; 244 state = st_done;
239 } 245 }
240 } else if (state == st_in_stream) { 246 } else if (state == st_in_stream) {
@@ -242,7 +248,7 @@ QdfFixer::processLines(std::string const& input) @@ -242,7 +248,7 @@ QdfFixer::processLines(std::string const& input)
242 stream_length = QIntC::to_size(last_offset - stream_start); 248 stream_length = QIntC::to_size(last_offset - stream_start);
243 state = st_after_stream; 249 state = st_after_stream;
244 } 250 }
245 - std::cout << line; 251 + out << line;
246 } else if (state == st_after_stream) { 252 } else if (state == st_after_stream) {
247 if (line.compare("%QDF: ignore_newline\n"sv) == 0) { 253 if (line.compare("%QDF: ignore_newline\n"sv) == 0) {
248 if (stream_length > 0) { 254 if (stream_length > 0) {
@@ -252,7 +258,7 @@ QdfFixer::processLines(std::string const& input) @@ -252,7 +258,7 @@ QdfFixer::processLines(std::string const& input)
252 checkObjId(m[1].str()); 258 checkObjId(m[1].str());
253 state = st_in_length; 259 state = st_in_length;
254 } 260 }
255 - std::cout << line; 261 + out << line;
256 } else if (state == st_in_length) { 262 } else if (state == st_in_length) {
257 if (!matches(re_num)) { 263 if (!matches(re_num)) {
258 fatal(filename + ":" + std::to_string(lineno) + ": expected integer"); 264 fatal(filename + ":" + std::to_string(lineno) + ": expected integer");
@@ -260,29 +266,29 @@ QdfFixer::processLines(std::string const& input) @@ -260,29 +266,29 @@ QdfFixer::processLines(std::string const& input)
260 std::string new_length = std::to_string(stream_length) + "\n"; 266 std::string new_length = std::to_string(stream_length) + "\n";
261 offset -= QIntC::to_offset(line.length()); 267 offset -= QIntC::to_offset(line.length());
262 offset += QIntC::to_offset(new_length.length()); 268 offset += QIntC::to_offset(new_length.length());
263 - std::cout << new_length; 269 + out << new_length;
264 state = st_top; 270 state = st_top;
265 } else if (state == st_at_xref) { 271 } else if (state == st_at_xref) {
266 auto n = xref.size(); 272 auto n = xref.size();
267 - std::cout << "0 " << 1 + n << "\n0000000000 65535 f \n"; 273 + out << "0 " << 1 + n << "\n0000000000 65535 f \n";
268 for (auto const& e: xref) { 274 for (auto const& e: xref) {
269 - std::cout << QUtil::int_to_string(e.getOffset(), 10) << " 00000 n \n"; 275 + out << QUtil::int_to_string(e.getOffset(), 10) << " 00000 n \n";
270 } 276 }
271 state = st_before_trailer; 277 state = st_before_trailer;
272 } else if (state == st_before_trailer) { 278 } else if (state == st_before_trailer) {
273 if (line.compare("trailer <<\n"sv) == 0) { 279 if (line.compare("trailer <<\n"sv) == 0) {
274 - std::cout << line; 280 + out << line;
275 state = st_in_trailer; 281 state = st_in_trailer;
276 } 282 }
277 // no output 283 // no output
278 } else if (state == st_in_trailer) { 284 } else if (state == st_in_trailer) {
279 if (matches(re_size_n)) { 285 if (matches(re_size_n)) {
280 - std::cout << " /Size " << 1 + xref.size() << "\n"; 286 + out << " /Size " << 1 + xref.size() << "\n";
281 } else { 287 } else {
282 - std::cout << line; 288 + out << line;
283 } 289 }
284 if (line.compare(">>\n"sv) == 0) { 290 if (line.compare(">>\n"sv) == 0) {
285 - std::cout << "startxref\n" << xref_offset << "\n%%EOF\n"; 291 + out << "startxref\n" << xref_offset << "\n%%EOF\n";
286 state = st_done; 292 state = st_done;
287 } 293 }
288 } else if (state == st_done) { 294 } else if (state == st_done) {
@@ -332,9 +338,9 @@ QdfFixer::writeOstream() @@ -332,9 +338,9 @@ QdfFixer::writeOstream()
332 } 338 }
333 dict_data += ">>\n"; 339 dict_data += ">>\n";
334 offset_adjust += QIntC::to_offset(dict_data.length()); 340 offset_adjust += QIntC::to_offset(dict_data.length());
335 - std::cout << dict_data << "stream\n" << offsets; 341 + out << dict_data << "stream\n" << offsets;
336 for (auto const& o: ostream) { 342 for (auto const& o: ostream) {
337 - std::cout << o; 343 + out << o;
338 } 344 }
339 345
340 for (auto const& o: ostream_discarded) { 346 for (auto const& o: ostream_discarded) {
@@ -361,7 +367,7 @@ QdfFixer::writeBinary(unsigned long long val, size_t bytes) @@ -361,7 +367,7 @@ QdfFixer::writeBinary(unsigned long long val, size_t bytes)
361 data[i - 1] = static_cast<char>(val & 0xff); // i.e. val % 256 367 data[i - 1] = static_cast<char>(val & 0xff); // i.e. val % 256
362 val >>= 8; // i.e. val = val / 256 368 val >>= 8; // i.e. val = val / 256
363 } 369 }
364 - std::cout << data; 370 + out << data;
365 } 371 }
366 372
367 static int 373 static int
@@ -370,27 +376,44 @@ realmain(int argc, char* argv[]) @@ -370,27 +376,44 @@ realmain(int argc, char* argv[])
370 whoami = QUtil::getWhoami(argv[0]); 376 whoami = QUtil::getWhoami(argv[0]);
371 QUtil::setLineBuf(stdout); 377 QUtil::setLineBuf(stdout);
372 char const* filename = nullptr; 378 char const* filename = nullptr;
373 - if (argc > 2) { 379 + char const* outfilename = nullptr;
  380 + if (argc > 3) {
374 usage(); 381 usage();
375 } else if ((argc > 1) && (strcmp(argv[1], "--version") == 0)) { 382 } else if ((argc > 1) && (strcmp(argv[1], "--version") == 0)) {
376 std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl; 383 std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl;
377 return 0; 384 return 0;
378 } else if ((argc > 1) && (strcmp(argv[1], "--help") == 0)) { 385 } else if ((argc > 1) && (strcmp(argv[1], "--help") == 0)) {
379 usage(); 386 usage();
380 - } else if (argc == 2) { 387 + } else if (argc >= 2) {
381 filename = argv[1]; 388 filename = argv[1];
  389 + if (argc == 3) {
  390 + outfilename = argv[2];
  391 + }
382 } 392 }
383 - std::string input;  
384 - if (filename == nullptr) {  
385 - filename = "standard input";  
386 - QUtil::binary_stdin();  
387 - input = QUtil::read_file_into_string(stdin);  
388 - } else {  
389 - input = QUtil::read_file_into_string(filename); 393 + try {
  394 + std::string input;
  395 + if (filename == nullptr) {
  396 + filename = "standard input";
  397 + QUtil::binary_stdin();
  398 + input = QUtil::read_file_into_string(stdin);
  399 + } else {
  400 + input = QUtil::read_file_into_string(filename);
  401 + }
  402 + std::unique_ptr<std::ofstream> out = nullptr;
  403 + if (outfilename) {
  404 + out = std::make_unique<std::ofstream>(outfilename, std::ios::binary);
  405 + if (out->fail()) {
  406 + QUtil::throw_system_error("open "s + outfilename);
  407 + }
  408 + } else {
  409 + QUtil::binary_stdout();
  410 + }
  411 + QdfFixer qf(filename, out ? *out : std::cout);
  412 + qf.processLines(input);
  413 + } catch (std::exception& e) {
  414 + std::cerr << whoami << ": error: " << e.what() << std::endl;
  415 + exit(qpdf_exit_error);
390 } 416 }
391 - QUtil::binary_stdout();  
392 - QdfFixer qf(filename);  
393 - qf.processLines(input);  
394 return 0; 417 return 0;
395 } 418 }
396 419
qpdf/qtest/fix-qdf.test
@@ -14,7 +14,7 @@ cleanup(); @@ -14,7 +14,7 @@ cleanup();
14 14
15 my $td = new TestDriver('fix-qdf'); 15 my $td = new TestDriver('fix-qdf');
16 16
17 -my $n_tests = 5; 17 +my $n_tests = 11;
18 18
19 for (my $n = 1; $n <= 2; ++$n) 19 for (my $n = 1; $n <= 2; ++$n)
20 { 20 {
@@ -23,6 +23,15 @@ for (my $n = 1; $n <= 2; ++$n) @@ -23,6 +23,15 @@ for (my $n = 1; $n <= 2; ++$n)
23 {$td->FILE => "fix$n.qdf.out", 23 {$td->FILE => "fix$n.qdf.out",
24 $td->EXIT_STATUS => 0}); 24 $td->EXIT_STATUS => 0});
25 25
  26 + $td->runtest("fix-qdf $n with named output",
  27 + {$td->COMMAND => "fix-qdf fix$n.qdf a.pdf"},
  28 + {$td->STRING => "",
  29 + $td->EXIT_STATUS => 0});
  30 +
  31 + $td->runtest("check fix-qdf $n output",
  32 + {$td->FILE => "a.pdf"},
  33 + {$td->FILE => "fix$n.qdf.out"});
  34 +
26 $td->runtest("identity fix-qdf $n", 35 $td->runtest("identity fix-qdf $n",
27 {$td->COMMAND => "fix-qdf fix$n.qdf.out"}, 36 {$td->COMMAND => "fix-qdf fix$n.qdf.out"},
28 {$td->FILE => "fix$n.qdf.out", 37 {$td->FILE => "fix$n.qdf.out",
@@ -54,5 +63,15 @@ $td->runtest("fix-qdf with big object stream", # > 255 objects in a stream @@ -54,5 +63,15 @@ $td->runtest("fix-qdf with big object stream", # > 255 objects in a stream
54 {$td->FILE => "big-ostream.pdf", 63 {$td->FILE => "big-ostream.pdf",
55 $td->EXIT_STATUS => 0}); 64 $td->EXIT_STATUS => 0});
56 65
  66 +$td->runtest("fix-qdf error opening input",
  67 + {$td->COMMAND => "fix-qdf /does/not/exist/potato.pdf"},
  68 + {$td->REGEXP => "^fix-qdf: error: open .*/does/not/exist/potato.pdf: .*",
  69 + $td->EXIT_STATUS => 2});
  70 +
  71 +$td->runtest("fix-qdf error opening output", # output path is in a nonexistent directory
  72 + {$td->COMMAND => "fix-qdf fix1.qdf /does/not/exist/salad.pdf"},
  73 + {$td->REGEXP => "^fix-qdf: error: open .*/does/not/exist/salad.pdf: .*",
  74 + $td->EXIT_STATUS => 2});
  75 +
57 cleanup(); 76 cleanup();
58 $td->report($n_tests); 77 $td->report($n_tests);