Commit c6a393a574befe405e707434c1432987cc2298b0

Authored by m-holger
1 parent 0a9ef286

Change QPDFExc to allow zero offsets

Internally use -1 to represent a missing offset and provide a constructor
overload that allows 0 as a valid offset.

QPDF::damagedPDF now uses the new overload, and call sites that have no meaningful offset pass -1 instead of 0.
include/qpdf/QPDFExc.hh
@@ -38,6 +38,15 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error @@ -38,6 +38,15 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
38 qpdf_offset_t offset, 38 qpdf_offset_t offset,
39 std::string const& message); 39 std::string const& message);
40 40
  41 + QPDF_DLL
  42 + QPDFExc(
  43 + qpdf_error_code_e error_code,
  44 + std::string const& filename,
  45 + std::string const& object,
  46 + qpdf_offset_t offset,
  47 + std::string const& message,
  48 + bool zero_offset_valid);
  49 +
41 ~QPDFExc() noexcept override = default; 50 ~QPDFExc() noexcept override = default;
42 51
43 // To get a complete error string, call what(), provided by std::exception. The accessors below 52 // To get a complete error string, call what(), provided by std::exception. The accessors below
libqpdf/QPDF.cc
@@ -813,12 +813,12 @@ QPDF::getRoot() @@ -813,12 +813,12 @@ QPDF::getRoot()
813 { 813 {
814 QPDFObjectHandle root = m->trailer.getKey("/Root"); 814 QPDFObjectHandle root = m->trailer.getKey("/Root");
815 if (!root.isDictionary()) { 815 if (!root.isDictionary()) {
816 - throw damagedPDF("", 0, "unable to find /Root dictionary"); 816 + throw damagedPDF("", -1, "unable to find /Root dictionary");
817 } else if ( 817 } else if (
818 // Check_mode is an interim solution to request #810 pending a more comprehensive review of 818 // Check_mode is an interim solution to request #810 pending a more comprehensive review of
819 // the approach to more extensive checks and warning levels. 819 // the approach to more extensive checks and warning levels.
820 m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { 820 m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
821 - warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); 821 + warn(damagedPDF("", -1, "catalog /Type entry missing or invalid"));
822 root.replaceKey("/Type", "/Catalog"_qpdf); 822 root.replaceKey("/Type", "/Catalog"_qpdf);
823 } 823 }
824 return root; 824 return root;
@@ -964,7 +964,7 @@ QPDFExc @@ -964,7 +964,7 @@ QPDFExc
964 QPDF::damagedPDF( 964 QPDF::damagedPDF(
965 InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) 965 InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
966 { 966 {
967 - return {qpdf_e_damaged_pdf, input.getName(), object, offset, message}; 967 + return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true};
968 } 968 }
969 969
970 // Return an exception of type qpdf_e_damaged_pdf. The object is taken from 970 // Return an exception of type qpdf_e_damaged_pdf. The object is taken from
@@ -979,7 +979,7 @@ QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& me @@ -979,7 +979,7 @@ QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& me
979 QPDFExc 979 QPDFExc
980 QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) 980 QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
981 { 981 {
982 - return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message}; 982 + return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true};
983 } 983 }
984 984
985 // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the 985 // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
libqpdf/QPDFExc.cc
@@ -6,11 +6,28 @@ QPDFExc::QPDFExc( @@ -6,11 +6,28 @@ QPDFExc::QPDFExc(
6 std::string const& object, 6 std::string const& object,
7 qpdf_offset_t offset, 7 qpdf_offset_t offset,
8 std::string const& message) : 8 std::string const& message) :
9 - std::runtime_error(createWhat(filename, object, offset, message)), 9 + std::runtime_error(createWhat(filename, object, (offset ? offset : -1), message)),
10 error_code(error_code), 10 error_code(error_code),
11 filename(filename), 11 filename(filename),
12 object(object), 12 object(object),
13 - offset(offset), 13 + offset(offset ? offset : -1),
  14 + message(message)
  15 +{
  16 +}
  17 +
  18 +QPDFExc::QPDFExc(
  19 + qpdf_error_code_e error_code,
  20 + std::string const& filename,
  21 + std::string const& object,
  22 + qpdf_offset_t offset,
  23 + std::string const& message,
  24 + bool zero_offset_valid) :
  25 + std::runtime_error(
  26 + createWhat(filename, object, (offset || zero_offset_valid ? offset : -1), message)),
  27 + error_code(error_code),
  28 + filename(filename),
  29 + object(object),
  30 + offset(offset || zero_offset_valid ? offset : -1),
14 message(message) 31 message(message)
15 { 32 {
16 } 33 }
@@ -26,17 +43,17 @@ QPDFExc::createWhat( @@ -26,17 +43,17 @@ QPDFExc::createWhat(
26 if (!filename.empty()) { 43 if (!filename.empty()) {
27 result += filename; 44 result += filename;
28 } 45 }
29 - if (!(object.empty() && offset == 0)) { 46 + if (!(object.empty() && offset < 0)) {
30 if (!filename.empty()) { 47 if (!filename.empty()) {
31 result += " ("; 48 result += " (";
32 } 49 }
33 if (!object.empty()) { 50 if (!object.empty()) {
34 result += object; 51 result += object;
35 - if (offset > 0) { 52 + if (offset >= 0) {
36 result += ", "; 53 result += ", ";
37 } 54 }
38 } 55 }
39 - if (offset > 0) { 56 + if (offset >= 0) {
40 result += "offset " + std::to_string(offset); 57 result += "offset " + std::to_string(offset);
41 } 58 }
42 if (!filename.empty()) { 59 if (!filename.empty()) {
@@ -71,7 +88,7 @@ QPDFExc::getObject() const @@ -71,7 +88,7 @@ QPDFExc::getObject() const
71 qpdf_offset_t 88 qpdf_offset_t
72 QPDFExc::getFilePosition() const 89 QPDFExc::getFilePosition() const
73 { 90 {
74 - return this->offset; 91 + return offset < 0 ? 0 : offset;
75 } 92 }
76 93
77 std::string const& 94 std::string const&
libqpdf/QPDF_objects.cc
@@ -108,7 +108,7 @@ QPDF::parse(char const* password) @@ -108,7 +108,7 @@ QPDF::parse(char const* password)
108 PatternFinder hf(*this, &QPDF::findHeader); 108 PatternFinder hf(*this, &QPDF::findHeader);
109 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { 109 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
110 QTC::TC("qpdf", "QPDF not a pdf file"); 110 QTC::TC("qpdf", "QPDF not a pdf file");
111 - warn(damagedPDF("", 0, "can't find PDF header")); 111 + warn(damagedPDF("", -1, "can't find PDF header"));
112 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode 112 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
113 m->pdf_version = "1.2"; 113 m->pdf_version = "1.2";
114 } 114 }
@@ -133,14 +133,14 @@ QPDF::parse(char const* password) @@ -133,14 +133,14 @@ QPDF::parse(char const* password)
133 try { 133 try {
134 if (xref_offset == 0) { 134 if (xref_offset == 0) {
135 QTC::TC("qpdf", "QPDF can't find startxref"); 135 QTC::TC("qpdf", "QPDF can't find startxref");
136 - throw damagedPDF("", 0, "can't find startxref"); 136 + throw damagedPDF("", -1, "can't find startxref");
137 } 137 }
138 try { 138 try {
139 read_xref(xref_offset); 139 read_xref(xref_offset);
140 } catch (QPDFExc&) { 140 } catch (QPDFExc&) {
141 throw; 141 throw;
142 } catch (std::exception& e) { 142 } catch (std::exception& e) {
143 - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what()); 143 + throw damagedPDF("", -1, std::string("error reading xref: ") + e.what());
144 } 144 }
145 } catch (QPDFExc& e) { 145 } catch (QPDFExc& e) {
146 if (m->attempt_recovery) { 146 if (m->attempt_recovery) {
@@ -155,7 +155,7 @@ QPDF::parse(char const* password) @@ -155,7 +155,7 @@ QPDF::parse(char const* password)
155 m->parsed = true; 155 m->parsed = true;
156 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { 156 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
157 // QPDFs created from JSON have an empty xref table and no root object yet. 157 // QPDFs created from JSON have an empty xref table and no root object yet.
158 - throw damagedPDF("", 0, "unable to find page tree"); 158 + throw damagedPDF("", -1, "unable to find page tree");
159 } 159 }
160 } 160 }
161 161
@@ -195,7 +195,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -195,7 +195,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
195 const auto max_warnings = m->warnings.size() + 1000U; 195 const auto max_warnings = m->warnings.size() + 1000U;
196 auto check_warnings = [this, max_warnings]() { 196 auto check_warnings = [this, max_warnings]() {
197 if (m->warnings.size() > max_warnings) { 197 if (m->warnings.size() > max_warnings) {
198 - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); 198 + throw damagedPDF("", -1, "too many errors while reconstructing cross-reference table");
199 } 199 }
200 }; 200 };
201 201
@@ -203,9 +203,9 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -203,9 +203,9 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
203 // We may find more objects, which may contain dangling references. 203 // We may find more objects, which may contain dangling references.
204 m->fixed_dangling_refs = false; 204 m->fixed_dangling_refs = false;
205 205
206 - warn(damagedPDF("", 0, "file is damaged")); 206 + warn(damagedPDF("", -1, "file is damaged"));
207 warn(e); 207 warn(e);
208 - warn(damagedPDF("", 0, "Attempting to reconstruct cross-reference table")); 208 + warn(damagedPDF("", -1, "Attempting to reconstruct cross-reference table"));
209 209
210 // Delete all references to type 1 (uncompressed) objects 210 // Delete all references to type 1 (uncompressed) objects
211 std::vector<QPDFObjGen> to_delete; 211 std::vector<QPDFObjGen> to_delete;
@@ -240,7 +240,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -240,7 +240,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
240 found_objects.emplace_back(obj, gen, token_start); 240 found_objects.emplace_back(obj, gen, token_start);
241 } else { 241 } else {
242 warn(damagedPDF( 242 warn(damagedPDF(
243 - "", 0, "ignoring object with impossibly large id " + std::to_string(obj))); 243 + "", -1, "ignoring object with impossibly large id " + std::to_string(obj)));
244 } 244 }
245 } 245 }
246 m->file->seek(pos, SEEK_SET); 246 m->file->seek(pos, SEEK_SET);
@@ -261,8 +261,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -261,8 +261,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
261 read_xref(offset); 261 read_xref(offset);
262 if (getRoot().getKey("/Pages").isDictionary()) { 262 if (getRoot().getKey("/Pages").isDictionary()) {
263 QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); 263 QTC::TC("qpdf", "QPDF startxref more than 1024 before end");
264 - warn(  
265 - damagedPDF("", 0, "startxref was more than 1024 bytes before end of file")); 264 + warn(damagedPDF(
  265 + "", -1, "startxref was more than 1024 bytes before end of file"));
266 initializeEncryption(); 266 initializeEncryption();
267 m->parsed = true; 267 m->parsed = true;
268 m->reconstructed_xref = false; 268 m->reconstructed_xref = false;
@@ -327,7 +327,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -327,7 +327,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
327 read_xref(max_offset); 327 read_xref(max_offset);
328 } catch (std::exception&) { 328 } catch (std::exception&) {
329 warn(damagedPDF( 329 warn(damagedPDF(
330 - "", 0, "error decoding candidate xref stream while recovering damaged file")); 330 + "", -1, "error decoding candidate xref stream while recovering damaged file"));
331 } 331 }
332 QTC::TC("qpdf", "QPDF recover xref stream"); 332 QTC::TC("qpdf", "QPDF recover xref stream");
333 } 333 }
@@ -348,7 +348,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -348,7 +348,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
348 if (root) { 348 if (root) {
349 if (!m->trailer) { 349 if (!m->trailer) {
350 warn(damagedPDF( 350 warn(damagedPDF(
351 - "", 0, "unable to find trailer dictionary while recovering damaged file")); 351 + "", -1, "unable to find trailer dictionary while recovering damaged file"));
352 m->trailer = QPDFObjectHandle::newDictionary(); 352 m->trailer = QPDFObjectHandle::newDictionary();
353 } 353 }
354 m->trailer.replaceKey("/Root", root); 354 m->trailer.replaceKey("/Root", root);
@@ -360,12 +360,12 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -360,12 +360,12 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
360 // could try to get the trailer from there. This may make it possible to recover files with 360 // could try to get the trailer from there. This may make it possible to recover files with
361 // bad startxref pointers even when they have object streams. 361 // bad startxref pointers even when they have object streams.
362 362
363 - throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); 363 + throw damagedPDF("", -1, "unable to find trailer dictionary while recovering damaged file");
364 } 364 }
365 if (m->xref_table.empty()) { 365 if (m->xref_table.empty()) {
366 // We cannot check for an empty xref table in parse because empty tables are valid when 366 // We cannot check for an empty xref table in parse because empty tables are valid when
367 // creating QPDF objects from JSON. 367 // creating QPDF objects from JSON.
368 - throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); 368 + throw damagedPDF("", -1, "unable to find objects while recovering damaged file");
369 } 369 }
370 check_warnings(); 370 check_warnings();
371 if (!m->parsed) { 371 if (!m->parsed) {
@@ -374,7 +374,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -374,7 +374,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
374 check_warnings(); 374 check_warnings();
375 if (m->all_pages.empty()) { 375 if (m->all_pages.empty()) {
376 m->parsed = false; 376 m->parsed = false;
377 - throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); 377 + throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 } 378 }
379 } 379 }
380 // We could iterate through the objects looking for streams and try to find objects inside of 380 // We could iterate through the objects looking for streams and try to find objects inside of
@@ -421,7 +421,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -421,7 +421,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
421 if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) { 421 if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) {
422 if (skipped_space) { 422 if (skipped_space) {
423 QTC::TC("qpdf", "QPDF xref skipped space"); 423 QTC::TC("qpdf", "QPDF xref skipped space");
424 - warn(damagedPDF("", 0, "extraneous whitespace seen before xref")); 424 + warn(damagedPDF("", -1, "extraneous whitespace seen before xref"));
425 } 425 }
426 QTC::TC( 426 QTC::TC(
427 "qpdf", 427 "qpdf",
@@ -441,12 +441,12 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -441,12 +441,12 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
441 } 441 }
442 if (visited.count(xref_offset) != 0) { 442 if (visited.count(xref_offset) != 0) {
443 QTC::TC("qpdf", "QPDF xref loop"); 443 QTC::TC("qpdf", "QPDF xref loop");
444 - throw damagedPDF("", 0, "loop detected following xref tables"); 444 + throw damagedPDF("", -1, "loop detected following xref tables");
445 } 445 }
446 } 446 }
447 447
448 if (!m->trailer) { 448 if (!m->trailer) {
449 - throw damagedPDF("", 0, "unable to find trailer while reading xref"); 449 + throw damagedPDF("", -1, "unable to find trailer while reading xref");
450 } 450 }
451 int size = m->trailer.getKey("/Size").getIntValueAsInt(); 451 int size = m->trailer.getKey("/Size").getIntValueAsInt();
452 int max_obj = 0; 452 int max_obj = 0;
@@ -460,7 +460,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -460,7 +460,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
460 QTC::TC("qpdf", "QPDF xref size mismatch"); 460 QTC::TC("qpdf", "QPDF xref size mismatch");
461 warn(damagedPDF( 461 warn(damagedPDF(
462 "", 462 "",
463 - 0, 463 + -1,
464 ("reported number of objects (" + std::to_string(size) + 464 ("reported number of objects (" + std::to_string(size) +
465 ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); 465 ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
466 } 466 }
@@ -1411,7 +1411,7 @@ QPDF::readObjectAtOffset( @@ -1411,7 +1411,7 @@ QPDF::readObjectAtOffset(
1411 // these. 1411 // these.
1412 if (offset == 0) { 1412 if (offset == 0) {
1413 QTC::TC("qpdf", "QPDF bogus 0 offset", 0); 1413 QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
1414 - warn(damagedPDF(0, "object has offset 0")); 1414 + warn(damagedPDF(-1, "object has offset 0"));
1415 return QPDFObjectHandle::newNull(); 1415 return QPDFObjectHandle::newNull();
1416 } 1416 }
1417 1417
@@ -1470,7 +1470,7 @@ QPDF::readObjectAtOffset( @@ -1470,7 +1470,7 @@ QPDF::readObjectAtOffset(
1470 QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); 1470 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1471 warn(damagedPDF( 1471 warn(damagedPDF(
1472 "", 1472 "",
1473 - 0, 1473 + -1,
1474 ("object " + exp_og.unparse(' ') + 1474 ("object " + exp_og.unparse(' ') +
1475 " not found in file after regenerating cross reference " 1475 " not found in file after regenerating cross reference "
1476 "table"))); 1476 "table")));
@@ -1577,13 +1577,13 @@ QPDF::resolve(QPDFObjGen og) @@ -1577,13 +1577,13 @@ QPDF::resolve(QPDFObjGen og)
1577 1577
1578 default: 1578 default:
1579 throw damagedPDF( 1579 throw damagedPDF(
1580 - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type")); 1580 + "", -1, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1581 } 1581 }
1582 } catch (QPDFExc& e) { 1582 } catch (QPDFExc& e) {
1583 warn(e); 1583 warn(e);
1584 } catch (std::exception& e) { 1584 } catch (std::exception& e) {
1585 warn(damagedPDF( 1585 warn(damagedPDF(
1586 - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); 1586 + "", -1, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1587 } 1587 }
1588 } 1588 }
1589 1589