Commit 6434e09dc2f9942388bb03c666635fd3c2394342

Authored by m-holger
Committed by GitHub
2 parents 9bc0b2d7 e3b77e43

Merge pull request #1415 from m-holger/pr1396

Fix offsets in QPDF::resolveObjectsInStream warnings
include/qpdf/QPDFExc.hh
... ... @@ -38,6 +38,15 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
38 38 qpdf_offset_t offset,
39 39 std::string const& message);
40 40  
  41 + QPDF_DLL
  42 + QPDFExc(
  43 + qpdf_error_code_e error_code,
  44 + std::string const& filename,
  45 + std::string const& object,
  46 + qpdf_offset_t offset,
  47 + std::string const& message,
  48 + bool zero_offset_valid);
  49 +
41 50 ~QPDFExc() noexcept override = default;
42 51  
43 52 // To get a complete error string, call what(), provided by std::exception. The accessors below
... ...
libqpdf/QPDF.cc
... ... @@ -813,12 +813,12 @@ QPDF::getRoot()
813 813 {
814 814 QPDFObjectHandle root = m->trailer.getKey("/Root");
815 815 if (!root.isDictionary()) {
816   - throw damagedPDF("", 0, "unable to find /Root dictionary");
  816 + throw damagedPDF("", -1, "unable to find /Root dictionary");
817 817 } else if (
818 818 // Check_mode is an interim solution to request #810 pending a more comprehensive review of
819 819 // the approach to more extensive checks and warning levels.
820 820 m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
821   - warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
  821 + warn(damagedPDF("", -1, "catalog /Type entry missing or invalid"));
822 822 root.replaceKey("/Type", "/Catalog"_qpdf);
823 823 }
824 824 return root;
... ... @@ -964,7 +964,7 @@ QPDFExc
964 964 QPDF::damagedPDF(
965 965 InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
966 966 {
967   - return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};
  967 + return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true};
968 968 }
969 969  
970 970 // Return an exception of type qpdf_e_damaged_pdf. The object is taken from
... ... @@ -979,7 +979,7 @@ QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& me
979 979 QPDFExc
980 980 QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
981 981 {
982   - return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
  982 + return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true};
983 983 }
984 984  
985 985 // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
... ...
libqpdf/QPDFExc.cc
... ... @@ -6,11 +6,28 @@ QPDFExc::QPDFExc(
6 6 std::string const& object,
7 7 qpdf_offset_t offset,
8 8 std::string const& message) :
9   - std::runtime_error(createWhat(filename, object, offset, message)),
  9 + std::runtime_error(createWhat(filename, object, (offset ? offset : -1), message)),
10 10 error_code(error_code),
11 11 filename(filename),
12 12 object(object),
13   - offset(offset),
  13 + offset(offset ? offset : -1),
  14 + message(message)
  15 +{
  16 +}
  17 +
  18 +QPDFExc::QPDFExc(
  19 + qpdf_error_code_e error_code,
  20 + std::string const& filename,
  21 + std::string const& object,
  22 + qpdf_offset_t offset,
  23 + std::string const& message,
  24 + bool zero_offset_valid) :
  25 + std::runtime_error(
  26 + createWhat(filename, object, (offset || zero_offset_valid ? offset : -1), message)),
  27 + error_code(error_code),
  28 + filename(filename),
  29 + object(object),
  30 + offset(offset || zero_offset_valid ? offset : -1),
14 31 message(message)
15 32 {
16 33 }
... ... @@ -26,17 +43,17 @@ QPDFExc::createWhat(
26 43 if (!filename.empty()) {
27 44 result += filename;
28 45 }
29   - if (!(object.empty() && offset == 0)) {
  46 + if (!(object.empty() && offset < 0)) {
30 47 if (!filename.empty()) {
31 48 result += " (";
32 49 }
33 50 if (!object.empty()) {
34 51 result += object;
35   - if (offset > 0) {
  52 + if (offset >= 0) {
36 53 result += ", ";
37 54 }
38 55 }
39   - if (offset > 0) {
  56 + if (offset >= 0) {
40 57 result += "offset " + std::to_string(offset);
41 58 }
42 59 if (!filename.empty()) {
... ... @@ -71,7 +88,7 @@ QPDFExc::getObject() const
71 88 qpdf_offset_t
72 89 QPDFExc::getFilePosition() const
73 90 {
74   - return this->offset;
  91 + return offset < 0 ? 0 : offset;
75 92 }
76 93  
77 94 std::string const&
... ...
libqpdf/QPDF_objects.cc
... ... @@ -108,7 +108,7 @@ QPDF::parse(char const* password)
108 108 PatternFinder hf(*this, &QPDF::findHeader);
109 109 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
110 110 QTC::TC("qpdf", "QPDF not a pdf file");
111   - warn(damagedPDF("", 0, "can't find PDF header"));
  111 + warn(damagedPDF("", -1, "can't find PDF header"));
112 112 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
113 113 m->pdf_version = "1.2";
114 114 }
... ... @@ -133,14 +133,14 @@ QPDF::parse(char const* password)
133 133 try {
134 134 if (xref_offset == 0) {
135 135 QTC::TC("qpdf", "QPDF can't find startxref");
136   - throw damagedPDF("", 0, "can't find startxref");
  136 + throw damagedPDF("", -1, "can't find startxref");
137 137 }
138 138 try {
139 139 read_xref(xref_offset);
140 140 } catch (QPDFExc&) {
141 141 throw;
142 142 } catch (std::exception& e) {
143   - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());
  143 + throw damagedPDF("", -1, std::string("error reading xref: ") + e.what());
144 144 }
145 145 } catch (QPDFExc& e) {
146 146 if (m->attempt_recovery) {
... ... @@ -155,7 +155,7 @@ QPDF::parse(char const* password)
155 155 m->parsed = true;
156 156 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
157 157 // QPDFs created from JSON have an empty xref table and no root object yet.
158   - throw damagedPDF("", 0, "unable to find page tree");
  158 + throw damagedPDF("", -1, "unable to find page tree");
159 159 }
160 160 }
161 161  
... ... @@ -195,7 +195,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
195 195 const auto max_warnings = m->warnings.size() + 1000U;
196 196 auto check_warnings = [this, max_warnings]() {
197 197 if (m->warnings.size() > max_warnings) {
198   - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
  198 + throw damagedPDF("", -1, "too many errors while reconstructing cross-reference table");
199 199 }
200 200 };
201 201  
... ... @@ -203,9 +203,9 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
203 203 // We may find more objects, which may contain dangling references.
204 204 m->fixed_dangling_refs = false;
205 205  
206   - warn(damagedPDF("", 0, "file is damaged"));
  206 + warn(damagedPDF("", -1, "file is damaged"));
207 207 warn(e);
208   - warn(damagedPDF("", 0, "Attempting to reconstruct cross-reference table"));
  208 + warn(damagedPDF("", -1, "Attempting to reconstruct cross-reference table"));
209 209  
210 210 // Delete all references to type 1 (uncompressed) objects
211 211 std::vector<QPDFObjGen> to_delete;
... ... @@ -240,7 +240,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
240 240 found_objects.emplace_back(obj, gen, token_start);
241 241 } else {
242 242 warn(damagedPDF(
243   - "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
  243 + "", -1, "ignoring object with impossibly large id " + std::to_string(obj)));
244 244 }
245 245 }
246 246 m->file->seek(pos, SEEK_SET);
... ... @@ -261,8 +261,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
261 261 read_xref(offset);
262 262 if (getRoot().getKey("/Pages").isDictionary()) {
263 263 QTC::TC("qpdf", "QPDF startxref more than 1024 before end");
264   - warn(
265   - damagedPDF("", 0, "startxref was more than 1024 bytes before end of file"));
  264 + warn(damagedPDF(
  265 + "", -1, "startxref was more than 1024 bytes before end of file"));
266 266 initializeEncryption();
267 267 m->parsed = true;
268 268 m->reconstructed_xref = false;
... ... @@ -327,7 +327,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
327 327 read_xref(max_offset);
328 328 } catch (std::exception&) {
329 329 warn(damagedPDF(
330   - "", 0, "error decoding candidate xref stream while recovering damaged file"));
  330 + "", -1, "error decoding candidate xref stream while recovering damaged file"));
331 331 }
332 332 QTC::TC("qpdf", "QPDF recover xref stream");
333 333 }
... ... @@ -348,7 +348,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
348 348 if (root) {
349 349 if (!m->trailer) {
350 350 warn(damagedPDF(
351   - "", 0, "unable to find trailer dictionary while recovering damaged file"));
  351 + "", -1, "unable to find trailer dictionary while recovering damaged file"));
352 352 m->trailer = QPDFObjectHandle::newDictionary();
353 353 }
354 354 m->trailer.replaceKey("/Root", root);
... ... @@ -360,12 +360,12 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
360 360 // could try to get the trailer from there. This may make it possible to recover files with
361 361 // bad startxref pointers even when they have object streams.
362 362  
363   - throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
  363 + throw damagedPDF("", -1, "unable to find trailer dictionary while recovering damaged file");
364 364 }
365 365 if (m->xref_table.empty()) {
366 366 // We cannot check for an empty xref table in parse because empty tables are valid when
367 367 // creating QPDF objects from JSON.
368   - throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
  368 + throw damagedPDF("", -1, "unable to find objects while recovering damaged file");
369 369 }
370 370 check_warnings();
371 371 if (!m->parsed) {
... ... @@ -374,7 +374,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
374 374 check_warnings();
375 375 if (m->all_pages.empty()) {
376 376 m->parsed = false;
377   - throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
  377 + throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 378 }
379 379 }
380 380 // We could iterate through the objects looking for streams and try to find objects inside of
... ... @@ -421,7 +421,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
421 421 if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) {
422 422 if (skipped_space) {
423 423 QTC::TC("qpdf", "QPDF xref skipped space");
424   - warn(damagedPDF("", 0, "extraneous whitespace seen before xref"));
  424 + warn(damagedPDF("", -1, "extraneous whitespace seen before xref"));
425 425 }
426 426 QTC::TC(
427 427 "qpdf",
... ... @@ -441,12 +441,12 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
441 441 }
442 442 if (visited.count(xref_offset) != 0) {
443 443 QTC::TC("qpdf", "QPDF xref loop");
444   - throw damagedPDF("", 0, "loop detected following xref tables");
  444 + throw damagedPDF("", -1, "loop detected following xref tables");
445 445 }
446 446 }
447 447  
448 448 if (!m->trailer) {
449   - throw damagedPDF("", 0, "unable to find trailer while reading xref");
  449 + throw damagedPDF("", -1, "unable to find trailer while reading xref");
450 450 }
451 451 int size = m->trailer.getKey("/Size").getIntValueAsInt();
452 452 int max_obj = 0;
... ... @@ -460,7 +460,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
460 460 QTC::TC("qpdf", "QPDF xref size mismatch");
461 461 warn(damagedPDF(
462 462 "",
463   - 0,
  463 + -1,
464 464 ("reported number of objects (" + std::to_string(size) +
465 465 ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
466 466 }
... ... @@ -1411,7 +1411,7 @@ QPDF::readObjectAtOffset(
1411 1411 // these.
1412 1412 if (offset == 0) {
1413 1413 QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
1414   - warn(damagedPDF(0, "object has offset 0"));
  1414 + warn(damagedPDF(-1, "object has offset 0"));
1415 1415 return QPDFObjectHandle::newNull();
1416 1416 }
1417 1417  
... ... @@ -1470,7 +1470,7 @@ QPDF::readObjectAtOffset(
1470 1470 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1471 1471 warn(damagedPDF(
1472 1472 "",
1473   - 0,
  1473 + -1,
1474 1474 ("object " + exp_og.unparse(' ') +
1475 1475 " not found in file after regenerating cross reference "
1476 1476 "table")));
... ... @@ -1577,13 +1577,13 @@ QPDF::resolve(QPDFObjGen og)
1577 1577  
1578 1578 default:
1579 1579 throw damagedPDF(
1580   - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
  1580 + "", -1, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1581 1581 }
1582 1582 } catch (QPDFExc& e) {
1583 1583 warn(e);
1584 1584 } catch (std::exception& e) {
1585 1585 warn(damagedPDF(
1586   - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
  1586 + "", -1, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1587 1587 }
1588 1588 }
1589 1589  
... ... @@ -1608,7 +1608,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1608 1608 m->file->getName() + " object stream " + std::to_string(obj_stream_number),
1609 1609 +"object " + std::to_string(id) + " 0",
1610 1610 offset,
1611   - msg};
  1611 + msg,
  1612 + true};
1612 1613 };
1613 1614  
1614 1615 if (m->resolved_object_streams.count(obj_stream_number)) {
... ... @@ -1667,6 +1668,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1667 1668 bool is_first = true;
1668 1669 for (unsigned int i = 0; i < n; ++i) {
1669 1670 auto tnum = readToken(input);
  1671 + auto id_offset = input.getLastOffset();
1670 1672 auto toffset = readToken(input);
1671 1673 if (!(tnum.isInteger() && toffset.isInteger())) {
1672 1674 throw damaged(0, input.getLastOffset(), "expected integer in object stream header");
... ... @@ -1677,13 +1679,13 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1677 1679  
1678 1680 if (num == obj_stream_number) {
1679 1681 QTC::TC("qpdf", "QPDF ignore self-referential object stream");
1680   - warn(damaged(num, input.getLastOffset(), "object stream claims to contain itself"));
  1682 + warn(damaged(num, id_offset, "object stream claims to contain itself"));
1681 1683 continue;
1682 1684 }
1683 1685  
1684 1686 if (num < 1) {
1685 1687 QTC::TC("qpdf", "QPDF object stream contains id < 1");
1686   - warn(damaged(num, input.getLastOffset(), "object id is invalid"s));
  1688 + warn(damaged(num, id_offset, "object id is invalid"s));
1687 1689 continue;
1688 1690 }
1689 1691  
... ... @@ -1691,8 +1693,9 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1691 1693 QTC::TC("qpdf", "QPDF object stream offsets not increasing");
1692 1694 warn(damaged(
1693 1695 num,
1694   - offset,
1695   - "offset is invalid (must be larger than previous offset " +
  1696 + input.getLastOffset(),
  1697 + "offset " + std::to_string(offset) +
  1698 + " is invalid (must be larger than previous offset " +
1696 1699 std::to_string(last_offset) + ")"));
1697 1700 continue;
1698 1701 }
... ... @@ -1702,7 +1705,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1702 1705 }
1703 1706  
1704 1707 if (first + offset >= end_offset) {
1705   - warn(damaged(num, offset, "offset is too large"));
  1708 + warn(damaged(
  1709 + num, input.getLastOffset(), "offset " + std::to_string(offset) + " is too large"));
1706 1710 continue;
1707 1711 }
1708 1712  
... ...
qpdf/qtest/qpdf/issue-143.out
... ... @@ -14,9 +14,9 @@ WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but foun
14 14 WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length key
15 15 WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length
16 16 WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606
17   -WARNING: issue-143.pdf object stream 1 (object 0 0, offset 4): object id is invalid
18   -WARNING: issue-143.pdf object stream 1 (object 0 0, offset 15): object id is invalid
19   -WARNING: issue-143.pdf object stream 1 (object 6 0): offset is invalid (must be larger than previous offset 0)
20   -WARNING: issue-143.pdf object stream 1 (object 0 0, offset 27): object id is invalid
  17 +WARNING: issue-143.pdf object stream 1 (object 0 0, offset 0): object id is invalid
  18 +WARNING: issue-143.pdf object stream 1 (object 0 0, offset 11): object id is invalid
  19 +WARNING: issue-143.pdf object stream 1 (object 6 0, offset 21): offset 0 is invalid (must be larger than previous offset 0)
  20 +WARNING: issue-143.pdf object stream 1 (object 0 0, offset 23): object id is invalid
21 21 WARNING: issue-143.pdf object stream 1 (object 2 0, offset 33): expected dictionary key but found non-name object; inserting key /QPDFFake1
22 22 qpdf: issue-143.pdf: unable to find page tree
... ...
qpdf/qtest/qpdf/object-stream-self-ref.out
1   -WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 2): object stream claims to contain itself
  1 +WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 0): object stream claims to contain itself
2 2 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...