Commit e324d36b95aad64d6d53cad74d44e7ac32242d0a
Committed by GitHub
Merge pull request #1343 from m-holger/i1335a
Refine xref reconstruction (fixes #1335)
Showing 9 changed files with 57 additions and 9 deletions
libqpdf/QPDF.cc
| @@ -608,6 +608,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -608,6 +608,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 608 | 608 | ||
| 609 | if (!m->trailer) { | 609 | if (!m->trailer) { |
| 610 | qpdf_offset_t max_offset{0}; | 610 | qpdf_offset_t max_offset{0}; |
| 611 | + size_t max_size{0}; | ||
| 611 | // If there are any xref streams, take the last one to appear. | 612 | // If there are any xref streams, take the last one to appear. |
| 612 | for (auto const& iter: m->xref_table) { | 613 | for (auto const& iter: m->xref_table) { |
| 613 | auto entry = iter.second; | 614 | auto entry = iter.second; |
| @@ -623,7 +624,8 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -623,7 +624,8 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 623 | continue; | 624 | continue; |
| 624 | } | 625 | } |
| 625 | auto offset = entry.getOffset(); | 626 | auto offset = entry.getOffset(); |
| 626 | - if (offset > max_offset) { | 627 | + auto size = oh.getDict().getKey("/Size").getUIntValueAsUInt(); |
| 628 | + if (size > max_size || (size == max_size && offset > max_offset)) { | ||
| 627 | max_offset = offset; | 629 | max_offset = offset; |
| 628 | setTrailer(oh.getDict()); | 630 | setTrailer(oh.getDict()); |
| 629 | } | 631 | } |
| @@ -633,13 +635,35 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -633,13 +635,35 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 633 | try { | 635 | try { |
| 634 | read_xref(max_offset); | 636 | read_xref(max_offset); |
| 635 | } catch (std::exception&) { | 637 | } catch (std::exception&) { |
| 636 | - throw damagedPDF( | ||
| 637 | - "", 0, "error decoding candidate xref stream while recovering damaged file"); | 638 | + warn(damagedPDF( |
| 639 | + "", 0, "error decoding candidate xref stream while recovering damaged file")); | ||
| 638 | } | 640 | } |
| 639 | QTC::TC("qpdf", "QPDF recover xref stream"); | 641 | QTC::TC("qpdf", "QPDF recover xref stream"); |
| 640 | } | 642 | } |
| 641 | } | 643 | } |
| 642 | 644 | ||
| 645 | + if (!m->trailer || (!m->parsed && !m->trailer.getKey("/Root").isDictionary())) { | ||
| 646 | + // Try to find a Root dictionary. As a quick fix try the one with the highest object id. | ||
| 647 | + QPDFObjectHandle root; | ||
| 648 | + for (auto const& iter: m->obj_cache) { | ||
| 649 | + try { | ||
| 650 | + if (QPDFObjectHandle(iter.second.object).isDictionaryOfType("/Catalog")) { | ||
| 651 | + root = iter.second.object; | ||
| 652 | + } | ||
| 653 | + } catch (std::exception&) { | ||
| 654 | + continue; | ||
| 655 | + } | ||
| 656 | + } | ||
| 657 | + if (root) { | ||
| 658 | + if (!m->trailer) { | ||
| 659 | + warn(damagedPDF( | ||
| 660 | + "", 0, "unable to find trailer dictionary while recovering damaged file")); | ||
| 661 | + m->trailer = QPDFObjectHandle::newDictionary(); | ||
| 662 | + } | ||
| 663 | + m->trailer.replaceKey("/Root", root); | ||
| 664 | + } | ||
| 665 | + } | ||
| 666 | + | ||
| 643 | if (!m->trailer) { | 667 | if (!m->trailer) { |
| 644 | // We could check the last encountered object to see if it was an xref stream. If so, we | 668 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| 645 | // could try to get the trailer from there. This may make it possible to recover files with | 669 | // could try to get the trailer from there. This may make it possible to recover files with |
qpdf/qtest/error-condition.test
| @@ -127,7 +127,7 @@ $n_tests += @badfiles + 11; | @@ -127,7 +127,7 @@ $n_tests += @badfiles + 11; | ||
| 127 | # though in some cases it may. Acrobat Reader would not be able to | 127 | # though in some cases it may. Acrobat Reader would not be able to |
| 128 | # recover any of these files any better. | 128 | # recover any of these files any better. |
| 129 | my %recover_failures = (); | 129 | my %recover_failures = (); |
| 130 | -for (1, 7, 16) | 130 | +for (1) |
| 131 | { | 131 | { |
| 132 | $recover_failures{$_} = 1; | 132 | $recover_failures{$_} = 1; |
| 133 | } | 133 | } |
qpdf/qtest/qpdf/bad16-recover.out
| @@ -11,4 +11,10 @@ WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token | @@ -11,4 +11,10 @@ WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token | ||
| 11 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string | 11 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 12 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object | 12 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 13 | WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | 13 | WARNING: bad16.pdf (trailer, offset 779): unexpected EOF |
| 14 | -bad16.pdf: unable to find trailer dictionary while recovering damaged file | 14 | +WARNING: bad16.pdf: unable to find trailer dictionary while recovering damaged file |
| 15 | +/QTest is implicit | ||
| 16 | +/QTest is direct and has type null (2) | ||
| 17 | +/QTest is null | ||
| 18 | +unparse: null | ||
| 19 | +unparseResolved: null | ||
| 20 | +test 1 done |
qpdf/qtest/qpdf/bad7-recover.out
| @@ -3,4 +3,10 @@ WARNING: bad7.pdf (offset 698): expected trailer dictionary | @@ -3,4 +3,10 @@ WARNING: bad7.pdf (offset 698): expected trailer dictionary | ||
| 3 | WARNING: bad7.pdf: Attempting to reconstruct cross-reference table | 3 | WARNING: bad7.pdf: Attempting to reconstruct cross-reference table |
| 4 | WARNING: bad7.pdf (object 2 0, offset 128): expected endobj | 4 | WARNING: bad7.pdf (object 2 0, offset 128): expected endobj |
| 5 | WARNING: bad7.pdf (object 4 0, offset 389): expected endobj | 5 | WARNING: bad7.pdf (object 4 0, offset 389): expected endobj |
| 6 | -bad7.pdf: unable to find trailer dictionary while recovering damaged file | 6 | +WARNING: bad7.pdf: unable to find trailer dictionary while recovering damaged file |
| 7 | +/QTest is implicit | ||
| 8 | +/QTest is direct and has type null (2) | ||
| 9 | +/QTest is null | ||
| 10 | +unparse: null | ||
| 11 | +unparseResolved: null | ||
| 12 | +test 1 done |
qpdf/qtest/qpdf/issue-100.out
| @@ -2,4 +2,12 @@ WARNING: issue-100.pdf: file is damaged | @@ -2,4 +2,12 @@ WARNING: issue-100.pdf: file is damaged | ||
| 2 | WARNING: issue-100.pdf (offset 736): xref not found | 2 | WARNING: issue-100.pdf (offset 736): xref not found |
| 3 | WARNING: issue-100.pdf: Attempting to reconstruct cross-reference table | 3 | WARNING: issue-100.pdf: Attempting to reconstruct cross-reference table |
| 4 | WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer | 4 | WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer |
| 5 | +WARNING: issue-100.pdf (object 5 0, offset 268): unknown token while reading object; treating as string | ||
| 6 | +WARNING: issue-100.pdf (object 5 0, offset 286): unknown token while reading object; treating as string | ||
| 7 | +WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading object; treating as string | ||
| 8 | +WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading object; treating as string | ||
| 9 | +WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as string | ||
| 10 | +WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as string | ||
| 11 | +WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object | ||
| 12 | +WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj | ||
| 5 | qpdf: issue-100.pdf: unable to find /Root dictionary | 13 | qpdf: issue-100.pdf: unable to find /Root dictionary |
qpdf/qtest/qpdf/issue-101.out
| @@ -2,4 +2,6 @@ WARNING: issue-101.pdf: file is damaged | @@ -2,4 +2,6 @@ WARNING: issue-101.pdf: file is damaged | ||
| 2 | WARNING: issue-101.pdf (offset 3526): xref not found | 2 | WARNING: issue-101.pdf (offset 3526): xref not found |
| 3 | WARNING: issue-101.pdf: Attempting to reconstruct cross-reference table | 3 | WARNING: issue-101.pdf: Attempting to reconstruct cross-reference table |
| 4 | WARNING: issue-101.pdf (trailer, offset 1508): stream keyword found in trailer | 4 | WARNING: issue-101.pdf (trailer, offset 1508): stream keyword found in trailer |
| 5 | +WARNING: issue-101.pdf (object 5 0, offset 1242): dictionary ended prematurely; using null as value for last key | ||
| 6 | +WARNING: issue-101.pdf (object 5 0, offset 1242): expected dictionary key but found non-name object; inserting key /QPDFFake1 | ||
| 5 | qpdf: issue-101.pdf: unable to find /Root dictionary | 7 | qpdf: issue-101.pdf: unable to find /Root dictionary |
qpdf/qtest/qpdf/issue-148.out
| @@ -12,4 +12,5 @@ WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recov | @@ -12,4 +12,5 @@ WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recov | ||
| 12 | WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2 | 12 | WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2 |
| 13 | WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj | 13 | WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj |
| 14 | WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check | 14 | WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check |
| 15 | -qpdf: issue-148.pdf: error decoding candidate xref stream while recovering damaged file | 15 | +WARNING: issue-148.pdf: error decoding candidate xref stream while recovering damaged file |
| 16 | +qpdf: issue-148.pdf: unable to find /Root dictionary |
qpdf/qtest/qpdf/issue-202.out
| @@ -5,4 +5,5 @@ WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table | @@ -5,4 +5,5 @@ WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table | ||
| 5 | WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure | 5 | WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure |
| 6 | WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones | 6 | WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones |
| 7 | WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones | 7 | WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones |
| 8 | -qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file | 8 | +WARNING: issue-202.pdf: unable to find trailer dictionary while recovering damaged file |
| 9 | +qpdf: operation succeeded with warnings; resulting file may have some problems |
qpdf/qtest/specific-bugs.test
| @@ -34,7 +34,7 @@ my @bug_tests = ( | @@ -34,7 +34,7 @@ my @bug_tests = ( | ||
| 34 | ["148", "free memory on bad flate", 2], | 34 | ["148", "free memory on bad flate", 2], |
| 35 | ["149", "xref prev pointer loop", 3], | 35 | ["149", "xref prev pointer loop", 3], |
| 36 | ["150", "integer overflow", 2], | 36 | ["150", "integer overflow", 2], |
| 37 | - ["202", "even more deeply nested dictionary", 2], | 37 | + ["202", "even more deeply nested dictionary", 3], |
| 38 | ["263", "empty xref stream", 2], | 38 | ["263", "empty xref stream", 2], |
| 39 | ["335a", "ozz-fuzz-12152", 2], | 39 | ["335a", "ozz-fuzz-12152", 2], |
| 40 | ["335b", "ozz-fuzz-14845", 2], | 40 | ["335b", "ozz-fuzz-14845", 2], |