Commit e324d36b95aad64d6d53cad74d44e7ac32242d0a

Authored by m-holger
Committed by GitHub
2 parents aa583f29 ca3ea2e3

Merge pull request #1343 from m-holger/i1335a

Refine xref reconstruction (fixes #1335)
libqpdf/QPDF.cc
... ... @@ -608,6 +608,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
608 608  
609 609 if (!m->trailer) {
610 610 qpdf_offset_t max_offset{0};
  611 + size_t max_size{0};
611 612 // If there are any xref streams, take the last one to appear.
612 613 for (auto const& iter: m->xref_table) {
613 614 auto entry = iter.second;
... ... @@ -623,7 +624,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
623 624 continue;
624 625 }
625 626 auto offset = entry.getOffset();
626   - if (offset > max_offset) {
  627 + auto size = oh.getDict().getKey("/Size").getUIntValueAsUInt();
  628 + if (size > max_size || (size == max_size && offset > max_offset)) {
627 629 max_offset = offset;
628 630 setTrailer(oh.getDict());
629 631 }
... ... @@ -633,13 +635,35 @@ QPDF::reconstruct_xref(QPDFExc& e)
633 635 try {
634 636 read_xref(max_offset);
635 637 } catch (std::exception&) {
636   - throw damagedPDF(
637   - "", 0, "error decoding candidate xref stream while recovering damaged file");
  638 + warn(damagedPDF(
  639 + "", 0, "error decoding candidate xref stream while recovering damaged file"));
638 640 }
639 641 QTC::TC("qpdf", "QPDF recover xref stream");
640 642 }
641 643 }
642 644  
  645 + if (!m->trailer || (!m->parsed && !m->trailer.getKey("/Root").isDictionary())) {
  646 + // Try to find a Root dictionary. As a quick fix try the one with the highest object id.
  647 + QPDFObjectHandle root;
  648 + for (auto const& iter: m->obj_cache) {
  649 + try {
  650 + if (QPDFObjectHandle(iter.second.object).isDictionaryOfType("/Catalog")) {
  651 + root = iter.second.object;
  652 + }
  653 + } catch (std::exception&) {
  654 + continue;
  655 + }
  656 + }
  657 + if (root) {
  658 + if (!m->trailer) {
  659 + warn(damagedPDF(
  660 + "", 0, "unable to find trailer dictionary while recovering damaged file"));
  661 + m->trailer = QPDFObjectHandle::newDictionary();
  662 + }
  663 + m->trailer.replaceKey("/Root", root);
  664 + }
  665 + }
  666 +
643 667 if (!m->trailer) {
644 668 // We could check the last encountered object to see if it was an xref stream. If so, we
645 669 // could try to get the trailer from there. This may make it possible to recover files with
... ...
qpdf/qtest/error-condition.test
... ... @@ -127,7 +127,7 @@ $n_tests += @badfiles + 11;
127 127 # though in some cases it may. Acrobat Reader would not be able to
128 128 # recover any of these files any better.
129 129 my %recover_failures = ();
130   -for (1, 7, 16)
  130 +for (1)
131 131 {
132 132 $recover_failures{$_} = 1;
133 133 }
... ...
qpdf/qtest/qpdf/bad16-recover.out
... ... @@ -11,4 +11,10 @@ WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token
11 11 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string
12 12 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object
13 13 WARNING: bad16.pdf (trailer, offset 779): unexpected EOF
14   -bad16.pdf: unable to find trailer dictionary while recovering damaged file
  14 +WARNING: bad16.pdf: unable to find trailer dictionary while recovering damaged file
  15 +/QTest is implicit
  16 +/QTest is direct and has type null (2)
  17 +/QTest is null
  18 +unparse: null
  19 +unparseResolved: null
  20 +test 1 done
... ...
qpdf/qtest/qpdf/bad7-recover.out
... ... @@ -3,4 +3,10 @@ WARNING: bad7.pdf (offset 698): expected trailer dictionary
3 3 WARNING: bad7.pdf: Attempting to reconstruct cross-reference table
4 4 WARNING: bad7.pdf (object 2 0, offset 128): expected endobj
5 5 WARNING: bad7.pdf (object 4 0, offset 389): expected endobj
6   -bad7.pdf: unable to find trailer dictionary while recovering damaged file
  6 +WARNING: bad7.pdf: unable to find trailer dictionary while recovering damaged file
  7 +/QTest is implicit
  8 +/QTest is direct and has type null (2)
  9 +/QTest is null
  10 +unparse: null
  11 +unparseResolved: null
  12 +test 1 done
... ...
qpdf/qtest/qpdf/issue-100.out
... ... @@ -2,4 +2,12 @@ WARNING: issue-100.pdf: file is damaged
2 2 WARNING: issue-100.pdf (offset 736): xref not found
3 3 WARNING: issue-100.pdf: Attempting to reconstruct cross-reference table
4 4 WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer
  5 +WARNING: issue-100.pdf (object 5 0, offset 268): unknown token while reading object; treating as string
  6 +WARNING: issue-100.pdf (object 5 0, offset 286): unknown token while reading object; treating as string
  7 +WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading object; treating as string
  8 +WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading object; treating as string
  9 +WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as string
  10 +WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as string
  11 +WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object
  12 +WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj
5 13 qpdf: issue-100.pdf: unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-101.out
... ... @@ -2,4 +2,6 @@ WARNING: issue-101.pdf: file is damaged
2 2 WARNING: issue-101.pdf (offset 3526): xref not found
3 3 WARNING: issue-101.pdf: Attempting to reconstruct cross-reference table
4 4 WARNING: issue-101.pdf (trailer, offset 1508): stream keyword found in trailer
  5 +WARNING: issue-101.pdf (object 5 0, offset 1242): dictionary ended prematurely; using null as value for last key
  6 +WARNING: issue-101.pdf (object 5 0, offset 1242): expected dictionary key but found non-name object; inserting key /QPDFFake1
5 7 qpdf: issue-101.pdf: unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-148.out
... ... @@ -12,4 +12,5 @@ WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recov
12 12 WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2
13 13 WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj
14 14 WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check
15   -qpdf: issue-148.pdf: error decoding candidate xref stream while recovering damaged file
  15 +WARNING: issue-148.pdf: error decoding candidate xref stream while recovering damaged file
  16 +qpdf: issue-148.pdf: unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-202.out
... ... @@ -5,4 +5,5 @@ WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table
5 5 WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure
6 6 WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones
7 7 WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones
8   -qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
  8 +WARNING: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
  9 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/specific-bugs.test
... ... @@ -34,7 +34,7 @@ my @bug_tests = (
34 34 ["148", "free memory on bad flate", 2],
35 35 ["149", "xref prev pointer loop", 3],
36 36 ["150", "integer overflow", 2],
37   - ["202", "even more deeply nested dictionary", 2],
  37 + ["202", "even more deeply nested dictionary", 3],
38 38 ["263", "empty xref stream", 2],
39 39 ["335a", "ozz-fuzz-12152", 2],
40 40 ["335b", "ozz-fuzz-14845", 2],
... ...