Commit 2994f9cf4cc45e33406de34d4bce45ca491df98e
1 parent
8a24287c
Attempt to find xref streams during recovery (fixes #1103)
Showing
13 changed files
with
74 additions
and
2 deletions
ChangeLog
| 1 | +2024-01-06 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * When recovering a file's xref table, attempt to find xref | |
| 4 | + streams if a traditional trailer dictionary is not found. Fixes | |
| 5 | + #1103. | |
| 6 | + | |
| 1 | 7 | 2024-01-05 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | |
| 3 | 9 | * Add --set-page-labels command-line argument and supporting API. | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -580,6 +580,38 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 580 | 580 | m->deleted_objects.clear(); |
| 581 | 581 | |
| 582 | 582 | if (!m->trailer.isInitialized()) { |
| 583 | + qpdf_offset_t max_offset{0}; | |
| 584 | + // If there are any xref streams, take the last one to appear. | |
| 585 | + for (auto const& iter: m->xref_table) { | |
| 586 | + auto entry = iter.second; | |
| 587 | + if (entry.getType() != 1) { | |
| 588 | + continue; | |
| 589 | + } | |
| 590 | + auto oh = getObjectByObjGen(iter.first); | |
| 591 | + try { | |
| 592 | + if (!oh.isStreamOfType("/XRef")) { | |
| 593 | + continue; | |
| 594 | + } | |
| 595 | + } catch (std::exception&) { | |
| 596 | + continue; | |
| 597 | + } | |
| 598 | + auto offset = entry.getOffset(); | |
| 599 | + if (offset > max_offset) { | |
| 600 | + max_offset = offset; | |
| 601 | + setTrailer(oh.getDict()); | |
| 602 | + } | |
| 603 | + } | |
| 604 | + if (max_offset > 0) { | |
| 605 | + try { | |
| 606 | + read_xref(max_offset); | |
| 607 | + } catch (std::exception&) { | |
| 608 | + throw damagedPDF("", 0, "error decoding candidate xref stream while recovering damaged file"); | |
| 609 | + } | |
| 610 | + QTC::TC("qpdf", "QPDF recover xref stream"); | |
| 611 | + } | |
| 612 | + } | |
| 613 | + | |
| 614 | + if (!m->trailer.isInitialized()) { | |
| 583 | 615 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| 584 | 616 | // could try to get the trailer from there. This may make it possible to recover files with |
| 585 | 617 | // bad startxref pointers even when they have object streams. | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -67,6 +67,11 @@ Planned changes for future 12.x (subject to change): |
| 67 | 67 | |
| 68 | 68 | - ``QPDFPageLabelDocumentHelper::pageLabelDict`` |
| 69 | 69 | |
| 70 | + - Improve file recovery logic to better handle files with | |
| 71 | + cross-reference streams. This should enable qpdf to recover some | |
| 72 | + files that it would previously have reported "unable to find | |
| 73 | + trailer dictionary." | |
| 74 | + | |
| 70 | 75 | 11.7.0: December 24, 2023 |
| 71 | 76 | - Bug fixes: |
| 72 | 77 | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -689,3 +689,4 @@ QPDFPageObjectHelper used fallback without copying 0 |
| 689 | 689 | QPDF skipping cache for known unchecked object 0 |
| 690 | 690 | QPDF fix dangling triggered xref reconstruction 0 |
| 691 | 691 | QPDFPageDocumentHelper flatten resources missing or invalid 0 |
| 692 | +QPDF recover xref stream 0 | ... | ... |
qpdf/qtest/object-stream.test
| ... | ... | @@ -16,7 +16,7 @@ cleanup(); |
| 16 | 16 | |
| 17 | 17 | my $td = new TestDriver('object-stream'); |
| 18 | 18 | |
| 19 | -my $n_tests = 3 + (36 * 4) + (12 * 2); | |
| 19 | +my $n_tests = 5 + (36 * 4) + (12 * 2); | |
| 20 | 20 | my $n_compare_pdfs = 36; |
| 21 | 21 | |
| 22 | 22 | for (my $n = 16; $n <= 19; ++$n) |
| ... | ... | @@ -87,5 +87,15 @@ $td->runtest("check file", |
| 87 | 87 | {$td->FILE => "gen1.qdf"}); |
| 88 | 88 | |
| 89 | 89 | |
| 90 | +# Recover a file with xref streams | |
| 91 | +$td->runtest("recover file with xref stream", | |
| 92 | + {$td->COMMAND => "qpdf --static-id --compress-streams=n" . | |
| 93 | + " recover-xref-stream.pdf a.pdf"}, | |
| 94 | + {$td->FILE => "recover-xref-stream.out", $td->EXIT_STATUS => 3}, | |
| 95 | + $td->NORMALIZE_NEWLINES); | |
| 96 | +$td->runtest("check file", | |
| 97 | + {$td->FILE => "a.pdf"}, | |
| 98 | + {$td->FILE => "recover-xref-stream-recovered.pdf"}); | |
| 99 | + | |
| 90 | 100 | cleanup(); |
| 91 | 101 | $td->report(calc_ntests($n_tests, $n_compare_pdfs)); | ... | ... |
qpdf/qtest/qpdf/bad7-recover.out
| 1 | 1 | WARNING: bad7.pdf: file is damaged |
| 2 | 2 | WARNING: bad7.pdf (offset 698): expected trailer dictionary |
| 3 | 3 | WARNING: bad7.pdf: Attempting to reconstruct cross-reference table |
| 4 | +WARNING: bad7.pdf (object 2 0, offset 128): expected endobj | |
| 5 | +WARNING: bad7.pdf (object 4 0, offset 389): expected endobj | |
| 4 | 6 | bad7.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/issue-146.out
| ... | ... | @@ -2,4 +2,7 @@ WARNING: issue-146.pdf: file is damaged |
| 2 | 2 | WARNING: issue-146.pdf: can't find startxref |
| 3 | 3 | WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table |
| 4 | 4 | WARNING: issue-146.pdf (trailer, offset 695): ignoring excessively deeply nested data structure |
| 5 | +WARNING: issue-146.pdf (object 1 0, offset 92): expected endobj | |
| 6 | +WARNING: issue-146.pdf (object 7 0, offset 146): unknown token while reading object; treating as string | |
| 7 | +WARNING: issue-146.pdf (object 7 0, offset 168): expected endobj | |
| 5 | 8 | qpdf: issue-146.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/issue-148.out
| ... | ... | @@ -7,4 +7,9 @@ WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: s |
| 7 | 7 | WARNING: issue-148.pdf: file is damaged |
| 8 | 8 | WARNING: issue-148.pdf (offset 73): getStreamData called on unfilterable stream |
| 9 | 9 | WARNING: issue-148.pdf: Attempting to reconstruct cross-reference table |
| 10 | -qpdf: issue-148.pdf: unable to find trailer dictionary while recovering damaged file | |
| 10 | +WARNING: issue-148.pdf (xref stream: object 8 0, offset 26): stream dictionary lacks /Length key | |
| 11 | +WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recover stream length | |
| 12 | +WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2 | |
| 13 | +WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj | |
| 14 | +WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check | |
| 15 | +qpdf: issue-148.pdf: error decoding candidate xref stream while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/issue-150.out
| ... | ... | @@ -2,4 +2,5 @@ WARNING: issue-150.pdf: can't find PDF header |
| 2 | 2 | WARNING: issue-150.pdf: file is damaged |
| 3 | 3 | WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer |
| 4 | 4 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table |
| 5 | +WARNING: issue-150.pdf (object 8 0): object has offset 0 | |
| 5 | 6 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/issue-202.out
| ... | ... | @@ -3,4 +3,6 @@ WARNING: issue-202.pdf: file is damaged |
| 3 | 3 | WARNING: issue-202.pdf (offset 54769): expected trailer dictionary |
| 4 | 4 | WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table |
| 5 | 5 | WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure |
| 6 | +WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones | |
| 7 | +WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones | |
| 6 | 8 | qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/recover-xref-stream-recovered.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/recover-xref-stream.out
0 → 100644
| 1 | +WARNING: recover-xref-stream.pdf: file is damaged | |
| 2 | +WARNING: recover-xref-stream.pdf: can't find startxref | |
| 3 | +WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table | |
| 4 | +WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15) | |
| 5 | +qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/recover-xref-stream.pdf
0 → 100644
No preview for this file type