From 37b32f3da2770312f58019591ae3ccdbe44a29eb Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 30 Jun 2025 18:19:37 +0100 Subject: [PATCH] Extend xref reconstruction sanity checks --- libqpdf/QPDF_objects.cc | 6 ++---- libqpdf/QPDF_pages.cc | 4 ++-- libqpdf/qpdf/QPDF_private.hh | 1 - qpdf/qtest/qpdf/issue-143.out | 5 +---- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 024b843..d6ef576 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -200,7 +200,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) }; m->reconstructed_xref = true; - m->in_xref_reconstruction = true; // We may find more objects, which may contain dangling references. m->fixed_dangling_refs = false; @@ -382,7 +381,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) } } - m->in_xref_reconstruction = false; // We could iterate through the objects looking for streams and try to find objects inside of // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything @@ -1174,7 +1172,7 @@ QPDF::readTrailer() { qpdf_offset_t offset = m->file->tell(); auto [object, empty] = QPDFParser::parse( - *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); + *m->file, "trailer", m->tokenizer, nullptr, *this, m->reconstructed_xref); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. @@ -1201,7 +1199,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) m->tokenizer, decrypter_ptr, *this, - m->in_xref_reconstruction || m->in_read_xref_stream); + m->reconstructed_xref || m->in_read_xref_stream); ; if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 1b18981..b2907f0 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -166,7 +166,7 @@ QPDF::getAllPagesInternal( // Make a copy of the page. This does the same as shallowCopyPage in // QPDFPageObjectHelper. QTC::TC("qpdf", "QPDF resolve duplicated page object"); - if (!m->in_xref_reconstruction) { + if (!m->reconstructed_xref) { cur_node.warnIfPossible( "kid " + std::to_string(i) + " (from 0) appears more than once in the pages tree;" @@ -193,7 +193,7 @@ QPDF::getAllPagesInternal( kid.replaceKey("/Type", "/Page"_qpdf); ++errors; } - if (m->in_xref_reconstruction && errors > 2) { + if (m->reconstructed_xref && errors > 2) { cur_node.warnIfPossible( "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); m->invalid_page_found = true; diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 8a678f0..8bd964c 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -490,7 +490,6 @@ class QPDF::Members // copied_stream_data_provider is owned by copied_streams CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; bool reconstructed_xref{false}; - bool in_xref_reconstruction{false}; bool in_read_xref_stream{false}; bool fixed_dangling_refs{false}; bool immediate_copy_from{false}; diff --git a/qpdf/qtest/qpdf/issue-143.out b/qpdf/qtest/qpdf/issue-143.out index e6fc61e..40a721c 100644 --- a/qpdf/qtest/qpdf/issue-143.out +++ b/qpdf/qtest/qpdf/issue-143.out @@ -7,10 +7,7 @@ WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream WARNING: issue-143.pdf: file is damaged WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake4 +WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary keys but found non-name objects; ignoring WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length key WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606 -- libgit2 0.21.4