Commit 37b32f3da2770312f58019591ae3ccdbe44a29eb

Authored by m-holger
1 parent 661db743

Extend xref reconstruction sanity checks

After xref reconstruction treat the input file as suspect and apply sanity checks to all subsequent object reads.

Remove `in_xref_reconstruction` flag and update references to use `reconstructed_xref` for simplified state management during xref processing. Adjust warnings for invalid dictionary keys in test output.
libqpdf/QPDF_objects.cc
... ... @@ -200,7 +200,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
200 200 };
201 201  
202 202 m->reconstructed_xref = true;
203   - m->in_xref_reconstruction = true;
204 203 // We may find more objects, which may contain dangling references.
205 204 m->fixed_dangling_refs = false;
206 205  
... ... @@ -382,7 +381,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
382 381 }
383 382 }
384 383  
385   - m->in_xref_reconstruction = false;
386 384 // We could iterate through the objects looking for streams and try to find objects inside of
387 385 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
388 386 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
... ... @@ -1174,7 +1172,7 @@ QPDF::readTrailer()
1174 1172 {
1175 1173 qpdf_offset_t offset = m->file->tell();
1176 1174 auto [object, empty] = QPDFParser::parse(
1177   - *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction);
  1175 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->reconstructed_xref);
1178 1176 if (empty) {
1179 1177 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1180 1178 // actual PDF files and Adobe Reader appears to ignore them.
... ... @@ -1201,7 +1199,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1201 1199 m->tokenizer,
1202 1200 decrypter_ptr,
1203 1201 *this,
1204   - m->in_xref_reconstruction || m->in_read_xref_stream);
  1202 + m->reconstructed_xref || m->in_read_xref_stream);
1205 1203 ;
1206 1204 if (empty) {
1207 1205 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
... ...
libqpdf/QPDF_pages.cc
... ... @@ -166,7 +166,7 @@ QPDF::getAllPagesInternal(
166 166 // Make a copy of the page. This does the same as shallowCopyPage in
167 167 // QPDFPageObjectHelper.
168 168 QTC::TC("qpdf", "QPDF resolve duplicated page object");
169   - if (!m->in_xref_reconstruction) {
  169 + if (!m->reconstructed_xref) {
170 170 cur_node.warnIfPossible(
171 171 "kid " + std::to_string(i) +
172 172 " (from 0) appears more than once in the pages tree;"
... ... @@ -193,7 +193,7 @@ QPDF::getAllPagesInternal(
193 193 kid.replaceKey("/Type", "/Page"_qpdf);
194 194 ++errors;
195 195 }
196   - if (m->in_xref_reconstruction && errors > 2) {
  196 + if (m->reconstructed_xref && errors > 2) {
197 197 cur_node.warnIfPossible(
198 198 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
199 199 m->invalid_page_found = true;
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -490,7 +490,6 @@ class QPDF::Members
490 490 // copied_stream_data_provider is owned by copied_streams
491 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 492 bool reconstructed_xref{false};
493   - bool in_xref_reconstruction{false};
494 493 bool in_read_xref_stream{false};
495 494 bool fixed_dangling_refs{false};
496 495 bool immediate_copy_from{false};
... ...
qpdf/qtest/qpdf/issue-143.out
... ... @@ -7,10 +7,7 @@ WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream
7 7 WARNING: issue-143.pdf: file is damaged
8 8 WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj
9 9 WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table
10   -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake1
11   -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake2
12   -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake3
13   -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake4
  10 +WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary keys but found non-name objects; ignoring
14 11 WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length key
15 12 WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length
16 13 WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606
... ...