Commit c6fbeb871734f42081d02f7a9e92a07b6c66dd0c

Authored by m-holger
Committed by GitHub
2 parents 661db743 37b32f3d

Merge pull request #1482 from m-holger/fuzz

Extend xref reconstruction sanity checks
libqpdf/QPDF_objects.cc
@@ -200,7 +200,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -200,7 +200,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
200 }; 200 };
201 201
202 m->reconstructed_xref = true; 202 m->reconstructed_xref = true;
203 - m->in_xref_reconstruction = true;  
204 // We may find more objects, which may contain dangling references. 203 // We may find more objects, which may contain dangling references.
205 m->fixed_dangling_refs = false; 204 m->fixed_dangling_refs = false;
206 205
@@ -382,7 +381,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -382,7 +381,6 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
382 } 381 }
383 } 382 }
384 383
385 - m->in_xref_reconstruction = false;  
386 // We could iterate through the objects looking for streams and try to find objects inside of 384 // We could iterate through the objects looking for streams and try to find objects inside of
387 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors 385 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
388 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything 386 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
@@ -1174,7 +1172,7 @@ QPDF::readTrailer() @@ -1174,7 +1172,7 @@ QPDF::readTrailer()
1174 { 1172 {
1175 qpdf_offset_t offset = m->file->tell(); 1173 qpdf_offset_t offset = m->file->tell();
1176 auto [object, empty] = QPDFParser::parse( 1174 auto [object, empty] = QPDFParser::parse(
1177 - *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); 1175 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->reconstructed_xref);
1178 if (empty) { 1176 if (empty) {
1179 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1177 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1180 // actual PDF files and Adobe Reader appears to ignore them. 1178 // actual PDF files and Adobe Reader appears to ignore them.
@@ -1201,7 +1199,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) @@ -1201,7 +1199,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1201 m->tokenizer, 1199 m->tokenizer,
1202 decrypter_ptr, 1200 decrypter_ptr,
1203 *this, 1201 *this,
1204 - m->in_xref_reconstruction || m->in_read_xref_stream); 1202 + m->reconstructed_xref || m->in_read_xref_stream);
1205 ; 1203 ;
1206 if (empty) { 1204 if (empty) {
1207 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1205 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
libqpdf/QPDF_pages.cc
@@ -166,7 +166,7 @@ QPDF::getAllPagesInternal( @@ -166,7 +166,7 @@ QPDF::getAllPagesInternal(
166 // Make a copy of the page. This does the same as shallowCopyPage in 166 // Make a copy of the page. This does the same as shallowCopyPage in
167 // QPDFPageObjectHelper. 167 // QPDFPageObjectHelper.
168 QTC::TC("qpdf", "QPDF resolve duplicated page object"); 168 QTC::TC("qpdf", "QPDF resolve duplicated page object");
169 - if (!m->in_xref_reconstruction) { 169 + if (!m->reconstructed_xref) {
170 cur_node.warnIfPossible( 170 cur_node.warnIfPossible(
171 "kid " + std::to_string(i) + 171 "kid " + std::to_string(i) +
172 " (from 0) appears more than once in the pages tree;" 172 " (from 0) appears more than once in the pages tree;"
@@ -193,7 +193,7 @@ QPDF::getAllPagesInternal( @@ -193,7 +193,7 @@ QPDF::getAllPagesInternal(
193 kid.replaceKey("/Type", "/Page"_qpdf); 193 kid.replaceKey("/Type", "/Page"_qpdf);
194 ++errors; 194 ++errors;
195 } 195 }
196 - if (m->in_xref_reconstruction && errors > 2) { 196 + if (m->reconstructed_xref && errors > 2) {
197 cur_node.warnIfPossible( 197 cur_node.warnIfPossible(
198 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); 198 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
199 m->invalid_page_found = true; 199 m->invalid_page_found = true;
libqpdf/qpdf/QPDF_private.hh
@@ -490,7 +490,6 @@ class QPDF::Members @@ -490,7 +490,6 @@ class QPDF::Members
490 // copied_stream_data_provider is owned by copied_streams 490 // copied_stream_data_provider is owned by copied_streams
491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 bool reconstructed_xref{false}; 492 bool reconstructed_xref{false};
493 - bool in_xref_reconstruction{false};  
494 bool in_read_xref_stream{false}; 493 bool in_read_xref_stream{false};
495 bool fixed_dangling_refs{false}; 494 bool fixed_dangling_refs{false};
496 bool immediate_copy_from{false}; 495 bool immediate_copy_from{false};
qpdf/qtest/qpdf/issue-143.out
@@ -7,10 +7,7 @@ WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream @@ -7,10 +7,7 @@ WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream
7 WARNING: issue-143.pdf: file is damaged 7 WARNING: issue-143.pdf: file is damaged
8 WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj 8 WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj
9 WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table 9 WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table
10 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake1  
11 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake2  
12 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake3  
13 -WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary key but found non-name object; inserting key /QPDFFake4 10 +WARNING: issue-143.pdf (object 1 0, offset 24): expected dictionary keys but found non-name objects; ignoring
14 WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length key 11 WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length key
15 WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length 12 WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length
16 WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606 13 WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606