Commit 4a8c821e3efc961d9c58631bcbbc9125167e41a4

Authored by m-holger
1 parent e62973d2

In QPDF::reconstruct_xref add sanity check for object ids

libqpdf/QPDF.cc
@@ -473,7 +473,7 @@ QPDF::parse(char const* password) @@ -473,7 +473,7 @@ QPDF::parse(char const* password)
473 m->parsed = true; 473 m->parsed = true;
474 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { 474 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
475 // QPDFs created from JSON have an empty xref table and no root object yet. 475 // QPDFs created from JSON have an empty xref table and no root object yet.
476 - throw damagedPDF("", 0, "unable to find page tree"); 476 + throw damagedPDF("", 0, "unable to find page tree");
477 } 477 }
478 } 478 }
479 479
@@ -547,6 +547,9 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -547,6 +547,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
547 547
548 m->file->seek(0, SEEK_END); 548 m->file->seek(0, SEEK_END);
549 qpdf_offset_t eof = m->file->tell(); 549 qpdf_offset_t eof = m->file->tell();
  550 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  551 + // scenarios at least 3 bytes are required.
  552 + auto max_obj_id = eof / 3;
550 m->file->seek(0, SEEK_SET); 553 m->file->seek(0, SEEK_SET);
551 qpdf_offset_t line_start = 0; 554 qpdf_offset_t line_start = 0;
552 // Don't allow very long tokens here during recovery. 555 // Don't allow very long tokens here during recovery.
@@ -564,7 +567,12 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -564,7 +567,12 @@ QPDF::reconstruct_xref(QPDFExc& e)
564 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { 567 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
565 int obj = QUtil::string_to_int(t1.getValue().c_str()); 568 int obj = QUtil::string_to_int(t1.getValue().c_str());
566 int gen = QUtil::string_to_int(t2.getValue().c_str()); 569 int gen = QUtil::string_to_int(t2.getValue().c_str());
567 - insertReconstructedXrefEntry(obj, token_start, gen); 570 + if (obj <= max_obj_id) {
  571 + insertReconstructedXrefEntry(obj, token_start, gen);
  572 + } else {
  573 + warn(damagedPDF(
  574 + "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
  575 + }
568 } 576 }
569 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { 577 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
570 QPDFObjectHandle t = readTrailer(); 578 QPDFObjectHandle t = readTrailer();
qpdf/qtest/qpdf/issue-147.out
@@ -3,6 +3,5 @@ WARNING: issue-147.pdf: file is damaged @@ -3,6 +3,5 @@ WARNING: issue-147.pdf: file is damaged
3 WARNING: issue-147.pdf: can't find startxref 3 WARNING: issue-147.pdf: can't find startxref
4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table 4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
5 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 5 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
6 -WARNING: issue-147.pdf (object 62 0, offset 88): expected endobj  
7 -WARNING: issue-147.pdf (trailer, offset 90): invalid /ID in trailer dictionary  
8 -qpdf: issue-147.pdf: invalid password 6 +WARNING: issue-147.pdf: ignoring object with impossibly large id 62
  7 +qpdf: issue-147.pdf: unable to find /Root dictionary