Commit ad3bac2c26e8ef0213daab15359d71df7f170ccb

Authored by m-holger
Committed by GitHub
2 parents c7779175 df9633de

Merge pull request #1446 from m-holger/fuzz

Add additional sanity check during xref recovery
libqpdf/QPDF_pages.cc
@@ -146,12 +146,21 @@ QPDF::getAllPagesInternal( @@ -146,12 +146,21 @@ QPDF::getAllPagesInternal(
146 // Make a copy of the page. This does the same as shallowCopyPage in 146 // Make a copy of the page. This does the same as shallowCopyPage in
147 // QPDFPageObjectHelper. 147 // QPDFPageObjectHelper.
148 QTC::TC("qpdf", "QPDF resolve duplicated page object"); 148 QTC::TC("qpdf", "QPDF resolve duplicated page object");
149 - cur_node.warnIfPossible(  
150 - "kid " + std::to_string(i) +  
151 - " (from 0) appears more than once in the pages tree;"  
152 - " creating a new page object as a copy");  
153 - kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());  
154 - seen.add(kid); 149 + if (!m->in_xref_reconstruction) {
  150 + cur_node.warnIfPossible(
  151 + "kid " + std::to_string(i) +
  152 + " (from 0) appears more than once in the pages tree;"
  153 + " creating a new page object as a copy");
  154 + kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
  155 + seen.add(kid);
  156 + } else {
  157 + cur_node.warnIfPossible(
  158 + "kid " + std::to_string(i) +
  159 + " (from 0) appears more than once in the pages tree; ignoring duplicate");
  160 + m->invalid_page_found = true;
  161 + kid = QPDFObjectHandle::newNull();
  162 + continue;
  163 + }
155 } 164 }
156 if (!kid.isDictionaryOfType("/Page")) { 165 if (!kid.isDictionaryOfType("/Page")) {
157 kid.warnIfPossible("/Type key should be /Page but is not; overriding"); 166 kid.warnIfPossible("/Type key should be /Page but is not; overriding");
manual/release-notes.rst
@@ -41,6 +41,14 @@ more detail. @@ -41,6 +41,14 @@ more detail.
41 41
42 - Fix Android build issues. 42 - Fix Android build issues.
43 43
  44 + - Other enhancements
  45 +
  46 + - More sanity checks have been added when files with damaged xref tables
  47 + are recovered in order to avoid long runtimes and large memory use.
  48 + Objects with very large arrays or dictionaries (more than 5000
  49 + elements) and duplicate pages are ignored as they are almost certainly
  50 + invalid.
  51 +
44 12.1.0: April 6, 2025 52 12.1.0: April 6, 2025
45 - Upcoming C++ Version Change 53 - Upcoming C++ Version Change
46 54