Commit df9633de4b480f5362495159a8a768692e3a829e

Authored by m-holger
1 parent 6e580e24

Add additional sanity check during xref recovery

Do not copy duplicate pages during xref recovery.
libqpdf/QPDF_pages.cc
@@ -146,12 +146,21 @@ QPDF::getAllPagesInternal( @@ -146,12 +146,21 @@ QPDF::getAllPagesInternal(
146 // Make a copy of the page. This does the same as shallowCopyPage in 146 // Make a copy of the page. This does the same as shallowCopyPage in
147 // QPDFPageObjectHelper. 147 // QPDFPageObjectHelper.
148 QTC::TC("qpdf", "QPDF resolve duplicated page object"); 148 QTC::TC("qpdf", "QPDF resolve duplicated page object");
149 - cur_node.warnIfPossible(  
150 - "kid " + std::to_string(i) +  
151 - " (from 0) appears more than once in the pages tree;"  
152 - " creating a new page object as a copy");  
153 - kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());  
154 - seen.add(kid); 149 + if (!m->in_xref_reconstruction) {
  150 + cur_node.warnIfPossible(
  151 + "kid " + std::to_string(i) +
  152 + " (from 0) appears more than once in the pages tree;"
  153 + " creating a new page object as a copy");
  154 + kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
  155 + seen.add(kid);
  156 + } else {
  157 + cur_node.warnIfPossible(
  158 + "kid " + std::to_string(i) +
  159 + " (from 0) appears more than once in the pages tree; ignoring duplicate");
  160 + m->invalid_page_found = true;
  161 + kid = QPDFObjectHandle::newNull();
  162 + continue;
  163 + }
155 } 164 }
156 if (!kid.isDictionaryOfType("/Page")) { 165 if (!kid.isDictionaryOfType("/Page")) {
157 kid.warnIfPossible("/Type key should be /Page but is not; overriding"); 166 kid.warnIfPossible("/Type key should be /Page but is not; overriding");
manual/release-notes.rst
@@ -34,6 +34,14 @@ more detail. @@ -34,6 +34,14 @@ more detail.
34 34
35 - Fix Android build issues. 35 - Fix Android build issues.
36 36
  37 + - Other enhancements
  38 +
  39 + - More sanity checks have been added when files with damaged xref tables
  40 + are recovered in order to avoid long runtimes and large memory use.
  41 + Objects with very large arrays or dictionaries (more than 5000
  42 + elements) and duplicate pages are ignored as they are almost certainly
  43 + invalid.
  44 +
37 12.1.0: April 6, 2025 45 12.1.0: April 6, 2025
38 - Upcoming C++ Version Change 46 - Upcoming C++ Version Change
39 47