Commit df9633de4b480f5362495159a8a768692e3a829e

Authored by m-holger
1 parent 6e580e24

Add additional sanity check during xref recovery

Do not copy duplicate pages during xref recovery.
libqpdf/QPDF_pages.cc
... ... @@ -146,12 +146,21 @@ QPDF::getAllPagesInternal(
146 146 // Make a copy of the page. This does the same as shallowCopyPage in
147 147 // QPDFPageObjectHelper.
148 148 QTC::TC("qpdf", "QPDF resolve duplicated page object");
149   - cur_node.warnIfPossible(
150   - "kid " + std::to_string(i) +
151   - " (from 0) appears more than once in the pages tree;"
152   - " creating a new page object as a copy");
153   - kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
154   - seen.add(kid);
  149 + if (!m->in_xref_reconstruction) {
  150 + cur_node.warnIfPossible(
  151 + "kid " + std::to_string(i) +
  152 + " (from 0) appears more than once in the pages tree;"
  153 + " creating a new page object as a copy");
  154 + kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
  155 + seen.add(kid);
  156 + } else {
  157 + cur_node.warnIfPossible(
  158 + "kid " + std::to_string(i) +
  159 + " (from 0) appears more than once in the pages tree; ignoring duplicate");
  160 + m->invalid_page_found = true;
  161 + kid = QPDFObjectHandle::newNull();
  162 + continue;
  163 + }
155 164 }
156 165 if (!kid.isDictionaryOfType("/Page")) {
157 166 kid.warnIfPossible("/Type key should be /Page but is not; overriding");
... ...
manual/release-notes.rst
... ... @@ -34,6 +34,14 @@ more detail.
34 34  
35 35 - Fix Android build issues.
36 36  
  37 + - Other enhancements
  38 +
  39 + - More sanity checks have been added when files with damaged xref tables
  40 + are recovered in order to avoid long runtimes and large memory use.
  41 + Objects with very large arrays or dictionaries (more than 5000
  42 + elements) and duplicate pages are ignored as they are almost certainly
  43 + invalid.
  44 +
37 45 12.1.0: April 6, 2025
38 46 - Upcoming C++ Version Change
39 47  
... ...