Commit 7427c7fe84a3d8d26fe19c49c00856048ecd216d

Authored by m-holger
1 parent 5c7af348

Add error handling for missing or invalid Resources and invalid or duplicate annotations in page objects

- Repair invalid or missing Resources in page object trees with warnings
- Remove invalid Annots arrays with warnings
- Warn about duplicate annotations
- Update test cases and output to reflect new error handling
- Improve robustness for annotation and resource validation
libqpdf/QPDFPageDocumentHelper.cc
... ... @@ -66,7 +66,7 @@ QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_fla
66 66 for (auto& ph: getAllPages()) {
67 67 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
68 68 if (!resources.isDictionary()) {
69   - QTC::TC("qpdf", "QPDFPageDocumentHelper flatten resources missing or invalid");
  69 + // As of #1521, this should be impossible unless a user inserted an invalid page.
70 70 resources = ph.getObjectHandle().replaceKeyAndGetNew(
71 71 "/Resources", QPDFObjectHandle::newDictionary());
72 72 }
... ...
libqpdf/QPDF_pages.cc
... ... @@ -158,10 +158,38 @@ QPDF::getAllPagesInternal(
158 158 QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
159 159 ++errors;
160 160 }
161   - if (!resources && !kid.getKey("/Resources").isDictionary()) {
162   - // Consider adding an information message
163   - ++errors;
  161 + if (!resources) {
  162 + auto res = kid.getKey("/Resources");
  163 +
  164 + if (!res.isDictionary()) {
  165 + ++errors;
  166 + kid.warn(
  167 + "kid " + std::to_string(i) +
  168 + " (from 0) Resources is missing or invalid; repairing");
  169 + kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary());
  170 + }
164 171 }
  172 + auto annots = kid.getKey("/Annots");
  173 + if (!annots.null()) {
  174 + if (!annots.isArray()) {
  175 + kid.warn(
  176 + "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing");
  177 + kid.removeKey("/Annots");
  178 + ++errors;
  179 + } else {
  180 + QPDFObjGen::set seen_annots;
  181 + for (auto& annot: annots.as_array()) {
  182 + if (!seen_annots.add(annot)) {
  183 + kid.warn(
  184 + "kid " + std::to_string(i) +
  185 + " (from 0) Annots has duplicate entry for annotation " +
  186 + annot.id_gen().unparse(' '));
  187 + ++errors;
  188 + }
  189 + }
  190 + }
  191 + }
  192 +
165 193 if (!seen.add(kid)) {
166 194 // Make a copy of the page. This does the same as shallowCopyPage in
167 195 // QPDFPageObjectHelper.
... ...
qpdf/qpdf.testcov
... ... @@ -674,7 +674,6 @@ QPDFPageObjectHelper copied fallback 0
674 674 QPDFPageObjectHelper used fallback without copying 0
675 675 QPDF skipping cache for known unchecked object 0
676 676 QPDF fix dangling triggered xref reconstruction 0
677   -QPDFPageDocumentHelper flatten resources missing or invalid 0
678 677 QPDF recover xref stream 0
679 678 QPDFJob misplaced page range 0
680 679 QPDFJob duplicated range 0
... ...
qpdf/qtest/form-xobject.test
... ... @@ -96,7 +96,7 @@ $td->runtest("overlay on page with no resources",
96 96 {$td->COMMAND =>
97 97 "qpdf --deterministic-id page-with-no-resources.pdf" .
98 98 " --overlay minimal.pdf -- a.pdf"},
99   - {$td->STRING => "", $td->EXIT_STATUS => 0},
  99 + {$td->FILE => "page-with-no-resources.out", $td->EXIT_STATUS => 3},
100 100 $td->NORMALIZE_NEWLINES);
101 101 $td->runtest("check overlay with no resources output",
102 102 {$td->COMMAND => "qpdf-test-compare a.pdf overlay-no-resources.pdf"},
... ...
qpdf/qtest/many-nulls.test
... ... @@ -27,7 +27,7 @@ $td->runtest("compare output",
27 27 {$td->FILE => "many-nulls.pdf", $td->EXIT_STATUS => 0});
28 28 $td->runtest("run check file",
29 29 {$td->COMMAND => "qpdf --check a.pdf"},
30   - {$td->FILE => "many-nulls.out", $td->EXIT_STATUS => 0},
  30 + {$td->FILE => "many-nulls.out", $td->EXIT_STATUS => 3},
31 31 $td->NORMALIZE_NEWLINES);
32 32 $td->runtest("copy sparse array",
33 33 {$td->COMMAND => "test_driver 97 many-nulls.pdf"},
... ...
qpdf/qtest/page-errors.test
... ... @@ -29,7 +29,7 @@ $td->runtest("check output",
29 29 {$td->FILE => "page-missing-mediabox-out.pdf"});
30 30 $td->runtest("handle page with inherited MediaBox",
31 31 {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- a.pdf"},
32   - {$td->STRING => "", $td->EXIT_STATUS => 0},
  32 + {$td->FILE => "page-inherit-mediabox.out", $td->EXIT_STATUS => 0},
33 33 $td->NORMALIZE_NEWLINES);
34 34 $td->runtest("check output",
35 35 {$td->COMMAND => "qpdf-test-compare a.pdf page-inherit-mediabox-out.pdf"},
... ...
qpdf/qtest/qpdf/annotation-no-resources-warn.out 0 → 100644
  1 +WARNING: annotation-no-resources.pdf, object 7 0 at offset 1526: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/issue-449.out
1 1 WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) Resources is missing or invalid; repairing
2 3 WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  4 +WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) Resources is missing or invalid; repairing
3 5 test 69 done
... ...
qpdf/qtest/qpdf/many-nulls.out
... ... @@ -2,5 +2,5 @@ checking a.pdf
2 2 PDF Version: 1.5
3 3 File is not encrypted
4 4 File is not linearized
5   -No syntax or stream encoding errors found; the file may still contain
6   -errors that qpdf cannot detect
  5 +WARNING: a.pdf object stream 1, object 5 0 at offset 2000188: kid 0 (from 0) Resources is missing or invalid; repairing
  6 +qpdf: operation succeeded with warnings
... ...
qpdf/qtest/qpdf/overlay-no-resources.out 0 → 100644
  1 +WARNING: annotation-no-resources.pdf, object 7 0 at offset 1526: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/overlay-no-resources.pdf
No preview for this file type
qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf
No preview for this file type
qpdf/qtest/qpdf/page-inherit-mediabox.out 0 → 100644
qpdf/qtest/qpdf/page-inherit-mediabox.pdf
... ... @@ -25,6 +25,8 @@ endobj
25 25 612
26 26 792
27 27 ]
  28 + /Resources <<
  29 + >>
28 30 /Type /Pages
29 31 >>
30 32 endobj
... ... @@ -163,22 +165,22 @@ xref
163 165 0000000000 65535 f
164 166 0000000052 00000 n
165 167 0000000133 00000 n
166   -0000000308 00000 n
167   -0000000537 00000 n
168   -0000000626 00000 n
169   -0000000871 00000 n
170   -0000000970 00000 n
171   -0000001016 00000 n
172   -0000001161 00000 n
173   -0000001246 00000 n
174   -0000001347 00000 n
175   -0000001395 00000 n
176   -0000001542 00000 n
  168 +0000000329 00000 n
  169 +0000000558 00000 n
  170 +0000000647 00000 n
  171 +0000000892 00000 n
  172 +0000000991 00000 n
  173 +0000001037 00000 n
  174 +0000001182 00000 n
  175 +0000001267 00000 n
  176 +0000001368 00000 n
  177 +0000001416 00000 n
  178 +0000001563 00000 n
177 179 trailer <<
178 180 /Root 1 0 R
179 181 /Size 14
180 182 /ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>]
181 183 >>
182 184 startxref
183   -1578
  185 +1599
184 186 %%EOF
... ...
qpdf/qtest/qpdf/page-missing-mediabox-out.pdf
No preview for this file type
qpdf/qtest/qpdf/page-missing-mediabox.out
1 1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) Resources is missing or invalid; repairing
2 3 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/page-no-content.out
1 1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) Resources is missing or invalid; repairing
2 3 page 1: 3 0 R
3 4 content:
4 5 6 0 R
... ...
qpdf/qtest/qpdf/page-with-no-resources.out 0 → 100644
  1 +WARNING: page-with-no-resources.pdf, object 3 0 at offset 133: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...