Commit 7427c7fe84a3d8d26fe19c49c00856048ecd216d

Authored by m-holger
1 parent 5c7af348

Add error handling for missing or invalid Resources and invalid or duplicate annotations in page objects

- Repair invalid or missing Resources in page object trees with warnings
- Remove invalid Annots arrays with warnings
- Warn about duplicate annotations
- Update test cases and expected output to reflect the new error handling
- Improve robustness of annotation and resource validation
libqpdf/QPDFPageDocumentHelper.cc
@@ -66,7 +66,7 @@ QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_fla @@ -66,7 +66,7 @@ QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_fla
66 for (auto& ph: getAllPages()) { 66 for (auto& ph: getAllPages()) {
67 QPDFObjectHandle resources = ph.getAttribute("/Resources", true); 67 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
68 if (!resources.isDictionary()) { 68 if (!resources.isDictionary()) {
69 - QTC::TC("qpdf", "QPDFPageDocumentHelper flatten resources missing or invalid"); 69 + // As of #1521, this should be impossible unless a user inserted an invalid page.
70 resources = ph.getObjectHandle().replaceKeyAndGetNew( 70 resources = ph.getObjectHandle().replaceKeyAndGetNew(
71 "/Resources", QPDFObjectHandle::newDictionary()); 71 "/Resources", QPDFObjectHandle::newDictionary());
72 } 72 }
libqpdf/QPDF_pages.cc
@@ -158,10 +158,38 @@ QPDF::getAllPagesInternal( @@ -158,10 +158,38 @@ QPDF::getAllPagesInternal(
158 QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); 158 QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
159 ++errors; 159 ++errors;
160 } 160 }
161 - if (!resources && !kid.getKey("/Resources").isDictionary()) {  
162 - // Consider adding an information message  
163 - ++errors; 161 + if (!resources) {
  162 + auto res = kid.getKey("/Resources");
  163 +
  164 + if (!res.isDictionary()) {
  165 + ++errors;
  166 + kid.warn(
  167 + "kid " + std::to_string(i) +
  168 + " (from 0) Resources is missing or invalid; repairing");
  169 + kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary());
  170 + }
164 } 171 }
  172 + auto annots = kid.getKey("/Annots");
  173 + if (!annots.null()) {
  174 + if (!annots.isArray()) {
  175 + kid.warn(
  176 + "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing");
  177 + kid.removeKey("/Annots");
  178 + ++errors;
  179 + } else {
  180 + QPDFObjGen::set seen_annots;
  181 + for (auto& annot: annots.as_array()) {
  182 + if (!seen_annots.add(annot)) {
  183 + kid.warn(
  184 + "kid " + std::to_string(i) +
  185 + " (from 0) Annots has duplicate entry for annotation " +
  186 + annot.id_gen().unparse(' '));
  187 + ++errors;
  188 + }
  189 + }
  190 + }
  191 + }
  192 +
165 if (!seen.add(kid)) { 193 if (!seen.add(kid)) {
166 // Make a copy of the page. This does the same as shallowCopyPage in 194 // Make a copy of the page. This does the same as shallowCopyPage in
167 // QPDFPageObjectHelper. 195 // QPDFPageObjectHelper.
qpdf/qpdf.testcov
@@ -674,7 +674,6 @@ QPDFPageObjectHelper copied fallback 0 @@ -674,7 +674,6 @@ QPDFPageObjectHelper copied fallback 0
674 QPDFPageObjectHelper used fallback without copying 0 674 QPDFPageObjectHelper used fallback without copying 0
675 QPDF skipping cache for known unchecked object 0 675 QPDF skipping cache for known unchecked object 0
676 QPDF fix dangling triggered xref reconstruction 0 676 QPDF fix dangling triggered xref reconstruction 0
677 -QPDFPageDocumentHelper flatten resources missing or invalid 0  
678 QPDF recover xref stream 0 677 QPDF recover xref stream 0
679 QPDFJob misplaced page range 0 678 QPDFJob misplaced page range 0
680 QPDFJob duplicated range 0 679 QPDFJob duplicated range 0
qpdf/qtest/form-xobject.test
@@ -96,7 +96,7 @@ $td->runtest("overlay on page with no resources", @@ -96,7 +96,7 @@ $td->runtest("overlay on page with no resources",
96 {$td->COMMAND => 96 {$td->COMMAND =>
97 "qpdf --deterministic-id page-with-no-resources.pdf" . 97 "qpdf --deterministic-id page-with-no-resources.pdf" .
98 " --overlay minimal.pdf -- a.pdf"}, 98 " --overlay minimal.pdf -- a.pdf"},
99 - {$td->STRING => "", $td->EXIT_STATUS => 0}, 99 + {$td->FILE => "page-with-no-resources.out", $td->EXIT_STATUS => 3},
100 $td->NORMALIZE_NEWLINES); 100 $td->NORMALIZE_NEWLINES);
101 $td->runtest("check overlay with no resources output", 101 $td->runtest("check overlay with no resources output",
102 {$td->COMMAND => "qpdf-test-compare a.pdf overlay-no-resources.pdf"}, 102 {$td->COMMAND => "qpdf-test-compare a.pdf overlay-no-resources.pdf"},
qpdf/qtest/many-nulls.test
@@ -27,7 +27,7 @@ $td->runtest("compare output", @@ -27,7 +27,7 @@ $td->runtest("compare output",
27 {$td->FILE => "many-nulls.pdf", $td->EXIT_STATUS => 0}); 27 {$td->FILE => "many-nulls.pdf", $td->EXIT_STATUS => 0});
28 $td->runtest("run check file", 28 $td->runtest("run check file",
29 {$td->COMMAND => "qpdf --check a.pdf"}, 29 {$td->COMMAND => "qpdf --check a.pdf"},
30 - {$td->FILE => "many-nulls.out", $td->EXIT_STATUS => 0}, 30 + {$td->FILE => "many-nulls.out", $td->EXIT_STATUS => 3},
31 $td->NORMALIZE_NEWLINES); 31 $td->NORMALIZE_NEWLINES);
32 $td->runtest("copy sparse array", 32 $td->runtest("copy sparse array",
33 {$td->COMMAND => "test_driver 97 many-nulls.pdf"}, 33 {$td->COMMAND => "test_driver 97 many-nulls.pdf"},
qpdf/qtest/page-errors.test
@@ -29,7 +29,7 @@ $td->runtest("check output", @@ -29,7 +29,7 @@ $td->runtest("check output",
29 {$td->FILE => "page-missing-mediabox-out.pdf"}); 29 {$td->FILE => "page-missing-mediabox-out.pdf"});
30 $td->runtest("handle page with inherited MediaBox", 30 $td->runtest("handle page with inherited MediaBox",
31 {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- a.pdf"}, 31 {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- a.pdf"},
32 - {$td->STRING => "", $td->EXIT_STATUS => 0}, 32 + {$td->FILE => "page-inherit-mediabox.out", $td->EXIT_STATUS => 0},
33 $td->NORMALIZE_NEWLINES); 33 $td->NORMALIZE_NEWLINES);
34 $td->runtest("check output", 34 $td->runtest("check output",
35 {$td->COMMAND => "qpdf-test-compare a.pdf page-inherit-mediabox-out.pdf"}, 35 {$td->COMMAND => "qpdf-test-compare a.pdf page-inherit-mediabox-out.pdf"},
qpdf/qtest/qpdf/annotation-no-resources-warn.out 0 → 100644
  1 +WARNING: annotation-no-resources.pdf, object 7 0 at offset 1526: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/issue-449.out
1 WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A 1 WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) Resources is missing or invalid; repairing
2 WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A 3 WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  4 +WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) Resources is missing or invalid; repairing
3 test 69 done 5 test 69 done
qpdf/qtest/qpdf/many-nulls.out
@@ -2,5 +2,5 @@ checking a.pdf @@ -2,5 +2,5 @@ checking a.pdf
2 PDF Version: 1.5 2 PDF Version: 1.5
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 -No syntax or stream encoding errors found; the file may still contain  
6 -errors that qpdf cannot detect 5 +WARNING: a.pdf object stream 1, object 5 0 at offset 2000188: kid 0 (from 0) Resources is missing or invalid; repairing
  6 +qpdf: operation succeeded with warnings
qpdf/qtest/qpdf/overlay-no-resources.out 0 → 100644
  1 +WARNING: annotation-no-resources.pdf, object 7 0 at offset 1526: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/overlay-no-resources.pdf
No preview for this file type
qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf
No preview for this file type
qpdf/qtest/qpdf/page-inherit-mediabox.out 0 → 100644
qpdf/qtest/qpdf/page-inherit-mediabox.pdf
@@ -25,6 +25,8 @@ endobj @@ -25,6 +25,8 @@ endobj
25 612 25 612
26 792 26 792
27 ] 27 ]
  28 + /Resources <<
  29 + >>
28 /Type /Pages 30 /Type /Pages
29 >> 31 >>
30 endobj 32 endobj
@@ -163,22 +165,22 @@ xref @@ -163,22 +165,22 @@ xref
163 0000000000 65535 f 165 0000000000 65535 f
164 0000000052 00000 n 166 0000000052 00000 n
165 0000000133 00000 n 167 0000000133 00000 n
166 -0000000308 00000 n  
167 -0000000537 00000 n  
168 -0000000626 00000 n  
169 -0000000871 00000 n  
170 -0000000970 00000 n  
171 -0000001016 00000 n  
172 -0000001161 00000 n  
173 -0000001246 00000 n  
174 -0000001347 00000 n  
175 -0000001395 00000 n  
176 -0000001542 00000 n 168 +0000000329 00000 n
  169 +0000000558 00000 n
  170 +0000000647 00000 n
  171 +0000000892 00000 n
  172 +0000000991 00000 n
  173 +0000001037 00000 n
  174 +0000001182 00000 n
  175 +0000001267 00000 n
  176 +0000001368 00000 n
  177 +0000001416 00000 n
  178 +0000001563 00000 n
177 trailer << 179 trailer <<
178 /Root 1 0 R 180 /Root 1 0 R
179 /Size 14 181 /Size 14
180 /ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>] 182 /ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>]
181 >> 183 >>
182 startxref 184 startxref
183 -1578 185 +1599
184 %%EOF 186 %%EOF
qpdf/qtest/qpdf/page-missing-mediabox-out.pdf
No preview for this file type
qpdf/qtest/qpdf/page-missing-mediabox.out
1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A 1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) Resources is missing or invalid; repairing
2 qpdf: operation succeeded with warnings; resulting file may have some problems 3 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/page-no-content.out
1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A 1 WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) Resources is missing or invalid; repairing
2 page 1: 3 0 R 3 page 1: 3 0 R
3 content: 4 content:
4 6 0 R 5 6 0 R
qpdf/qtest/qpdf/page-with-no-resources.out 0 → 100644
  1 +WARNING: page-with-no-resources.pdf, object 3 0 at offset 133: kid 0 (from 0) Resources is missing or invalid; repairing
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems