Commit d9dd99eca32e44788165ce169f1e59498ad1c16e
1 parent
c032f7c9
Attempt to repair /Type key in pages nodes (fixes #349)
Showing
7 changed files
with
47 additions
and
30 deletions
ChangeLog
| 1 | +2019-08-18 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * When traversing the pages tree, if an invalid /Type key is | |
| 4 | + encountered, fix it. This is not done for all operations, but it | |
| 5 | + will be done for any case in which getAllPages is called. This | |
| 6 | + includes all page-based CLI operations. (Hopefully) Fixes #349. | |
| 7 | + | |
| 1 | 8 | 2019-08-17 Jay Berkenbilt <ejb@ql.org> |
| 2 | 9 | |
| 3 | 10 | * Change internal implementation of QPDF arrays to use sparse | ... | ... |
libqpdf/QPDF_pages.cc
| ... | ... | @@ -56,12 +56,12 @@ QPDF::getAllPages() |
| 56 | 56 | } |
| 57 | 57 | |
| 58 | 58 | void |
| 59 | -QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | |
| 59 | +QPDF::getAllPagesInternal(QPDFObjectHandle cur_node, | |
| 60 | 60 | std::vector<QPDFObjectHandle>& result, |
| 61 | 61 | std::set<QPDFObjGen>& visited, |
| 62 | 62 | std::set<QPDFObjGen>& seen) |
| 63 | 63 | { |
| 64 | - QPDFObjGen this_og = cur_pages.getObjGen(); | |
| 64 | + QPDFObjGen this_og = cur_node.getObjGen(); | |
| 65 | 65 | if (visited.count(this_og) > 0) |
| 66 | 66 | { |
| 67 | 67 | throw QPDFExc( |
| ... | ... | @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, |
| 70 | 70 | "Loop detected in /Pages structure (getAllPages)"); |
| 71 | 71 | } |
| 72 | 72 | visited.insert(this_og); |
| 73 | - std::string type; | |
| 74 | - QPDFObjectHandle type_key = cur_pages.getKey("/Type"); | |
| 75 | - if (type_key.isName()) | |
| 73 | + std::string wanted_type; | |
| 74 | + if (cur_node.hasKey("/Kids")) | |
| 76 | 75 | { |
| 77 | - type = type_key.getName(); | |
| 78 | - } | |
| 79 | - else if (cur_pages.hasKey("/Kids")) | |
| 80 | - { | |
| 81 | - type = "/Pages"; | |
| 82 | - } | |
| 83 | - else | |
| 84 | - { | |
| 85 | - type = "/Page"; | |
| 86 | - } | |
| 87 | - if (type == "/Pages") | |
| 88 | - { | |
| 89 | - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); | |
| 76 | + wanted_type = "/Pages"; | |
| 77 | + QPDFObjectHandle kids = cur_node.getKey("/Kids"); | |
| 90 | 78 | int n = kids.getArrayNItems(); |
| 91 | 79 | for (int i = 0; i < n; ++i) |
| 92 | 80 | { |
| ... | ... | @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, |
| 108 | 96 | getAllPagesInternal(kid, result, visited, seen); |
| 109 | 97 | } |
| 110 | 98 | } |
| 111 | - else if (type == "/Page") | |
| 99 | + else | |
| 112 | 100 | { |
| 101 | + wanted_type = "/Page"; | |
| 113 | 102 | seen.insert(this_og); |
| 114 | - result.push_back(cur_pages); | |
| 103 | + result.push_back(cur_node); | |
| 115 | 104 | } |
| 116 | - else | |
| 105 | + | |
| 106 | + QPDFObjectHandle type_key = cur_node.getKey("/Type"); | |
| 107 | + if (! (type_key.isName() && (type_key.getName() == wanted_type))) | |
| 117 | 108 | { |
| 118 | - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), | |
| 119 | - this->m->last_object_description, | |
| 120 | - this->m->file->getLastOffset(), | |
| 121 | - "invalid Type " + type + " in page tree"); | |
| 109 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), | |
| 110 | + "page tree node", | |
| 111 | + this->m->file->getLastOffset(), | |
| 112 | + "/Type key should be " + wanted_type + | |
| 113 | + " but is not; overriding")); | |
| 114 | + cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); | |
| 122 | 115 | } |
| 123 | 116 | visited.erase(this_og); |
| 124 | 117 | } | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -4490,6 +4490,13 @@ print "\n"; |
| 4490 | 4490 | </listitem> |
| 4491 | 4491 | <listitem> |
| 4492 | 4492 | <para> |
| 4493 | + When traversing the pages tree, if nodes are encountered | |
| 4494 | + with invalid types, the types are fixed, and a warning is | |
| 4495 | + issued. | |
| 4496 | + </para> | |
| 4497 | + </listitem> | |
| 4498 | + <listitem> | |
| 4499 | + <para> | |
| 4493 | 4500 | A new helper method |
| 4494 | 4501 | <function>QUtil::read_file_into_memory</function> was added. |
| 4495 | 4502 | </para> | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size", |
| 1339 | 1339 | show_ntests(); |
| 1340 | 1340 | # ---------- |
| 1341 | 1341 | $td->notify("--- Page errors ---"); |
| 1342 | -$n_tests += 3; | |
| 1342 | +$n_tests += 5; | |
| 1343 | 1343 | |
| 1344 | 1344 | $td->runtest("handle page no with contents", |
| 1345 | 1345 | {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, |
| 1346 | 1346 | {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, |
| 1347 | 1347 | $td->NORMALIZE_NEWLINES); |
| 1348 | -$td->runtest("no type key for page nodes", | |
| 1348 | +$td->runtest("check no type key for page nodes", | |
| 1349 | 1349 | {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, |
| 1350 | - {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, | |
| 1350 | + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3}, | |
| 1351 | 1351 | $td->NORMALIZE_NEWLINES); |
| 1352 | +$td->runtest("no type key for page nodes", | |
| 1353 | + {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"}, | |
| 1354 | + {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3}, | |
| 1355 | + $td->NORMALIZE_NEWLINES); | |
| 1356 | +$td->runtest("check output", | |
| 1357 | + {$td->FILE => "a-split-out-1.pdf"}, | |
| 1358 | + {$td->FILE => "no-pages-types-fixed.pdf"}); | |
| 1352 | 1359 | $td->runtest("detect loops in pages structure", |
| 1353 | 1360 | {$td->COMMAND => "qpdf --check pages-loop.pdf"}, |
| 1354 | 1361 | {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, | ... | ... |
qpdf/qtest/qpdf/no-pages-types-fix.out
0 → 100644
| 1 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | |
| 2 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | |
| 3 | +qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/no-pages-types.out
| ... | ... | @@ -2,5 +2,5 @@ checking no-pages-types.pdf |
| 2 | 2 | PDF Version: 1.3 |
| 3 | 3 | File is not encrypted |
| 4 | 4 | File is not linearized |
| 5 | -No syntax or stream encoding errors found; the file may still contain | |
| 6 | -errors that qpdf cannot detect | |
| 5 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | |
| 6 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | ... | ... |