Commit d9dd99eca32e44788165ce169f1e59498ad1c16e
1 parent
c032f7c9
Attempt to repair /Type key in pages nodes (fixes #349)
Showing
7 changed files
with
47 additions
and
30 deletions
ChangeLog
| 1 | +2019-08-18 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * When traversing the pages tree, if an invalid /Type key is | ||
| 4 | + encountered, fix it. This is not done for all operations, but it | ||
| 5 | + will be done for any case in which getAllPages is called. This | ||
| 6 | + includes all page-based CLI operations. (Hopefully) Fixes #349. | ||
| 7 | + | ||
| 1 | 2019-08-17 Jay Berkenbilt <ejb@ql.org> | 8 | 2019-08-17 Jay Berkenbilt <ejb@ql.org> |
| 2 | 9 | ||
| 3 | * Change internal implementation of QPDF arrays to use sparse | 10 | * Change internal implementation of QPDF arrays to use sparse |
libqpdf/QPDF_pages.cc
| @@ -56,12 +56,12 @@ QPDF::getAllPages() | @@ -56,12 +56,12 @@ QPDF::getAllPages() | ||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void | 58 | void |
| 59 | -QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | 59 | +QPDF::getAllPagesInternal(QPDFObjectHandle cur_node, |
| 60 | std::vector<QPDFObjectHandle>& result, | 60 | std::vector<QPDFObjectHandle>& result, |
| 61 | std::set<QPDFObjGen>& visited, | 61 | std::set<QPDFObjGen>& visited, |
| 62 | std::set<QPDFObjGen>& seen) | 62 | std::set<QPDFObjGen>& seen) |
| 63 | { | 63 | { |
| 64 | - QPDFObjGen this_og = cur_pages.getObjGen(); | 64 | + QPDFObjGen this_og = cur_node.getObjGen(); |
| 65 | if (visited.count(this_og) > 0) | 65 | if (visited.count(this_og) > 0) |
| 66 | { | 66 | { |
| 67 | throw QPDFExc( | 67 | throw QPDFExc( |
| @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | ||
| 70 | "Loop detected in /Pages structure (getAllPages)"); | 70 | "Loop detected in /Pages structure (getAllPages)"); |
| 71 | } | 71 | } |
| 72 | visited.insert(this_og); | 72 | visited.insert(this_og); |
| 73 | - std::string type; | ||
| 74 | - QPDFObjectHandle type_key = cur_pages.getKey("/Type"); | ||
| 75 | - if (type_key.isName()) | 73 | + std::string wanted_type; |
| 74 | + if (cur_node.hasKey("/Kids")) | ||
| 76 | { | 75 | { |
| 77 | - type = type_key.getName(); | ||
| 78 | - } | ||
| 79 | - else if (cur_pages.hasKey("/Kids")) | ||
| 80 | - { | ||
| 81 | - type = "/Pages"; | ||
| 82 | - } | ||
| 83 | - else | ||
| 84 | - { | ||
| 85 | - type = "/Page"; | ||
| 86 | - } | ||
| 87 | - if (type == "/Pages") | ||
| 88 | - { | ||
| 89 | - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); | 76 | + wanted_type = "/Pages"; |
| 77 | + QPDFObjectHandle kids = cur_node.getKey("/Kids"); | ||
| 90 | int n = kids.getArrayNItems(); | 78 | int n = kids.getArrayNItems(); |
| 91 | for (int i = 0; i < n; ++i) | 79 | for (int i = 0; i < n; ++i) |
| 92 | { | 80 | { |
| @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, | ||
| 108 | getAllPagesInternal(kid, result, visited, seen); | 96 | getAllPagesInternal(kid, result, visited, seen); |
| 109 | } | 97 | } |
| 110 | } | 98 | } |
| 111 | - else if (type == "/Page") | 99 | + else |
| 112 | { | 100 | { |
| 101 | + wanted_type = "/Page"; | ||
| 113 | seen.insert(this_og); | 102 | seen.insert(this_og); |
| 114 | - result.push_back(cur_pages); | 103 | + result.push_back(cur_node); |
| 115 | } | 104 | } |
| 116 | - else | 105 | + |
| 106 | + QPDFObjectHandle type_key = cur_node.getKey("/Type"); | ||
| 107 | + if (! (type_key.isName() && (type_key.getName() == wanted_type))) | ||
| 117 | { | 108 | { |
| 118 | - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), | ||
| 119 | - this->m->last_object_description, | ||
| 120 | - this->m->file->getLastOffset(), | ||
| 121 | - "invalid Type " + type + " in page tree"); | 109 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), |
| 110 | + "page tree node", | ||
| 111 | + this->m->file->getLastOffset(), | ||
| 112 | + "/Type key should be " + wanted_type + | ||
| 113 | + " but is not; overriding")); | ||
| 114 | + cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); | ||
| 122 | } | 115 | } |
| 123 | visited.erase(this_og); | 116 | visited.erase(this_og); |
| 124 | } | 117 | } |
manual/qpdf-manual.xml
| @@ -4490,6 +4490,13 @@ print "\n"; | @@ -4490,6 +4490,13 @@ print "\n"; | ||
| 4490 | </listitem> | 4490 | </listitem> |
| 4491 | <listitem> | 4491 | <listitem> |
| 4492 | <para> | 4492 | <para> |
| 4493 | + When traversing the pages tree, if nodes are encountered | ||
| 4494 | + with invalid types, the types are fixed, and a warning is | ||
| 4495 | + issued. | ||
| 4496 | + </para> | ||
| 4497 | + </listitem> | ||
| 4498 | + <listitem> | ||
| 4499 | + <para> | ||
| 4493 | A new helper method | 4500 | A new helper method |
| 4494 | <function>QUtil::read_file_into_memory</function> was added. | 4501 | <function>QUtil::read_file_into_memory</function> was added. |
| 4495 | </para> | 4502 | </para> |
qpdf/qtest/qpdf.test
| @@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size", | @@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size", | ||
| 1339 | show_ntests(); | 1339 | show_ntests(); |
| 1340 | # ---------- | 1340 | # ---------- |
| 1341 | $td->notify("--- Page errors ---"); | 1341 | $td->notify("--- Page errors ---"); |
| 1342 | -$n_tests += 3; | 1342 | +$n_tests += 5; |
| 1343 | 1343 | ||
| 1344 | $td->runtest("handle page no with contents", | 1344 | $td->runtest("handle page no with contents", |
| 1345 | {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, | 1345 | {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, |
| 1346 | {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, | 1346 | {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, |
| 1347 | $td->NORMALIZE_NEWLINES); | 1347 | $td->NORMALIZE_NEWLINES); |
| 1348 | -$td->runtest("no type key for page nodes", | 1348 | +$td->runtest("check no type key for page nodes", |
| 1349 | {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, | 1349 | {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, |
| 1350 | - {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, | 1350 | + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3}, |
| 1351 | $td->NORMALIZE_NEWLINES); | 1351 | $td->NORMALIZE_NEWLINES); |
| 1352 | +$td->runtest("no type key for page nodes", | ||
| 1353 | + {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"}, | ||
| 1354 | + {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3}, | ||
| 1355 | + $td->NORMALIZE_NEWLINES); | ||
| 1356 | +$td->runtest("check output", | ||
| 1357 | + {$td->FILE => "a-split-out-1.pdf"}, | ||
| 1358 | + {$td->FILE => "no-pages-types-fixed.pdf"}); | ||
| 1352 | $td->runtest("detect loops in pages structure", | 1359 | $td->runtest("detect loops in pages structure", |
| 1353 | {$td->COMMAND => "qpdf --check pages-loop.pdf"}, | 1360 | {$td->COMMAND => "qpdf --check pages-loop.pdf"}, |
| 1354 | {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, | 1361 | {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, |
qpdf/qtest/qpdf/no-pages-types-fix.out
0 → 100644
| 1 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | ||
| 2 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | ||
| 3 | +qpdf: operation succeeded with warnings; resulting file may have some problems |
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/no-pages-types.out
| @@ -2,5 +2,5 @@ checking no-pages-types.pdf | @@ -2,5 +2,5 @@ checking no-pages-types.pdf | ||
| 2 | PDF Version: 1.3 | 2 | PDF Version: 1.3 |
| 3 | File is not encrypted | 3 | File is not encrypted |
| 4 | File is not linearized | 4 | File is not linearized |
| 5 | -No syntax or stream encoding errors found; the file may still contain | ||
| 6 | -errors that qpdf cannot detect | 5 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding |
| 6 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding |