Commit d9dd99eca32e44788165ce169f1e59498ad1c16e

Authored by Jay Berkenbilt
1 parent c032f7c9

Attempt to repair /Type key in pages nodes (fixes #349)

ChangeLog
  1 +2019-08-18 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * When traversing the pages tree, if an invalid /Type key is
  4 + encountered, fix it. This is not done for all operations, but it
  5 + will be done for any case in which getAllPages is called. This
  6 + includes all page-based CLI operations. (Hopefully) Fixes #349.
  7 +
1 8 2019-08-17 Jay Berkenbilt <ejb@ql.org>
2 9  
3 10 * Change internal implementation of QPDF arrays to use sparse
... ...
libqpdf/QPDF_pages.cc
... ... @@ -56,12 +56,12 @@ QPDF::getAllPages()
56 56 }
57 57  
58 58 void
59   -QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
  59 +QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
60 60 std::vector<QPDFObjectHandle>& result,
61 61 std::set<QPDFObjGen>& visited,
62 62 std::set<QPDFObjGen>& seen)
63 63 {
64   - QPDFObjGen this_og = cur_pages.getObjGen();
  64 + QPDFObjGen this_og = cur_node.getObjGen();
65 65 if (visited.count(this_og) > 0)
66 66 {
67 67 throw QPDFExc(
... ... @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
70 70 "Loop detected in /Pages structure (getAllPages)");
71 71 }
72 72 visited.insert(this_og);
73   - std::string type;
74   - QPDFObjectHandle type_key = cur_pages.getKey("/Type");
75   - if (type_key.isName())
  73 + std::string wanted_type;
  74 + if (cur_node.hasKey("/Kids"))
76 75 {
77   - type = type_key.getName();
78   - }
79   - else if (cur_pages.hasKey("/Kids"))
80   - {
81   - type = "/Pages";
82   - }
83   - else
84   - {
85   - type = "/Page";
86   - }
87   - if (type == "/Pages")
88   - {
89   - QPDFObjectHandle kids = cur_pages.getKey("/Kids");
  76 + wanted_type = "/Pages";
  77 + QPDFObjectHandle kids = cur_node.getKey("/Kids");
90 78 int n = kids.getArrayNItems();
91 79 for (int i = 0; i < n; ++i)
92 80 {
... ... @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
108 96 getAllPagesInternal(kid, result, visited, seen);
109 97 }
110 98 }
111   - else if (type == "/Page")
  99 + else
112 100 {
  101 + wanted_type = "/Page";
113 102 seen.insert(this_og);
114   - result.push_back(cur_pages);
  103 + result.push_back(cur_node);
115 104 }
116   - else
  105 +
  106 + QPDFObjectHandle type_key = cur_node.getKey("/Type");
  107 + if (! (type_key.isName() && (type_key.getName() == wanted_type)))
117 108 {
118   - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
119   - this->m->last_object_description,
120   - this->m->file->getLastOffset(),
121   - "invalid Type " + type + " in page tree");
  109 + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  110 + "page tree node",
  111 + this->m->file->getLastOffset(),
  112 + "/Type key should be " + wanted_type +
  113 + " but is not; overriding"));
  114 + cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
122 115 }
123 116 visited.erase(this_og);
124 117 }
... ...
manual/qpdf-manual.xml
... ... @@ -4490,6 +4490,13 @@ print &quot;\n&quot;;
4490 4490 </listitem>
4491 4491 <listitem>
4492 4492 <para>
  4493 + When traversing the pages tree, if nodes are encountered
  4494 + with invalid types, the types are fixed, and a warning is
  4495 + issued.
  4496 + </para>
  4497 + </listitem>
  4498 + <listitem>
  4499 + <para>
4493 4500 A new helper method
4494 4501 <function>QUtil::read_file_into_memory</function> was added.
4495 4502 </para>
... ...
qpdf/qtest/qpdf.test
... ... @@ -1339,16 +1339,23 @@ $td-&gt;runtest(&quot;sanity check array size&quot;,
1339 1339 show_ntests();
1340 1340 # ----------
1341 1341 $td->notify("--- Page errors ---");
1342   -$n_tests += 3;
  1342 +$n_tests += 5;
1343 1343  
1344 1344 $td->runtest("handle page no with contents",
1345 1345 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
1346 1346 {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
1347 1347 $td->NORMALIZE_NEWLINES);
1348   -$td->runtest("no type key for page nodes",
  1348 +$td->runtest("check no type key for page nodes",
1349 1349 {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
1350   - {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
  1350 + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
1351 1351 $td->NORMALIZE_NEWLINES);
  1352 +$td->runtest("no type key for page nodes",
  1353 + {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
  1354 + {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
  1355 + $td->NORMALIZE_NEWLINES);
  1356 +$td->runtest("check output",
  1357 + {$td->FILE => "a-split-out-1.pdf"},
  1358 + {$td->FILE => "no-pages-types-fixed.pdf"});
1352 1359 $td->runtest("detect loops in pages structure",
1353 1360 {$td->COMMAND => "qpdf --check pages-loop.pdf"},
1354 1361 {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
... ...
qpdf/qtest/qpdf/no-pages-types-fix.out 0 → 100644
  1 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
  2 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
  3 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/no-pages-types-fixed.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/no-pages-types.out
... ... @@ -2,5 +2,5 @@ checking no-pages-types.pdf
2 2 PDF Version: 1.3
3 3 File is not encrypted
4 4 File is not linearized
5   -No syntax or stream encoding errors found; the file may still contain
6   -errors that qpdf cannot detect
  5 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
  6 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
... ...