Commit d9dd99eca32e44788165ce169f1e59498ad1c16e

Authored by Jay Berkenbilt
1 parent c032f7c9

Attempt to repair /Type key in pages nodes (fixes #349)

ChangeLog
  1 +2019-08-18 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * When traversing the pages tree, if an invalid /Type key is
  4 + encountered, fix it. This is not done for all operations, but it
  5 + will be done for any case in which getAllPages is called. This
  6 + includes all page-based CLI operations. (Hopefully) Fixes #349.
  7 +
1 2019-08-17 Jay Berkenbilt <ejb@ql.org> 8 2019-08-17 Jay Berkenbilt <ejb@ql.org>
2 9
3 * Change internal implementation of QPDF arrays to use sparse 10 * Change internal implementation of QPDF arrays to use sparse
libqpdf/QPDF_pages.cc
@@ -56,12 +56,12 @@ QPDF::getAllPages() @@ -56,12 +56,12 @@ QPDF::getAllPages()
56 } 56 }
57 57
58 void 58 void
59 -QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, 59 +QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
60 std::vector<QPDFObjectHandle>& result, 60 std::vector<QPDFObjectHandle>& result,
61 std::set<QPDFObjGen>& visited, 61 std::set<QPDFObjGen>& visited,
62 std::set<QPDFObjGen>& seen) 62 std::set<QPDFObjGen>& seen)
63 { 63 {
64 - QPDFObjGen this_og = cur_pages.getObjGen(); 64 + QPDFObjGen this_og = cur_node.getObjGen();
65 if (visited.count(this_og) > 0) 65 if (visited.count(this_og) > 0)
66 { 66 {
67 throw QPDFExc( 67 throw QPDFExc(
@@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
70 "Loop detected in /Pages structure (getAllPages)"); 70 "Loop detected in /Pages structure (getAllPages)");
71 } 71 }
72 visited.insert(this_og); 72 visited.insert(this_og);
73 - std::string type;  
74 - QPDFObjectHandle type_key = cur_pages.getKey("/Type");  
75 - if (type_key.isName()) 73 + std::string wanted_type;
  74 + if (cur_node.hasKey("/Kids"))
76 { 75 {
77 - type = type_key.getName();  
78 - }  
79 - else if (cur_pages.hasKey("/Kids"))  
80 - {  
81 - type = "/Pages";  
82 - }  
83 - else  
84 - {  
85 - type = "/Page";  
86 - }  
87 - if (type == "/Pages")  
88 - {  
89 - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); 76 + wanted_type = "/Pages";
  77 + QPDFObjectHandle kids = cur_node.getKey("/Kids");
90 int n = kids.getArrayNItems(); 78 int n = kids.getArrayNItems();
91 for (int i = 0; i < n; ++i) 79 for (int i = 0; i < n; ++i)
92 { 80 {
@@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
108 getAllPagesInternal(kid, result, visited, seen); 96 getAllPagesInternal(kid, result, visited, seen);
109 } 97 }
110 } 98 }
111 - else if (type == "/Page") 99 + else
112 { 100 {
  101 + wanted_type = "/Page";
113 seen.insert(this_og); 102 seen.insert(this_og);
114 - result.push_back(cur_pages); 103 + result.push_back(cur_node);
115 } 104 }
116 - else 105 +
  106 + QPDFObjectHandle type_key = cur_node.getKey("/Type");
  107 + if (! (type_key.isName() && (type_key.getName() == wanted_type)))
117 { 108 {
118 - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),  
119 - this->m->last_object_description,  
120 - this->m->file->getLastOffset(),  
121 - "invalid Type " + type + " in page tree"); 109 + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  110 + "page tree node",
  111 + this->m->file->getLastOffset(),
  112 + "/Type key should be " + wanted_type +
  113 + " but is not; overriding"));
  114 + cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
122 } 115 }
123 visited.erase(this_og); 116 visited.erase(this_og);
124 } 117 }
manual/qpdf-manual.xml
@@ -4490,6 +4490,13 @@ print &quot;\n&quot;; @@ -4490,6 +4490,13 @@ print &quot;\n&quot;;
4490 </listitem> 4490 </listitem>
4491 <listitem> 4491 <listitem>
4492 <para> 4492 <para>
  4493 + When traversing the pages tree, if nodes are encountered
  4494 + with invalid types, the types are fixed, and a warning is
  4495 + issued.
  4496 + </para>
  4497 + </listitem>
  4498 + <listitem>
  4499 + <para>
4493 A new helper method 4500 A new helper method
4494 <function>QUtil::read_file_into_memory</function> was added. 4501 <function>QUtil::read_file_into_memory</function> was added.
4495 </para> 4502 </para>
qpdf/qtest/qpdf.test
@@ -1339,16 +1339,23 @@ $td-&gt;runtest(&quot;sanity check array size&quot;, @@ -1339,16 +1339,23 @@ $td-&gt;runtest(&quot;sanity check array size&quot;,
1339 show_ntests(); 1339 show_ntests();
1340 # ---------- 1340 # ----------
1341 $td->notify("--- Page errors ---"); 1341 $td->notify("--- Page errors ---");
1342 -$n_tests += 3; 1342 +$n_tests += 5;
1343 1343
1344 $td->runtest("handle page no with contents", 1344 $td->runtest("handle page no with contents",
1345 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, 1345 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
1346 {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, 1346 {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
1347 $td->NORMALIZE_NEWLINES); 1347 $td->NORMALIZE_NEWLINES);
1348 -$td->runtest("no type key for page nodes", 1348 +$td->runtest("check no type key for page nodes",
1349 {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, 1349 {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
1350 - {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, 1350 + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
1351 $td->NORMALIZE_NEWLINES); 1351 $td->NORMALIZE_NEWLINES);
  1352 +$td->runtest("no type key for page nodes",
  1353 + {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
  1354 + {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
  1355 + $td->NORMALIZE_NEWLINES);
  1356 +$td->runtest("check output",
  1357 + {$td->FILE => "a-split-out-1.pdf"},
  1358 + {$td->FILE => "no-pages-types-fixed.pdf"});
1352 $td->runtest("detect loops in pages structure", 1359 $td->runtest("detect loops in pages structure",
1353 {$td->COMMAND => "qpdf --check pages-loop.pdf"}, 1360 {$td->COMMAND => "qpdf --check pages-loop.pdf"},
1354 {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, 1361 {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
qpdf/qtest/qpdf/no-pages-types-fix.out 0 → 100644
  1 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
  2 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
  3 +qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/no-pages-types-fixed.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/no-pages-types.out
@@ -2,5 +2,5 @@ checking no-pages-types.pdf @@ -2,5 +2,5 @@ checking no-pages-types.pdf
2 PDF Version: 1.3 2 PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 -No syntax or stream encoding errors found; the file may still contain  
6 -errors that qpdf cannot detect 5 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
  6 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding