Commit d8900c2255d12adbe9342ea751403740ca7a826d

Authored by Jay Berkenbilt
1 parent caab1b0e

Handle page tree node with no /Type

Originally reported here:
https://bugs.launchpad.net/ubuntu/+source/qpdf/+bug/1397413

The PDF specification says that the /Type key is required for every
node in the page tree (both /Page and /Pages), but some PDF files omit
it. If the /Type key is missing or is not a name, use the presence of
other keys to determine which kind of page tree node this is: a node
with a /Kids key is treated as /Pages, and any other node as /Page.
ChangeLog
  1 +2014-12-01 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Some broken PDF files lack the required /Type key for /Page and
  4 + /Pages nodes in the page tree. QPDF now uses other methods
  5 + to figure out what kind of node it is looking at so that it can
  6 + handle those files. Originally reported at
  7 + https://bugs.launchpad.net/ubuntu/+source/qpdf/+bug/1397413
  8 +
1 2014-11-14 Jay Berkenbilt <ejb@ql.org> 9 2014-11-14 Jay Berkenbilt <ejb@ql.org>
2 10
3 * Bug fix: QPDFObjectHandle::getPageContents() no longer throws an 11 * Bug fix: QPDFObjectHandle::getPageContents() no longer throws an
libqpdf/QPDF_pages.cc
@@ -56,7 +56,20 @@ void @@ -56,7 +56,20 @@ void
56 QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, 56 QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
57 std::vector<QPDFObjectHandle>& result) 57 std::vector<QPDFObjectHandle>& result)
58 { 58 {
59 - std::string type = cur_pages.getKey("/Type").getName(); 59 + std::string type;
  60 + QPDFObjectHandle type_key = cur_pages.getKey("/Type");
  61 + if (type_key.isName())
  62 + {
  63 + type = type_key.getName();
  64 + }
  65 + else if (cur_pages.hasKey("/Kids"))
  66 + {
  67 + type = "/Pages";
  68 + }
  69 + else
  70 + {
  71 + type = "/Page";
  72 + }
60 if (type == "/Pages") 73 if (type == "/Pages")
61 { 74 {
62 QPDFObjectHandle kids = cur_pages.getKey("/Kids"); 75 QPDFObjectHandle kids = cur_pages.getKey("/Kids");
qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 show_ntests(); 199 show_ntests();
200 # ---------- 200 # ----------
201 $td->notify("--- Miscellaneous Tests ---"); 201 $td->notify("--- Miscellaneous Tests ---");
202 -$n_tests += 73; 202 +$n_tests += 74;
203 203
204 $td->runtest("qpdf version", 204 $td->runtest("qpdf version",
205 {$td->COMMAND => "qpdf --version"}, 205 {$td->COMMAND => "qpdf --version"},
@@ -558,6 +558,10 @@ $td->runtest("handle page no with contents", @@ -558,6 +558,10 @@ $td->runtest("handle page no with contents",
558 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, 558 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
559 {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, 559 {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
560 $td->NORMALIZE_NEWLINES); 560 $td->NORMALIZE_NEWLINES);
  561 +$td->runtest("no type key for page nodes",
  562 + {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
  563 + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
  564 + $td->NORMALIZE_NEWLINES);
561 565
562 show_ntests(); 566 show_ntests();
563 # ---------- 567 # ----------
qpdf/qtest/qpdf/no-pages-types.out 0 → 100644
  1 +checking no-pages-types.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +No syntax or stream encoding errors found; the file may still contain
  6 +errors that qpdf cannot detect
qpdf/qtest/qpdf/no-pages-types.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Zype /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Zype /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF