Commit c51bdeb19cee981cc3e6d308ecff4297a7ca29e7

Authored by Jay Berkenbilt
Committed by GitHub
2 parents ec678441 9a69cbba

Merge pull request #1017 from m-holger/mbox

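Check for missing MediaBoxes: when a page has no rectangular /MediaBox of its own and none of its ancestor /Pages nodes supplies one, issue a warning and set the page's /MediaBox to letter / ANSI A ([0 0 612 792]).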
include/qpdf/QPDF.hh
... ... @@ -1105,7 +1105,10 @@ class QPDF
1105 1105 // methods to support page handling
1106 1106  
1107 1107 void getAllPagesInternal(
1108   - QPDFObjectHandle cur_pages, QPDFObjGen::set& visited, QPDFObjGen::set& seen);
  1108 + QPDFObjectHandle cur_pages,
  1109 + QPDFObjGen::set& visited,
  1110 + QPDFObjGen::set& seen,
  1111 + bool media_box);
1109 1112 void insertPage(QPDFObjectHandle newpage, int pos);
1110 1113 void flattenPagesTree();
1111 1114 void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
... ...
libqpdf/QPDF_pages.cc
... ... @@ -68,7 +68,7 @@ QPDF::getAllPages()
68 68 seen.clear();
69 69 if (pages.hasKey("/Kids")) {
70 70 // Ensure we actually found a /Pages object.
71   - getAllPagesInternal(pages, visited, seen);
  71 + getAllPagesInternal(pages, visited, seen, false);
72 72 }
73 73 }
74 74 return m->all_pages;
... ... @@ -76,7 +76,7 @@ QPDF::getAllPages()
76 76  
77 77 void
78 78 QPDF::getAllPagesInternal(
79   - QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen)
  79 + QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box)
80 80 {
81 81 if (!visited.add(cur_node)) {
82 82 throw QPDFExc(
... ... @@ -90,13 +90,26 @@ QPDF::getAllPagesInternal(
90 90 cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding");
91 91 cur_node.replaceKey("/Type", "/Pages"_qpdf);
92 92 }
  93 + if (!media_box) {
  94 + media_box = cur_node.getKey("/MediaBox").isRectangle();
  95 + QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
  96 + }
93 97 auto kids = cur_node.getKey("/Kids");
94 98 int n = kids.getArrayNItems();
95 99 for (int i = 0; i < n; ++i) {
96 100 auto kid = kids.getArrayItem(i);
97 101 if (kid.hasKey("/Kids")) {
98   - getAllPagesInternal(kid, visited, seen);
  102 + getAllPagesInternal(kid, visited, seen, media_box);
99 103 } else {
  104 + if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
  105 + QTC::TC("qpdf", "QPDF missing mediabox");
  106 + kid.warnIfPossible(
  107 + "kid " + std::to_string(i) +
  108 + " (from 0) MediaBox is undefined; setting to letter / ANSI A");
  109 + kid.replaceKey(
  110 + "/MediaBox",
  111 + QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
  112 + }
100 113 if (!kid.isIndirect()) {
101 114 QTC::TC("qpdf", "QPDF handle direct page object");
102 115 cur_node.warnIfPossible(
... ...
qpdf/qpdf.testcov
... ... @@ -411,6 +411,8 @@ QPDFPageObjectHelper copy shared attribute 1
411 411 QPDFJob from_nr from repeat_nr 0
412 412 QPDF resolve duplicated page object 0
413 413 QPDF handle direct page object 0
  414 +QPDF missing mediabox 0
  415 +QPDF inherit mediabox 1
414 416 QPDFTokenizer finder found wrong word 0
415 417 QPDFTokenizer found EI by byte count 0
416 418 QPDFTokenizer found EI after more than one try 0
... ...
qpdf/qtest/copy-foreign-objects.test
... ... @@ -56,7 +56,7 @@ foreach my $i (0, 1)
56 56 }
57 57 $td->runtest("issue 449",
58 58 {$td->COMMAND => "test_driver 69 issue-449.pdf"},
59   - {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
  59 + {$td->FILE => "issue-449.out", $td->EXIT_STATUS => 0},
60 60 $td->NORMALIZE_NEWLINES);
61 61  
62 62 cleanup();
... ...
qpdf/qtest/page-errors.test
... ... @@ -14,12 +14,26 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('page-errors');
16 16  
17   -my $n_tests = 5;
  17 +my $n_tests = 9;
18 18  
19 19 $td->runtest("handle page no with contents",
20 20 {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
21   - {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
  21 + {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 3},
22 22 $td->NORMALIZE_NEWLINES);
  23 +$td->runtest("handle page with missing MediaBox",
  24 + {$td->COMMAND => "qpdf --static-id --empty --pages page-no-content.pdf -- out.pdf"},
  25 + {$td->FILE => "page-missing-mediabox.out", $td->EXIT_STATUS => 0},
  26 + $td->NORMALIZE_NEWLINES);
  27 +$td->runtest("check output",
  28 + {$td->FILE => "out.pdf"},
  29 + {$td->FILE => "page-missing-mediabox-out.pdf"});
  30 +$td->runtest("handle page with inherited MediaBox",
  31 + {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- out.pdf"},
  32 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  33 + $td->NORMALIZE_NEWLINES);
  34 +$td->runtest("check output",
  35 + {$td->FILE => "out.pdf"},
  36 + {$td->FILE => "page-inherit-mediabox-out.pdf"});
23 37 $td->runtest("check no type key for page nodes",
24 38 {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
25 39 {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
... ...
qpdf/qtest/qpdf/issue-449.out 0 → 100644
  1 +WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
  2 +WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
  3 +test 69 done
... ...
qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/page-inherit-mediabox.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 3
  17 + /Kids [
  18 + 3 0 R
  19 + 4 0 R
  20 + 5 0 R
  21 + ]
  22 + /MediaBox [
  23 + 0
  24 + 0
  25 + 612
  26 + 792
  27 + ]
  28 + /Type /Pages
  29 +>>
  30 +endobj
  31 +
  32 +%% Page 1
  33 +%% Original object ID: 3 0
  34 +3 0 obj
  35 +<<
  36 + /Contents 6 0 R
  37 + /MediaBox [
  38 + 0
  39 + 0
  40 + 612
  41 + 792
  42 + ]
  43 + /Parent 2 0 R
  44 + /Resources <<
  45 + /Font <<
  46 + /F1 8 0 R
  47 + >>
  48 + /ProcSet 9 0 R
  49 + >>
  50 + /Type /Page
  51 +>>
  52 +endobj
  53 +
  54 +%% Page 2
  55 +%% Original object ID: 4 0
  56 +4 0 obj
  57 +<<
  58 + /Parent 2 0 R
  59 + /Type /Page
  60 +>>
  61 +endobj
  62 +
  63 +%% Page 3
  64 +%% Original object ID: 5 0
  65 +5 0 obj
  66 +<<
  67 + /Contents 10 0 R
  68 + /MediaBox [
  69 + 0
  70 + 0
  71 + 612
  72 + 792
  73 + ]
  74 + /Parent 2 0 R
  75 + /Resources <<
  76 + /Font <<
  77 + /F1 12 0 R
  78 + >>
  79 + /ProcSet 13 0 R
  80 + >>
  81 + /Type /Page
  82 +>>
  83 +endobj
  84 +
  85 +%% Contents for page 1
  86 +%% Original object ID: 6 0
  87 +6 0 obj
  88 +<<
  89 + /Length 7 0 R
  90 +>>
  91 +stream
  92 +BT
  93 + /F1 24 Tf
  94 + 72 720 Td
  95 + (Potato) Tj
  96 +ET
  97 +endstream
  98 +endobj
  99 +
  100 +7 0 obj
  101 +44
  102 +endobj
  103 +
  104 +%% Original object ID: 7 0
  105 +8 0 obj
  106 +<<
  107 + /BaseFont /Helvetica
  108 + /Encoding /WinAnsiEncoding
  109 + /Name /F1
  110 + /Subtype /Type1
  111 + /Type /Font
  112 +>>
  113 +endobj
  114 +
  115 +%% Original object ID: 8 0
  116 +9 0 obj
  117 +[
  118 + /PDF
  119 + /Text
  120 +]
  121 +endobj
  122 +
  123 +%% Contents for page 3
  124 +%% Original object ID: 9 0
  125 +10 0 obj
  126 +<<
  127 + /Length 11 0 R
  128 +>>
  129 +stream
  130 +BT
  131 + /F1 24 Tf
  132 + 72 720 Td
  133 + (Potato) Tj
  134 +ET
  135 +endstream
  136 +endobj
  137 +
  138 +11 0 obj
  139 +44
  140 +endobj
  141 +
  142 +%% Original object ID: 10 0
  143 +12 0 obj
  144 +<<
  145 + /BaseFont /Helvetica
  146 + /Encoding /WinAnsiEncoding
  147 + /Name /F1
  148 + /Subtype /Type1
  149 + /Type /Font
  150 +>>
  151 +endobj
  152 +
  153 +%% Original object ID: 11 0
  154 +13 0 obj
  155 +[
  156 + /PDF
  157 + /Text
  158 +]
  159 +endobj
  160 +
  161 +xref
  162 +0 14
  163 +0000000000 65535 f
  164 +0000000052 00000 n
  165 +0000000133 00000 n
  166 +0000000308 00000 n
  167 +0000000537 00000 n
  168 +0000000626 00000 n
  169 +0000000871 00000 n
  170 +0000000970 00000 n
  171 +0000001016 00000 n
  172 +0000001161 00000 n
  173 +0000001246 00000 n
  174 +0000001347 00000 n
  175 +0000001395 00000 n
  176 +0000001542 00000 n
  177 +trailer <<
  178 + /Root 1 0 R
  179 + /Size 14
  180 + /ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>]
  181 +>>
  182 +startxref
  183 +1578
  184 +%%EOF
... ...
qpdf/qtest/qpdf/page-missing-mediabox-out.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/page-missing-mediabox.out 0 → 100644
  1 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
... ...
qpdf/qtest/qpdf/page-no-content.out
  1 +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
1 2 page 1: 3 0 R
2 3 content:
3 4 6 0 R
... ... @@ -6,3 +7,4 @@ page 2: 4 0 R
6 7 page 3: 5 0 R
7 8 content:
8 9 9 0 R
  10 +qpdf: operation succeeded with warnings
... ...