Commit e4e2e26d990d038b0d35e7466c8a24dbfafab7d2

Authored by Jay Berkenbilt
1 parent 1a4dcb4a

Properly handle pages with no contents (fixes #194)

Remove calls to assertPageObject(). All cases in the library that
called assertPageObject() work fine if you don't call
assertPageObject() because nothing assumes anything that was being
checked by that call. Removing the calls enables more files to be
successfully processed.
ChangeLog
  1 +2018-03-06 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Properly handle pages with no contents. Fixes #194.
  4 +
1 5 2018-03-05 Jay Berkenbilt <ejb@ql.org>
2 6  
3 7 * Improve handling of loops while following cross reference
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -830,6 +830,11 @@ class QPDFObjectHandle
830 830 QPDF_DLL
831 831 void assertNumber();
832 832  
  833 + // The isPageObject method checks the /Type key of the object.
  834 + // This is not completely reliable as there are some otherwise
  835 + // valid files whose /Type is wrong for page objects. qpdf is
  836 + // slightly more accepting but may still return false here when
  837 + // treating the object as a page would work. Use this sparingly.
833 838 QPDF_DLL
834 839 bool isPageObject();
835 840 QPDF_DLL
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -932,8 +932,6 @@ QPDFObjectHandle::getGeneration() const
932 932 std::map<std::string, QPDFObjectHandle>
933 933 QPDFObjectHandle::getPageImages()
934 934 {
935   - assertPageObject();
936   -
937 935 // Note: this code doesn't handle inherited resources. If this
938 936 // page dictionary doesn't have a /Resources key or has one whose
939 937 // value is null or an empty dictionary, you are supposed to walk
... ... @@ -1081,7 +1079,6 @@ QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
1081 1079 void
1082 1080 QPDFObjectHandle::rotatePage(int angle, bool relative)
1083 1081 {
1084   - assertPageObject();
1085 1082 if ((angle % 90) != 0)
1086 1083 {
1087 1084 throw std::runtime_error(
... ... @@ -1137,7 +1134,6 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
1137 1134 void
1138 1135 QPDFObjectHandle::coalesceContentStreams()
1139 1136 {
1140   - assertPageObject();
1141 1137 QPDFObjectHandle contents = this->getKey("/Contents");
1142 1138 if (contents.isStream())
1143 1139 {
... ... @@ -1218,7 +1214,6 @@ QPDFObjectHandle::parse(std::string const& object_str,
1218 1214 void
1219 1215 QPDFObjectHandle::pipePageContents(Pipeline* p)
1220 1216 {
1221   - assertPageObject();
1222 1217 std::string description = "page object " +
1223 1218 QUtil::int_to_string(this->m->objid) + " " +
1224 1219 QUtil::int_to_string(this->m->generation);
... ... @@ -1256,7 +1251,6 @@ QPDFObjectHandle::pipeContentStreams(
1256 1251 void
1257 1252 QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
1258 1253 {
1259   - assertPageObject();
1260 1254 std::string description = "page object " +
1261 1255 QUtil::int_to_string(this->m->objid) + " " +
1262 1256 QUtil::int_to_string(this->m->generation);
... ... @@ -1267,7 +1261,6 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
1267 1261 void
1268 1262 QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1269 1263 {
1270   - assertPageObject();
1271 1264 std::string description = "token filter for page object " +
1272 1265 QUtil::int_to_string(this->m->objid) + " " +
1273 1266 QUtil::int_to_string(this->m->generation);
... ... @@ -2222,8 +2215,29 @@ QPDFObjectHandle::assertNumber()
2222 2215 bool
2223 2216 QPDFObjectHandle::isPageObject()
2224 2217 {
2225   - // Some PDF files have /Type broken on pages.
2226   - return (this->isDictionary() && this->hasKey("/Contents"));
  2218 + // See comments in QPDFObjectHandle.hh. Non-dictionaries are never treated as pages.
  2219 + if (! this->isDictionary())
  2220 + {
  2221 + return false;
  2222 + }
  2223 + if (this->hasKey("/Type"))
  2224 + {
  2225 + QPDFObjectHandle type = this->getKey("/Type");
  2226 + if (type.isName() && (type.getName() == "/Page"))
  2227 + {
  2228 + return true;
  2229 + }
  2230 + // Files have been seen in the wild that have /Type (Page), i.e. a string instead of the name /Page.
  2231 + if (type.isString() && (type.getStringValue() == "Page"))
  2232 + {
  2233 + return true;
  2234 + }
  2235 + }
  2236 + if (this->hasKey("/Contents")) // fallback when /Type is missing or unrecognized; NOTE(review): any dictionary with /Contents passes -- confirm intended
  2237 + {
  2238 + return true;
  2239 + }
  2240 + return false;
2227 2241 }
2228 2242  
2229 2243 bool
... ...
libqpdf/QPDF_pages.cc
... ... @@ -191,7 +191,6 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
191 191 // pos = npages adds to the end.
192 192  
193 193 flattenPagesTree();
194   - newpage.assertPageObject();
195 194  
196 195 if (! newpage.isIndirect())
197 196 {
... ... @@ -288,7 +287,6 @@ QPDF::addPage(QPDFObjectHandle newpage, bool first)
288 287 int
289 288 QPDF::findPage(QPDFObjectHandle& page)
290 289 {
291   - page.assertPageObject();
292 290 return findPage(page.getObjGen());
293 291 }
294 292  
... ...
qpdf/qtest/qpdf.test
... ... @@ -921,6 +921,34 @@ $td->runtest("check output",
921 921  
922 922 show_ntests();
923 923 # ----------
  924 +$td->notify("--- Page with no contents ---");
  925 +$n_tests += 7;
  926 +
  927 +$td->runtest("check no contents",
  928 + {$td->COMMAND => "qpdf --check no-contents.pdf"},
  929 + {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
  930 + $td->NORMALIZE_NEWLINES);
  931 +
  932 +foreach my $arg ('--qdf', '--coalesce-contents', '')
  933 +{
  934 + $td->runtest("convert no contents ($arg)",
  935 + {$td->COMMAND =>
  936 + "qpdf $arg --static-id no-contents.pdf a.pdf"},
  937 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  938 +
  939 + my $suf = $arg;
  940 + $suf =~ s/--//;
  941 + if ($suf eq '')
  942 + {
  943 + $suf = "none";
  944 + }
  945 + $td->runtest("check output",
  946 + {$td->FILE => "a.pdf"},
  947 + {$td->FILE => "no-contents-$suf.pdf"});
  948 +}
  949 +
  950 +show_ntests();
  951 +# ----------
924 952 $td->notify("--- Token filters ---");
925 953 $n_tests += 2;
926 954  
... ...
qpdf/qtest/qpdf/no-contents-check.out 0 → 100644
  1 +checking no-contents.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +No syntax or stream encoding errors found; the file may still contain
  6 +errors that qpdf cannot detect
... ...
qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
  11 +endobj
  12 +4 0 obj
  13 +<< /Length 0 /Filter /FlateDecode >>
  14 +stream
  15 +endstream
  16 +endobj
  17 +xref
  18 +0 5
  19 +0000000000 65535 f
  20 +0000000015 00000 n
  21 +0000000064 00000 n
  22 +0000000123 00000 n
  23 +0000000229 00000 n
  24 +trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
  25 +startxref
  26 +298
  27 +%%EOF
... ...
qpdf/qtest/qpdf/no-contents-none.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
  11 +endobj
  12 +xref
  13 +0 4
  14 +0000000000 65535 f
  15 +0000000015 00000 n
  16 +0000000064 00000 n
  17 +0000000123 00000 n
  18 +trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
  19 +startxref
  20 +213
  21 +%%EOF
... ...
qpdf/qtest/qpdf/no-contents-qdf.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /MediaBox [
  29 + 0
  30 + 0
  31 + 720
  32 + 720
  33 + ]
  34 + /Parent 2 0 R
  35 + /Resources <<
  36 + >>
  37 + /Type /Page
  38 +>>
  39 +endobj
  40 +
  41 +xref
  42 +0 4
  43 +0000000000 65535 f
  44 +0000000052 00000 n
  45 +0000000133 00000 n
  46 +0000000242 00000 n
  47 +trailer <<
  48 + /Root 1 0 R
  49 + /Size 4
  50 + /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>]
  51 +>>
  52 +startxref
  53 +361
  54 +%%EOF
... ...
qpdf/qtest/qpdf/no-contents.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
  11 +endobj
  12 +xref
  13 +0 4
  14 +0000000000 65535 f
  15 +0000000015 00000 n
  16 +0000000064 00000 n
  17 +0000000123 00000 n
  18 +trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >>
  19 +startxref
  20 +213
  21 +%%EOF
... ...