Commit b841c2d2e105d3dcb338864452f248ce872a92b7

Authored by m-holger
1 parent b0ee84e5

Enhance `QPDFParser` by introducing global `Limits` class for configurable constraints, replacing hardcoded values for nesting, container size, and error limits.
libqpdf/CMakeLists.txt
@@ -100,6 +100,7 @@ set(libqpdf_SOURCES @@ -100,6 +100,7 @@ set(libqpdf_SOURCES
100 ResourceFinder.cc 100 ResourceFinder.cc
101 SecureRandomDataProvider.cc 101 SecureRandomDataProvider.cc
102 SF_FlateLzwDecode.cc 102 SF_FlateLzwDecode.cc
  103 + global.cc
103 qpdf-c.cc 104 qpdf-c.cc
104 qpdfjob-c.cc 105 qpdfjob-c.cc
105 qpdflogger-c.cc) 106 qpdflogger-c.cc)
libqpdf/QPDFParser.cc
@@ -15,6 +15,8 @@ using namespace qpdf; @@ -15,6 +15,8 @@ using namespace qpdf;
15 15
16 using ObjectPtr = std::shared_ptr<QPDFObject>; 16 using ObjectPtr = std::shared_ptr<QPDFObject>;
17 17
  18 +static uint32_t const& max_nesting{global::Limits::objects_max_nesting()};
  19 +
18 // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides 20 // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
19 // special access to allow the parser to create unresolved objects and dangling references. 21 // special access to allow the parser to create unresolved objects and dangling references.
20 class QPDF::Doc::ParseGuard 22 class QPDF::Doc::ParseGuard
@@ -170,27 +172,22 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -170,27 +172,22 @@ QPDFParser::parse(bool& empty, bool content_stream)
170 // In content stream mode, leave object uninitialized to indicate EOF 172 // In content stream mode, leave object uninitialized to indicate EOF
171 return {}; 173 return {};
172 } 174 }
173 - QTC::TC("qpdf", "QPDFParser eof in parse");  
174 warn("unexpected EOF"); 175 warn("unexpected EOF");
175 return {QPDFObject::create<QPDF_Null>()}; 176 return {QPDFObject::create<QPDF_Null>()};
176 177
177 case QPDFTokenizer::tt_bad: 178 case QPDFTokenizer::tt_bad:
178 - QTC::TC("qpdf", "QPDFParser bad token in parse");  
179 return {QPDFObject::create<QPDF_Null>()}; 179 return {QPDFObject::create<QPDF_Null>()};
180 180
181 case QPDFTokenizer::tt_brace_open: 181 case QPDFTokenizer::tt_brace_open:
182 case QPDFTokenizer::tt_brace_close: 182 case QPDFTokenizer::tt_brace_close:
183 - QTC::TC("qpdf", "QPDFParser bad brace");  
184 warn("treating unexpected brace token as null"); 183 warn("treating unexpected brace token as null");
185 return {QPDFObject::create<QPDF_Null>()}; 184 return {QPDFObject::create<QPDF_Null>()};
186 185
187 case QPDFTokenizer::tt_array_close: 186 case QPDFTokenizer::tt_array_close:
188 - QTC::TC("qpdf", "QPDFParser bad array close");  
189 warn("treating unexpected array close token as null"); 187 warn("treating unexpected array close token as null");
190 return {QPDFObject::create<QPDF_Null>()}; 188 return {QPDFObject::create<QPDF_Null>()};
191 189
192 case QPDFTokenizer::tt_dict_close: 190 case QPDFTokenizer::tt_dict_close:
193 - QTC::TC("qpdf", "QPDFParser bad dictionary close");  
194 warn("unexpected dictionary close token"); 191 warn("unexpected dictionary close token");
195 return {QPDFObject::create<QPDF_Null>()}; 192 return {QPDFObject::create<QPDF_Null>()};
196 193
@@ -230,7 +227,6 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -230,7 +227,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
230 empty = true; 227 empty = true;
231 return {QPDFObject::create<QPDF_Null>()}; 228 return {QPDFObject::create<QPDF_Null>()};
232 } else { 229 } else {
233 - QTC::TC("qpdf", "QPDFParser treat word as string");  
234 warn("unknown token while reading object; treating as string"); 230 warn("unknown token while reading object; treating as string");
235 return withDescription<QPDF_String>(value); 231 return withDescription<QPDF_String>(value);
236 } 232 }
@@ -283,8 +279,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -283,8 +279,7 @@ QPDFParser::parseRemainder(bool content_stream)
283 } else if ( 279 } else if (
284 int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && 280 int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
285 tokenizer.getValue() == "R") { 281 tokenizer.getValue() == "R") {
286 - if (context == nullptr) {  
287 - QTC::TC("qpdf", "QPDFParser indirect without context"); 282 + if (!context) {
288 throw std::logic_error( 283 throw std::logic_error(
289 "QPDFParser::parse called without context on an object " 284 "QPDFParser::parse called without context on an object "
290 "with indirect references"); 285 "with indirect references");
@@ -294,7 +289,6 @@ QPDFParser::parseRemainder(bool content_stream) @@ -294,7 +289,6 @@ QPDFParser::parseRemainder(bool content_stream)
294 if (!(id < 1 || gen < 0 || gen >= 65535)) { 289 if (!(id < 1 || gen < 0 || gen >= 65535)) {
295 add(ParseGuard::getObject(context, id, gen, parse_pdf)); 290 add(ParseGuard::getObject(context, id, gen, parse_pdf));
296 } else { 291 } else {
297 - QTC::TC("qpdf", "QPDFParser invalid objgen");  
298 addNull(); 292 addNull();
299 } 293 }
300 int_count = 0; 294 int_count = 0;
@@ -317,12 +311,10 @@ QPDFParser::parseRemainder(bool content_stream) @@ -317,12 +311,10 @@ QPDFParser::parseRemainder(bool content_stream)
317 // In content stream mode, leave object uninitialized to indicate EOF 311 // In content stream mode, leave object uninitialized to indicate EOF
318 return {}; 312 return {};
319 } 313 }
320 - QTC::TC("qpdf", "QPDFParser eof in parseRemainder");  
321 warn("unexpected EOF"); 314 warn("unexpected EOF");
322 return {QPDFObject::create<QPDF_Null>()}; 315 return {QPDFObject::create<QPDF_Null>()};
323 316
324 case QPDFTokenizer::tt_bad: 317 case QPDFTokenizer::tt_bad:
325 - QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");  
326 if (tooManyBadTokens()) { 318 if (tooManyBadTokens()) {
327 return {QPDFObject::create<QPDF_Null>()}; 319 return {QPDFObject::create<QPDF_Null>()};
328 } 320 }
@@ -331,7 +323,6 @@ QPDFParser::parseRemainder(bool content_stream) @@ -331,7 +323,6 @@ QPDFParser::parseRemainder(bool content_stream)
331 323
332 case QPDFTokenizer::tt_brace_open: 324 case QPDFTokenizer::tt_brace_open:
333 case QPDFTokenizer::tt_brace_close: 325 case QPDFTokenizer::tt_brace_close:
334 - QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");  
335 warn("treating unexpected brace token as null"); 326 warn("treating unexpected brace token as null");
336 if (tooManyBadTokens()) { 327 if (tooManyBadTokens()) {
337 return {QPDFObject::create<QPDF_Null>()}; 328 return {QPDFObject::create<QPDF_Null>()};
@@ -361,7 +352,6 @@ QPDFParser::parseRemainder(bool content_stream) @@ -361,7 +352,6 @@ QPDFParser::parseRemainder(bool content_stream)
361 frame = &stack.back(); 352 frame = &stack.back();
362 add(std::move(object)); 353 add(std::move(object));
363 } else { 354 } else {
364 - QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");  
365 if (sanity_checks) { 355 if (sanity_checks) {
366 // During sanity checks, assume nesting of containers is corrupt and object is 356 // During sanity checks, assume nesting of containers is corrupt and object is
367 // unusable. 357 // unusable.
@@ -387,7 +377,6 @@ QPDFParser::parseRemainder(bool content_stream) @@ -387,7 +377,6 @@ QPDFParser::parseRemainder(bool content_stream)
387 auto& dict = frame->dict; 377 auto& dict = frame->dict;
388 378
389 if (frame->state == st_dictionary_value) { 379 if (frame->state == st_dictionary_value) {
390 - QTC::TC("qpdf", "QPDFParser no val for last key");  
391 warn( 380 warn(
392 frame->offset, 381 frame->offset,
393 "dictionary ended prematurely; using null as value for last key"); 382 "dictionary ended prematurely; using null as value for last key");
@@ -438,8 +427,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -438,8 +427,7 @@ QPDFParser::parseRemainder(bool content_stream)
438 427
439 case QPDFTokenizer::tt_array_open: 428 case QPDFTokenizer::tt_array_open:
440 case QPDFTokenizer::tt_dict_open: 429 case QPDFTokenizer::tt_dict_open:
441 - if (stack.size() > 499) {  
442 - QTC::TC("qpdf", "QPDFParser too deep"); 430 + if (stack.size() > max_nesting) {
443 warn("ignoring excessively deeply nested data structure"); 431 warn("ignoring excessively deeply nested data structure");
444 return {QPDFObject::create<QPDF_Null>()}; 432 return {QPDFObject::create<QPDF_Null>()};
445 } else { 433 } else {
@@ -510,7 +498,6 @@ QPDFParser::parseRemainder(bool content_stream) @@ -510,7 +498,6 @@ QPDFParser::parseRemainder(bool content_stream)
510 continue; 498 continue;
511 } 499 }
512 500
513 - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");  
514 warn("unknown token while reading object; treating as string"); 501 warn("unknown token while reading object; treating as string");
515 if (tooManyBadTokens()) { 502 if (tooManyBadTokens()) {
516 return {QPDFObject::create<QPDF_Null>()}; 503 return {QPDFObject::create<QPDF_Null>()};
@@ -592,8 +579,8 @@ template <typename T, typename... Args> @@ -592,8 +579,8 @@ template <typename T, typename... Args>
592 void 579 void
593 QPDFParser::addScalar(Args&&... args) 580 QPDFParser::addScalar(Args&&... args)
594 { 581 {
595 - if ((bad_count || sanity_checks) &&  
596 - (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { 582 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  583 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
597 // Stop adding scalars. We are going to abort when the close token or a bad token is 584 // Stop adding scalars. We are going to abort when the close token or a bad token is
598 // encountered. 585 // encountered.
599 max_bad_count = 0; 586 max_bad_count = 0;
@@ -650,16 +637,17 @@ QPDFParser::fixMissingKeys() @@ -650,16 +637,17 @@ QPDFParser::fixMissingKeys()
650 bool 637 bool
651 QPDFParser::tooManyBadTokens() 638 QPDFParser::tooManyBadTokens()
652 { 639 {
653 - if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { 640 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  641 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
654 if (bad_count) { 642 if (bad_count) {
655 warn( 643 warn(
656 - "encountered errors while parsing an array or dictionary with more than 5000 "  
657 - "elements; giving up on reading object"); 644 + "encountered errors while parsing an array or dictionary with more than " +
  645 + std::to_string(limit) + " elements; giving up on reading object");
658 return true; 646 return true;
659 } 647 }
660 warn( 648 warn(
661 - "encountered an array or dictionary with more than 5000 elements during xref recovery; "  
662 - "giving up on reading object"); 649 + "encountered an array or dictionary with more than " + std::to_string(limit) +
  650 + " elements during xref recovery; giving up on reading object");
663 } 651 }
664 if (max_bad_count && --max_bad_count > 0 && good_count > 4) { 652 if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
665 good_count = 0; 653 good_count = 0;
@@ -693,7 +681,6 @@ QPDFParser::warn(QPDFExc const& e) const @@ -693,7 +681,6 @@ QPDFParser::warn(QPDFExc const& e) const
693 void 681 void
694 QPDFParser::warnDuplicateKey() 682 QPDFParser::warnDuplicateKey()
695 { 683 {
696 - QTC::TC("qpdf", "QPDFParser duplicate dict key");  
697 warn( 684 warn(
698 frame->offset, 685 frame->offset,
699 "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); 686 "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
libqpdf/global.cc 0 → 100644
  1 +#include <qpdf/global_private.hh>
  2 +
  3 +using namespace qpdf;
  4 +
  5 +global::Limits global::Limits::l;
libqpdf/qpdf/QPDFObject_private.hh
@@ -5,11 +5,12 @@ @@ -5,11 +5,12 @@
5 // include/qpdf/QPDFObject.hh. See comments there for an explanation. 5 // include/qpdf/QPDFObject.hh. See comments there for an explanation.
6 6
7 #include <qpdf/Constants.h> 7 #include <qpdf/Constants.h>
  8 +#include <qpdf/Types.h>
  9 +
8 #include <qpdf/JSON.hh> 10 #include <qpdf/JSON.hh>
9 #include <qpdf/JSON_writer.hh> 11 #include <qpdf/JSON_writer.hh>
10 #include <qpdf/QPDF.hh> 12 #include <qpdf/QPDF.hh>
11 #include <qpdf/QPDFObjGen.hh> 13 #include <qpdf/QPDFObjGen.hh>
12 -#include <qpdf/Types.h>  
13 14
14 #include <map> 15 #include <map>
15 #include <memory> 16 #include <memory>
libqpdf/qpdf/QPDFParser.hh
@@ -5,10 +5,14 @@ @@ -5,10 +5,14 @@
5 #include <qpdf/QPDFObjectHandle_private.hh> 5 #include <qpdf/QPDFObjectHandle_private.hh>
6 #include <qpdf/QPDFObject_private.hh> 6 #include <qpdf/QPDFObject_private.hh>
7 #include <qpdf/QPDFTokenizer_private.hh> 7 #include <qpdf/QPDFTokenizer_private.hh>
  8 +#include <qpdf/global_private.hh>
8 9
9 #include <memory> 10 #include <memory>
10 #include <string> 11 #include <string>
11 12
  13 +using namespace qpdf;
  14 +using namespace qpdf::global;
  15 +
12 class QPDFParser 16 class QPDFParser
13 { 17 {
14 public: 18 public:
@@ -136,7 +140,7 @@ class QPDFParser @@ -136,7 +140,7 @@ class QPDFParser
136 // it only gets incremented or reset when a bad token is encountered. 140 // it only gets incremented or reset when a bad token is encountered.
137 int bad_count{0}; 141 int bad_count{0};
138 // Number of bad tokens (remaining) before giving up. 142 // Number of bad tokens (remaining) before giving up.
139 - int max_bad_count{15}; 143 + uint32_t max_bad_count{Limits::objects_max_errors()};
140 // Number of good tokens since last bad token. Irrelevant if bad_count == 0. 144 // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
141 int good_count{0}; 145 int good_count{0};
142 // Start offset including any leading whitespace. 146 // Start offset including any leading whitespace.
libqpdf/qpdf/global_private.hh 0 → 100644
  1 +
  2 +#ifndef GLOBAL_PRIVATE_HH
  3 +#define GLOBAL_PRIVATE_HH
  4 +
  5 +#include <qpdf/Constants.h>
  6 +
  7 +#include <cstdint>
  8 +#include <limits>
  9 +
  10 +namespace qpdf
  11 +{
  12 + namespace global
  13 + {
  14 + class Limits
  15 + {
  16 + public:
  17 + Limits(Limits const&) = delete;
  18 + Limits(Limits&&) = delete;
  19 + Limits& operator=(Limits const&) = delete;
  20 + Limits& operator=(Limits&&) = delete;
  21 +
  22 + static uint32_t const&
  23 + objects_max_nesting()
  24 + {
  25 + return l.objects_max_nesting_;
  26 + }
  27 +
  28 + static uint32_t const&
  29 + objects_max_errors()
  30 + {
  31 + return l.objects_max_errors_;
  32 + }
  33 +
  34 + static uint32_t const&
  35 + objects_max_container_size(bool damaged)
  36 + {
  37 + return damaged ? l.objects_max_container_size_damaged_
  38 + : l.objects_max_container_size_;
  39 + }
  40 +
  41 + private:
  42 + Limits() = default;
  43 + ~Limits() = default;
  44 +
  45 + static Limits l;
  46 +
  47 + uint32_t objects_max_nesting_{499};
  48 + uint32_t objects_max_errors_{15};
  49 + uint32_t objects_max_container_size_{std::numeric_limits<uint32_t>::max()};
  50 + uint32_t objects_max_container_size_damaged_{5'000};
  51 + };
  52 +
  53 + } // namespace global
  54 +
  55 +} // namespace qpdf
  56 +
  57 +#endif // GLOBAL_PRIVATE_HH
qpdf/qpdf.testcov
@@ -27,11 +27,6 @@ main QTest stream 0 @@ -27,11 +27,6 @@ main QTest stream 0
27 QPDF lin write nshared_total > nshared_first_page 1 27 QPDF lin write nshared_total > nshared_first_page 1
28 QPDFWriter encrypted hint stream 0 28 QPDFWriter encrypted hint stream 0
29 QPDF xref gen > 0 1 29 QPDF xref gen > 0 1
30 -QPDFParser bad brace 0  
31 -QPDFParser bad brace in parseRemainder 0  
32 -QPDFParser bad array close 0  
33 -QPDFParser bad array close in parseRemainder 0  
34 -QPDFParser bad dictionary close 0  
35 QPDFTokenizer bad ) 0 30 QPDFTokenizer bad ) 0
36 QPDFTokenizer bad > 0 31 QPDFTokenizer bad > 0
37 QPDFTokenizer bad hexstring character 0 32 QPDFTokenizer bad hexstring character 0
@@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0 @@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0
123 QPDF_Stream unknown stream length 0 118 QPDF_Stream unknown stream length 0
124 QPDF replaceReserved 0 119 QPDF replaceReserved 0
125 QPDFWriter copy use_aes 1 120 QPDFWriter copy use_aes 1
126 -QPDFParser indirect without context 0  
127 QPDFObjectHandle trailing data in parse 0 121 QPDFObjectHandle trailing data in parse 0
128 QPDFTokenizer EOF reading token 0 122 QPDFTokenizer EOF reading token 0
129 QPDFTokenizer EOF reading appendable token 0 123 QPDFTokenizer EOF reading appendable token 0
@@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0 @@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0
145 QPDFWriter standard deterministic ID 1 139 QPDFWriter standard deterministic ID 1
146 QPDFWriter linearized deterministic ID 1 140 QPDFWriter linearized deterministic ID 1
147 qpdf-c called qpdf_set_deterministic_ID 0 141 qpdf-c called qpdf_set_deterministic_ID 0
148 -QPDFParser invalid objgen 0  
149 -QPDFParser treat word as string 0  
150 -QPDFParser treat word as string in parseRemainder 0  
151 QPDFParser found fake 1 142 QPDFParser found fake 1
152 -QPDFParser no val for last key 0  
153 QPDFObjectHandle errors in parsecontent 0 143 QPDFObjectHandle errors in parsecontent 0
154 QPDFJob split-pages %d 0 144 QPDFJob split-pages %d 0
155 QPDFJob split-pages .pdf 0 145 QPDFJob split-pages .pdf 0
@@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0 @@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0
168 QPDFObjectHandle coalesce called on stream 0 158 QPDFObjectHandle coalesce called on stream 0
169 QPDFObjectHandle coalesce provide stream data 0 159 QPDFObjectHandle coalesce provide stream data 0
170 QPDF_Stream bad token at end during normalize 0 160 QPDF_Stream bad token at end during normalize 0
171 -QPDFParser bad token in parse 0  
172 -QPDFParser bad token in parseRemainder 0  
173 -QPDFParser eof in parse 0  
174 -QPDFParser eof in parseRemainder 0  
175 QPDFObjectHandle boolean returning false 0 161 QPDFObjectHandle boolean returning false 0
176 QPDFObjectHandle real returning 0.0 0 162 QPDFObjectHandle real returning 0.0 0
177 QPDFObjectHandle operator returning fake value 0 163 QPDFObjectHandle operator returning fake value 0
@@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0 @@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0
189 QPDFObjectHandle numeric non-numeric 0 175 QPDFObjectHandle numeric non-numeric 0
190 QPDFObjectHandle erase array bounds 0 176 QPDFObjectHandle erase array bounds 0
191 qpdf-c called qpdf_check_pdf 0 177 qpdf-c called qpdf_check_pdf 0
192 -QPDFParser too deep 0  
193 QPDFFormFieldObjectHelper TU present 0 178 QPDFFormFieldObjectHelper TU present 0
194 QPDFFormFieldObjectHelper TM present 0 179 QPDFFormFieldObjectHelper TM present 0
195 QPDFFormFieldObjectHelper TU absent 0 180 QPDFFormFieldObjectHelper TU absent 0
@@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0 @@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0
252 QPDF eof skipping spaces before xref 1 237 QPDF eof skipping spaces before xref 1
253 QPDF_encryption user matches owner V < 5 0 238 QPDF_encryption user matches owner V < 5 0
254 QPDF_encryption same password 1 239 QPDF_encryption same password 1
255 -QPDFParser duplicate dict key 0  
256 QPDFWriter no encryption sig contents 0 240 QPDFWriter no encryption sig contents 0
257 QPDFPageObjectHelper colorspace lookup 0 241 QPDFPageObjectHelper colorspace lookup 0
258 QPDFPageObjectHelper filter form xobject 0 242 QPDFPageObjectHelper filter form xobject 0