Commit b841c2d2e105d3dcb338864452f248ce872a92b7
1 parent
b0ee84e5
Enhance `QPDFParser` by introducing global `Limits` class for configurable const…
…raints, replacing hardcoded values for nesting, container size, and error limits.
Showing
7 changed files
with
82 additions
and
43 deletions
libqpdf/CMakeLists.txt
libqpdf/QPDFParser.cc
| ... | ... | @@ -15,6 +15,8 @@ using namespace qpdf; |
| 15 | 15 | |
| 16 | 16 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 17 | 17 | |
| 18 | +static uint32_t const& max_nesting{global::Limits::objects_max_nesting()}; | |
| 19 | + | |
| 18 | 20 | // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides |
| 19 | 21 | // special access to allow the parser to create unresolved objects and dangling references. |
| 20 | 22 | class QPDF::Doc::ParseGuard |
| ... | ... | @@ -170,27 +172,22 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 170 | 172 | // In content stream mode, leave object uninitialized to indicate EOF |
| 171 | 173 | return {}; |
| 172 | 174 | } |
| 173 | - QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 174 | 175 | warn("unexpected EOF"); |
| 175 | 176 | return {QPDFObject::create<QPDF_Null>()}; |
| 176 | 177 | |
| 177 | 178 | case QPDFTokenizer::tt_bad: |
| 178 | - QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 179 | 179 | return {QPDFObject::create<QPDF_Null>()}; |
| 180 | 180 | |
| 181 | 181 | case QPDFTokenizer::tt_brace_open: |
| 182 | 182 | case QPDFTokenizer::tt_brace_close: |
| 183 | - QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 184 | 183 | warn("treating unexpected brace token as null"); |
| 185 | 184 | return {QPDFObject::create<QPDF_Null>()}; |
| 186 | 185 | |
| 187 | 186 | case QPDFTokenizer::tt_array_close: |
| 188 | - QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 189 | 187 | warn("treating unexpected array close token as null"); |
| 190 | 188 | return {QPDFObject::create<QPDF_Null>()}; |
| 191 | 189 | |
| 192 | 190 | case QPDFTokenizer::tt_dict_close: |
| 193 | - QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 194 | 191 | warn("unexpected dictionary close token"); |
| 195 | 192 | return {QPDFObject::create<QPDF_Null>()}; |
| 196 | 193 | |
| ... | ... | @@ -230,7 +227,6 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 230 | 227 | empty = true; |
| 231 | 228 | return {QPDFObject::create<QPDF_Null>()}; |
| 232 | 229 | } else { |
| 233 | - QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 234 | 230 | warn("unknown token while reading object; treating as string"); |
| 235 | 231 | return withDescription<QPDF_String>(value); |
| 236 | 232 | } |
| ... | ... | @@ -283,8 +279,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 283 | 279 | } else if ( |
| 284 | 280 | int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && |
| 285 | 281 | tokenizer.getValue() == "R") { |
| 286 | - if (context == nullptr) { | |
| 287 | - QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 282 | + if (!context) { | |
| 288 | 283 | throw std::logic_error( |
| 289 | 284 | "QPDFParser::parse called without context on an object " |
| 290 | 285 | "with indirect references"); |
| ... | ... | @@ -294,7 +289,6 @@ QPDFParser::parseRemainder(bool content_stream) |
| 294 | 289 | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
| 295 | 290 | add(ParseGuard::getObject(context, id, gen, parse_pdf)); |
| 296 | 291 | } else { |
| 297 | - QTC::TC("qpdf", "QPDFParser invalid objgen"); | |
| 298 | 292 | addNull(); |
| 299 | 293 | } |
| 300 | 294 | int_count = 0; |
| ... | ... | @@ -317,12 +311,10 @@ QPDFParser::parseRemainder(bool content_stream) |
| 317 | 311 | // In content stream mode, leave object uninitialized to indicate EOF |
| 318 | 312 | return {}; |
| 319 | 313 | } |
| 320 | - QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); | |
| 321 | 314 | warn("unexpected EOF"); |
| 322 | 315 | return {QPDFObject::create<QPDF_Null>()}; |
| 323 | 316 | |
| 324 | 317 | case QPDFTokenizer::tt_bad: |
| 325 | - QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); | |
| 326 | 318 | if (tooManyBadTokens()) { |
| 327 | 319 | return {QPDFObject::create<QPDF_Null>()}; |
| 328 | 320 | } |
| ... | ... | @@ -331,7 +323,6 @@ QPDFParser::parseRemainder(bool content_stream) |
| 331 | 323 | |
| 332 | 324 | case QPDFTokenizer::tt_brace_open: |
| 333 | 325 | case QPDFTokenizer::tt_brace_close: |
| 334 | - QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); | |
| 335 | 326 | warn("treating unexpected brace token as null"); |
| 336 | 327 | if (tooManyBadTokens()) { |
| 337 | 328 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -361,7 +352,6 @@ QPDFParser::parseRemainder(bool content_stream) |
| 361 | 352 | frame = &stack.back(); |
| 362 | 353 | add(std::move(object)); |
| 363 | 354 | } else { |
| 364 | - QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); | |
| 365 | 355 | if (sanity_checks) { |
| 366 | 356 | // During sanity checks, assume nesting of containers is corrupt and object is |
| 367 | 357 | // unusable. |
| ... | ... | @@ -387,7 +377,6 @@ QPDFParser::parseRemainder(bool content_stream) |
| 387 | 377 | auto& dict = frame->dict; |
| 388 | 378 | |
| 389 | 379 | if (frame->state == st_dictionary_value) { |
| 390 | - QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 391 | 380 | warn( |
| 392 | 381 | frame->offset, |
| 393 | 382 | "dictionary ended prematurely; using null as value for last key"); |
| ... | ... | @@ -438,8 +427,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 438 | 427 | |
| 439 | 428 | case QPDFTokenizer::tt_array_open: |
| 440 | 429 | case QPDFTokenizer::tt_dict_open: |
| 441 | - if (stack.size() > 499) { | |
| 442 | - QTC::TC("qpdf", "QPDFParser too deep"); | |
| 430 | + if (stack.size() > max_nesting) { | |
| 443 | 431 | warn("ignoring excessively deeply nested data structure"); |
| 444 | 432 | return {QPDFObject::create<QPDF_Null>()}; |
| 445 | 433 | } else { |
| ... | ... | @@ -510,7 +498,6 @@ QPDFParser::parseRemainder(bool content_stream) |
| 510 | 498 | continue; |
| 511 | 499 | } |
| 512 | 500 | |
| 513 | - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | |
| 514 | 501 | warn("unknown token while reading object; treating as string"); |
| 515 | 502 | if (tooManyBadTokens()) { |
| 516 | 503 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -592,8 +579,8 @@ template <typename T, typename... Args> |
| 592 | 579 | void |
| 593 | 580 | QPDFParser::addScalar(Args&&... args) |
| 594 | 581 | { |
| 595 | - if ((bad_count || sanity_checks) && | |
| 596 | - (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | |
| 582 | + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | |
| 583 | + if (frame->olist.size() > limit || frame->dict.size() > limit) { | |
| 597 | 584 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 598 | 585 | // encountered. |
| 599 | 586 | max_bad_count = 0; |
| ... | ... | @@ -650,16 +637,17 @@ QPDFParser::fixMissingKeys() |
| 650 | 637 | bool |
| 651 | 638 | QPDFParser::tooManyBadTokens() |
| 652 | 639 | { |
| 653 | - if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { | |
| 640 | + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | |
| 641 | + if (frame->olist.size() > limit || frame->dict.size() > limit) { | |
| 654 | 642 | if (bad_count) { |
| 655 | 643 | warn( |
| 656 | - "encountered errors while parsing an array or dictionary with more than 5000 " | |
| 657 | - "elements; giving up on reading object"); | |
| 644 | + "encountered errors while parsing an array or dictionary with more than " + | |
| 645 | + std::to_string(limit) + " elements; giving up on reading object"); | |
| 658 | 646 | return true; |
| 659 | 647 | } |
| 660 | 648 | warn( |
| 661 | - "encountered an array or dictionary with more than 5000 elements during xref recovery; " | |
| 662 | - "giving up on reading object"); | |
| 649 | + "encountered an array or dictionary with more than " + std::to_string(limit) + | |
| 650 | + " elements during xref recovery; giving up on reading object"); | |
| 663 | 651 | } |
| 664 | 652 | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { |
| 665 | 653 | good_count = 0; |
| ... | ... | @@ -693,7 +681,6 @@ QPDFParser::warn(QPDFExc const& e) const |
| 693 | 681 | void |
| 694 | 682 | QPDFParser::warnDuplicateKey() |
| 695 | 683 | { |
| 696 | - QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 697 | 684 | warn( |
| 698 | 685 | frame->offset, |
| 699 | 686 | "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); | ... | ... |
libqpdf/global.cc
0 → 100644
libqpdf/qpdf/QPDFObject_private.hh
| ... | ... | @@ -5,11 +5,12 @@ |
| 5 | 5 | // include/qpdf/QPDFObject.hh. See comments there for an explanation. |
| 6 | 6 | |
| 7 | 7 | #include <qpdf/Constants.h> |
| 8 | +#include <qpdf/Types.h> | |
| 9 | + | |
| 8 | 10 | #include <qpdf/JSON.hh> |
| 9 | 11 | #include <qpdf/JSON_writer.hh> |
| 10 | 12 | #include <qpdf/QPDF.hh> |
| 11 | 13 | #include <qpdf/QPDFObjGen.hh> |
| 12 | -#include <qpdf/Types.h> | |
| 13 | 14 | |
| 14 | 15 | #include <map> |
| 15 | 16 | #include <memory> | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -5,10 +5,14 @@ |
| 5 | 5 | #include <qpdf/QPDFObjectHandle_private.hh> |
| 6 | 6 | #include <qpdf/QPDFObject_private.hh> |
| 7 | 7 | #include <qpdf/QPDFTokenizer_private.hh> |
| 8 | +#include <qpdf/global_private.hh> | |
| 8 | 9 | |
| 9 | 10 | #include <memory> |
| 10 | 11 | #include <string> |
| 11 | 12 | |
| 13 | +using namespace qpdf; | |
| 14 | +using namespace qpdf::global; | |
| 15 | + | |
| 12 | 16 | class QPDFParser |
| 13 | 17 | { |
| 14 | 18 | public: |
| ... | ... | @@ -136,7 +140,7 @@ class QPDFParser |
| 136 | 140 | // it only gets incremented or reset when a bad token is encountered. |
| 137 | 141 | int bad_count{0}; |
| 138 | 142 | // Number of bad tokens (remaining) before giving up. |
| 139 | - int max_bad_count{15}; | |
| 143 | + uint32_t max_bad_count{Limits::objects_max_errors()}; | |
| 140 | 144 | // Number of good tokens since last bad token. Irrelevant if bad_count == 0. |
| 141 | 145 | int good_count{0}; |
| 142 | 146 | // Start offset including any leading whitespace. | ... | ... |
libqpdf/qpdf/global_private.hh
0 → 100644
| 1 | + | |
| 2 | +#ifndef GLOBAL_PRIVATE_HH | |
| 3 | +#define GLOBAL_PRIVATE_HH | |
| 4 | + | |
| 5 | +#include <qpdf/Constants.h> | |
| 6 | + | |
| 7 | +#include <cstdint> | |
| 8 | +#include <limits> | |
| 9 | + | |
| 10 | +namespace qpdf | |
| 11 | +{ | |
| 12 | + namespace global | |
| 13 | + { | |
| 14 | + class Limits | |
| 15 | + { | |
| 16 | + public: | |
| 17 | + Limits(Limits const&) = delete; | |
| 18 | + Limits(Limits&&) = delete; | |
| 19 | + Limits& operator=(Limits const&) = delete; | |
| 20 | + Limits& operator=(Limits&&) = delete; | |
| 21 | + | |
| 22 | + static uint32_t const& | |
| 23 | + objects_max_nesting() | |
| 24 | + { | |
| 25 | + return l.objects_max_nesting_; | |
| 26 | + } | |
| 27 | + | |
| 28 | + static uint32_t const& | |
| 29 | + objects_max_errors() | |
| 30 | + { | |
| 31 | + return l.objects_max_errors_; | |
| 32 | + } | |
| 33 | + | |
| 34 | + static uint32_t const& | |
| 35 | + objects_max_container_size(bool damaged) | |
| 36 | + { | |
| 37 | + return damaged ? l.objects_max_container_size_damaged_ | |
| 38 | + : l.objects_max_container_size_; | |
| 39 | + } | |
| 40 | + | |
| 41 | + private: | |
| 42 | + Limits() = default; | |
| 43 | + ~Limits() = default; | |
| 44 | + | |
| 45 | + static Limits l; | |
| 46 | + | |
| 47 | + uint32_t objects_max_nesting_{499}; | |
| 48 | + uint32_t objects_max_errors_{15}; | |
| 49 | + uint32_t objects_max_container_size_{std::numeric_limits<uint32_t>::max()}; | |
| 50 | + uint32_t objects_max_container_size_damaged_{5'000}; | |
| 51 | + }; | |
| 52 | + | |
| 53 | + } // namespace global | |
| 54 | + | |
| 55 | +} // namespace qpdf | |
| 56 | + | |
| 57 | +#endif // GLOBAL_PRIVATE_HH | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -27,11 +27,6 @@ main QTest stream 0 |
| 27 | 27 | QPDF lin write nshared_total > nshared_first_page 1 |
| 28 | 28 | QPDFWriter encrypted hint stream 0 |
| 29 | 29 | QPDF xref gen > 0 1 |
| 30 | -QPDFParser bad brace 0 | |
| 31 | -QPDFParser bad brace in parseRemainder 0 | |
| 32 | -QPDFParser bad array close 0 | |
| 33 | -QPDFParser bad array close in parseRemainder 0 | |
| 34 | -QPDFParser bad dictionary close 0 | |
| 35 | 30 | QPDFTokenizer bad ) 0 |
| 36 | 31 | QPDFTokenizer bad > 0 |
| 37 | 32 | QPDFTokenizer bad hexstring character 0 |
| ... | ... | @@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0 |
| 123 | 118 | QPDF_Stream unknown stream length 0 |
| 124 | 119 | QPDF replaceReserved 0 |
| 125 | 120 | QPDFWriter copy use_aes 1 |
| 126 | -QPDFParser indirect without context 0 | |
| 127 | 121 | QPDFObjectHandle trailing data in parse 0 |
| 128 | 122 | QPDFTokenizer EOF reading token 0 |
| 129 | 123 | QPDFTokenizer EOF reading appendable token 0 |
| ... | ... | @@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0 |
| 145 | 139 | QPDFWriter standard deterministic ID 1 |
| 146 | 140 | QPDFWriter linearized deterministic ID 1 |
| 147 | 141 | qpdf-c called qpdf_set_deterministic_ID 0 |
| 148 | -QPDFParser invalid objgen 0 | |
| 149 | -QPDFParser treat word as string 0 | |
| 150 | -QPDFParser treat word as string in parseRemainder 0 | |
| 151 | 142 | QPDFParser found fake 1 |
| 152 | -QPDFParser no val for last key 0 | |
| 153 | 143 | QPDFObjectHandle errors in parsecontent 0 |
| 154 | 144 | QPDFJob split-pages %d 0 |
| 155 | 145 | QPDFJob split-pages .pdf 0 |
| ... | ... | @@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0 |
| 168 | 158 | QPDFObjectHandle coalesce called on stream 0 |
| 169 | 159 | QPDFObjectHandle coalesce provide stream data 0 |
| 170 | 160 | QPDF_Stream bad token at end during normalize 0 |
| 171 | -QPDFParser bad token in parse 0 | |
| 172 | -QPDFParser bad token in parseRemainder 0 | |
| 173 | -QPDFParser eof in parse 0 | |
| 174 | -QPDFParser eof in parseRemainder 0 | |
| 175 | 161 | QPDFObjectHandle boolean returning false 0 |
| 176 | 162 | QPDFObjectHandle real returning 0.0 0 |
| 177 | 163 | QPDFObjectHandle operator returning fake value 0 |
| ... | ... | @@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0 |
| 189 | 175 | QPDFObjectHandle numeric non-numeric 0 |
| 190 | 176 | QPDFObjectHandle erase array bounds 0 |
| 191 | 177 | qpdf-c called qpdf_check_pdf 0 |
| 192 | -QPDFParser too deep 0 | |
| 193 | 178 | QPDFFormFieldObjectHelper TU present 0 |
| 194 | 179 | QPDFFormFieldObjectHelper TM present 0 |
| 195 | 180 | QPDFFormFieldObjectHelper TU absent 0 |
| ... | ... | @@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0 |
| 252 | 237 | QPDF eof skipping spaces before xref 1 |
| 253 | 238 | QPDF_encryption user matches owner V < 5 0 |
| 254 | 239 | QPDF_encryption same password 1 |
| 255 | -QPDFParser duplicate dict key 0 | |
| 256 | 240 | QPDFWriter no encryption sig contents 0 |
| 257 | 241 | QPDFPageObjectHelper colorspace lookup 0 |
| 258 | 242 | QPDFPageObjectHelper filter form xobject 0 | ... | ... |