Commit b841c2d2e105d3dcb338864452f248ce872a92b7

Authored by m-holger
1 parent b0ee84e5

Enhance `QPDFParser` by introducing global `Limits` class for configurable const…

…raints, replacing hardcoded values for nesting, container size, and error limits.
libqpdf/CMakeLists.txt
... ... @@ -100,6 +100,7 @@ set(libqpdf_SOURCES
100 100 ResourceFinder.cc
101 101 SecureRandomDataProvider.cc
102 102 SF_FlateLzwDecode.cc
  103 + global.cc
103 104 qpdf-c.cc
104 105 qpdfjob-c.cc
105 106 qpdflogger-c.cc)
... ...
libqpdf/QPDFParser.cc
... ... @@ -15,6 +15,8 @@ using namespace qpdf;
15 15  
16 16 using ObjectPtr = std::shared_ptr<QPDFObject>;
17 17  
  18 +static uint32_t const& max_nesting{global::Limits::objects_max_nesting()};
  19 +
18 20 // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
19 21 // special access to allow the parser to create unresolved objects and dangling references.
20 22 class QPDF::Doc::ParseGuard
... ... @@ -170,27 +172,22 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
170 172 // In content stream mode, leave object uninitialized to indicate EOF
171 173 return {};
172 174 }
173   - QTC::TC("qpdf", "QPDFParser eof in parse");
174 175 warn("unexpected EOF");
175 176 return {QPDFObject::create<QPDF_Null>()};
176 177  
177 178 case QPDFTokenizer::tt_bad:
178   - QTC::TC("qpdf", "QPDFParser bad token in parse");
179 179 return {QPDFObject::create<QPDF_Null>()};
180 180  
181 181 case QPDFTokenizer::tt_brace_open:
182 182 case QPDFTokenizer::tt_brace_close:
183   - QTC::TC("qpdf", "QPDFParser bad brace");
184 183 warn("treating unexpected brace token as null");
185 184 return {QPDFObject::create<QPDF_Null>()};
186 185  
187 186 case QPDFTokenizer::tt_array_close:
188   - QTC::TC("qpdf", "QPDFParser bad array close");
189 187 warn("treating unexpected array close token as null");
190 188 return {QPDFObject::create<QPDF_Null>()};
191 189  
192 190 case QPDFTokenizer::tt_dict_close:
193   - QTC::TC("qpdf", "QPDFParser bad dictionary close");
194 191 warn("unexpected dictionary close token");
195 192 return {QPDFObject::create<QPDF_Null>()};
196 193  
... ... @@ -230,7 +227,6 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
230 227 empty = true;
231 228 return {QPDFObject::create<QPDF_Null>()};
232 229 } else {
233   - QTC::TC("qpdf", "QPDFParser treat word as string");
234 230 warn("unknown token while reading object; treating as string");
235 231 return withDescription<QPDF_String>(value);
236 232 }
... ... @@ -283,8 +279,7 @@ QPDFParser::parseRemainder(bool content_stream)
283 279 } else if (
284 280 int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
285 281 tokenizer.getValue() == "R") {
286   - if (context == nullptr) {
287   - QTC::TC("qpdf", "QPDFParser indirect without context");
  282 + if (!context) {
288 283 throw std::logic_error(
289 284 "QPDFParser::parse called without context on an object "
290 285 "with indirect references");
... ... @@ -294,7 +289,6 @@ QPDFParser::parseRemainder(bool content_stream)
294 289 if (!(id < 1 || gen < 0 || gen >= 65535)) {
295 290 add(ParseGuard::getObject(context, id, gen, parse_pdf));
296 291 } else {
297   - QTC::TC("qpdf", "QPDFParser invalid objgen");
298 292 addNull();
299 293 }
300 294 int_count = 0;
... ... @@ -317,12 +311,10 @@ QPDFParser::parseRemainder(bool content_stream)
317 311 // In content stream mode, leave object uninitialized to indicate EOF
318 312 return {};
319 313 }
320   - QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
321 314 warn("unexpected EOF");
322 315 return {QPDFObject::create<QPDF_Null>()};
323 316  
324 317 case QPDFTokenizer::tt_bad:
325   - QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
326 318 if (tooManyBadTokens()) {
327 319 return {QPDFObject::create<QPDF_Null>()};
328 320 }
... ... @@ -331,7 +323,6 @@ QPDFParser::parseRemainder(bool content_stream)
331 323  
332 324 case QPDFTokenizer::tt_brace_open:
333 325 case QPDFTokenizer::tt_brace_close:
334   - QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
335 326 warn("treating unexpected brace token as null");
336 327 if (tooManyBadTokens()) {
337 328 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -361,7 +352,6 @@ QPDFParser::parseRemainder(bool content_stream)
361 352 frame = &stack.back();
362 353 add(std::move(object));
363 354 } else {
364   - QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
365 355 if (sanity_checks) {
366 356 // During sanity checks, assume nesting of containers is corrupt and object is
367 357 // unusable.
... ... @@ -387,7 +377,6 @@ QPDFParser::parseRemainder(bool content_stream)
387 377 auto& dict = frame->dict;
388 378  
389 379 if (frame->state == st_dictionary_value) {
390   - QTC::TC("qpdf", "QPDFParser no val for last key");
391 380 warn(
392 381 frame->offset,
393 382 "dictionary ended prematurely; using null as value for last key");
... ... @@ -438,8 +427,7 @@ QPDFParser::parseRemainder(bool content_stream)
438 427  
439 428 case QPDFTokenizer::tt_array_open:
440 429 case QPDFTokenizer::tt_dict_open:
441   - if (stack.size() > 499) {
442   - QTC::TC("qpdf", "QPDFParser too deep");
  430 + if (stack.size() > max_nesting) {
443 431 warn("ignoring excessively deeply nested data structure");
444 432 return {QPDFObject::create<QPDF_Null>()};
445 433 } else {
... ... @@ -510,7 +498,6 @@ QPDFParser::parseRemainder(bool content_stream)
510 498 continue;
511 499 }
512 500  
513   - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
514 501 warn("unknown token while reading object; treating as string");
515 502 if (tooManyBadTokens()) {
516 503 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -592,8 +579,8 @@ template &lt;typename T, typename... Args&gt;
592 579 void
593 580 QPDFParser::addScalar(Args&&... args)
594 581 {
595   - if ((bad_count || sanity_checks) &&
596   - (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
  582 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  583 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
597 584 // Stop adding scalars. We are going to abort when the close token or a bad token is
598 585 // encountered.
599 586 max_bad_count = 0;
... ... @@ -650,16 +637,17 @@ QPDFParser::fixMissingKeys()
650 637 bool
651 638 QPDFParser::tooManyBadTokens()
652 639 {
653   - if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  640 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  641 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
654 642 if (bad_count) {
655 643 warn(
656   - "encountered errors while parsing an array or dictionary with more than 5000 "
657   - "elements; giving up on reading object");
  644 + "encountered errors while parsing an array or dictionary with more than " +
  645 + std::to_string(limit) + " elements; giving up on reading object");
658 646 return true;
659 647 }
660 648 warn(
661   - "encountered an array or dictionary with more than 5000 elements during xref recovery; "
662   - "giving up on reading object");
  649 + "encountered an array or dictionary with more than " + std::to_string(limit) +
  650 + " elements during xref recovery; giving up on reading object");
663 651 }
664 652 if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
665 653 good_count = 0;
... ... @@ -693,7 +681,6 @@ QPDFParser::warn(QPDFExc const&amp; e) const
693 681 void
694 682 QPDFParser::warnDuplicateKey()
695 683 {
696   - QTC::TC("qpdf", "QPDFParser duplicate dict key");
697 684 warn(
698 685 frame->offset,
699 686 "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
... ...
libqpdf/global.cc 0 → 100644
  1 +#include <qpdf/global_private.hh>
  2 +
  3 +using namespace qpdf;
  4 +
  5 +global::Limits global::Limits::l;
... ...
libqpdf/qpdf/QPDFObject_private.hh
... ... @@ -5,11 +5,12 @@
5 5 // include/qpdf/QPDFObject.hh. See comments there for an explanation.
6 6  
7 7 #include <qpdf/Constants.h>
  8 +#include <qpdf/Types.h>
  9 +
8 10 #include <qpdf/JSON.hh>
9 11 #include <qpdf/JSON_writer.hh>
10 12 #include <qpdf/QPDF.hh>
11 13 #include <qpdf/QPDFObjGen.hh>
12   -#include <qpdf/Types.h>
13 14  
14 15 #include <map>
15 16 #include <memory>
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -5,10 +5,14 @@
5 5 #include <qpdf/QPDFObjectHandle_private.hh>
6 6 #include <qpdf/QPDFObject_private.hh>
7 7 #include <qpdf/QPDFTokenizer_private.hh>
  8 +#include <qpdf/global_private.hh>
8 9  
9 10 #include <memory>
10 11 #include <string>
11 12  
  13 +using namespace qpdf;
  14 +using namespace qpdf::global;
  15 +
12 16 class QPDFParser
13 17 {
14 18 public:
... ... @@ -136,7 +140,7 @@ class QPDFParser
136 140 // it only gets incremented or reset when a bad token is encountered.
137 141 int bad_count{0};
138 142 // Number of bad tokens (remaining) before giving up.
139   - int max_bad_count{15};
  143 + uint32_t max_bad_count{Limits::objects_max_errors()};
140 144 // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
141 145 int good_count{0};
142 146 // Start offset including any leading whitespace.
... ...
libqpdf/qpdf/global_private.hh 0 → 100644
  1 +
  2 +#ifndef GLOBAL_PRIVATE_HH
  3 +#define GLOBAL_PRIVATE_HH
  4 +
  5 +#include <qpdf/Constants.h>
  6 +
  7 +#include <cstdint>
  8 +#include <limits>
  9 +
  10 +namespace qpdf
  11 +{
  12 + namespace global
  13 + {
  14 + class Limits
  15 + {
  16 + public:
  17 + Limits(Limits const&) = delete;
  18 + Limits(Limits&&) = delete;
  19 + Limits& operator=(Limits const&) = delete;
  20 + Limits& operator=(Limits&&) = delete;
  21 +
  22 + static uint32_t const&
  23 + objects_max_nesting()
  24 + {
  25 + return l.objects_max_nesting_;
  26 + }
  27 +
  28 + static uint32_t const&
  29 + objects_max_errors()
  30 + {
  31 + return l.objects_max_errors_;
  32 + }
  33 +
  34 + static uint32_t const&
  35 + objects_max_container_size(bool damaged)
  36 + {
  37 + return damaged ? l.objects_max_container_size_damaged_
  38 + : l.objects_max_container_size_;
  39 + }
  40 +
  41 + private:
  42 + Limits() = default;
  43 + ~Limits() = default;
  44 +
  45 + static Limits l;
  46 +
  47 + uint32_t objects_max_nesting_{499};
  48 + uint32_t objects_max_errors_{15};
  49 + uint32_t objects_max_container_size_{std::numeric_limits<uint32_t>::max()};
  50 + uint32_t objects_max_container_size_damaged_{5'000};
  51 + };
  52 +
  53 + } // namespace global
  54 +
  55 +} // namespace qpdf
  56 +
  57 +#endif // GLOBAL_PRIVATE_HH
... ...
qpdf/qpdf.testcov
... ... @@ -27,11 +27,6 @@ main QTest stream 0
27 27 QPDF lin write nshared_total > nshared_first_page 1
28 28 QPDFWriter encrypted hint stream 0
29 29 QPDF xref gen > 0 1
30   -QPDFParser bad brace 0
31   -QPDFParser bad brace in parseRemainder 0
32   -QPDFParser bad array close 0
33   -QPDFParser bad array close in parseRemainder 0
34   -QPDFParser bad dictionary close 0
35 30 QPDFTokenizer bad ) 0
36 31 QPDFTokenizer bad > 0
37 32 QPDFTokenizer bad hexstring character 0
... ... @@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0
123 118 QPDF_Stream unknown stream length 0
124 119 QPDF replaceReserved 0
125 120 QPDFWriter copy use_aes 1
126   -QPDFParser indirect without context 0
127 121 QPDFObjectHandle trailing data in parse 0
128 122 QPDFTokenizer EOF reading token 0
129 123 QPDFTokenizer EOF reading appendable token 0
... ... @@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0
145 139 QPDFWriter standard deterministic ID 1
146 140 QPDFWriter linearized deterministic ID 1
147 141 qpdf-c called qpdf_set_deterministic_ID 0
148   -QPDFParser invalid objgen 0
149   -QPDFParser treat word as string 0
150   -QPDFParser treat word as string in parseRemainder 0
151 142 QPDFParser found fake 1
152   -QPDFParser no val for last key 0
153 143 QPDFObjectHandle errors in parsecontent 0
154 144 QPDFJob split-pages %d 0
155 145 QPDFJob split-pages .pdf 0
... ... @@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0
168 158 QPDFObjectHandle coalesce called on stream 0
169 159 QPDFObjectHandle coalesce provide stream data 0
170 160 QPDF_Stream bad token at end during normalize 0
171   -QPDFParser bad token in parse 0
172   -QPDFParser bad token in parseRemainder 0
173   -QPDFParser eof in parse 0
174   -QPDFParser eof in parseRemainder 0
175 161 QPDFObjectHandle boolean returning false 0
176 162 QPDFObjectHandle real returning 0.0 0
177 163 QPDFObjectHandle operator returning fake value 0
... ... @@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0
189 175 QPDFObjectHandle numeric non-numeric 0
190 176 QPDFObjectHandle erase array bounds 0
191 177 qpdf-c called qpdf_check_pdf 0
192   -QPDFParser too deep 0
193 178 QPDFFormFieldObjectHelper TU present 0
194 179 QPDFFormFieldObjectHelper TM present 0
195 180 QPDFFormFieldObjectHelper TU absent 0
... ... @@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0
252 237 QPDF eof skipping spaces before xref 1
253 238 QPDF_encryption user matches owner V < 5 0
254 239 QPDF_encryption same password 1
255   -QPDFParser duplicate dict key 0
256 240 QPDFWriter no encryption sig contents 0
257 241 QPDFPageObjectHelper colorspace lookup 0
258 242 QPDFPageObjectHelper filter form xobject 0
... ...