diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index d8f15ca..1f1580b 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -100,6 +100,7 @@ set(libqpdf_SOURCES ResourceFinder.cc SecureRandomDataProvider.cc SF_FlateLzwDecode.cc + global.cc qpdf-c.cc qpdfjob-c.cc qpdflogger-c.cc) diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index d147e60..38d8b01 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -15,6 +15,8 @@ using namespace qpdf; using ObjectPtr = std::shared_ptr; +static uint32_t const& max_nesting{global::Limits::objects_max_nesting()}; + // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides // special access to allow the parser to create unresolved objects and dangling references. class QPDF::Doc::ParseGuard @@ -170,27 +172,22 @@ QPDFParser::parse(bool& empty, bool content_stream) // In content stream mode, leave object uninitialized to indicate EOF return {}; } - QTC::TC("qpdf", "QPDFParser eof in parse"); warn("unexpected EOF"); return {QPDFObject::create()}; case QPDFTokenizer::tt_bad: - QTC::TC("qpdf", "QPDFParser bad token in parse"); return {QPDFObject::create()}; case QPDFTokenizer::tt_brace_open: case QPDFTokenizer::tt_brace_close: - QTC::TC("qpdf", "QPDFParser bad brace"); warn("treating unexpected brace token as null"); return {QPDFObject::create()}; case QPDFTokenizer::tt_array_close: - QTC::TC("qpdf", "QPDFParser bad array close"); warn("treating unexpected array close token as null"); return {QPDFObject::create()}; case QPDFTokenizer::tt_dict_close: - QTC::TC("qpdf", "QPDFParser bad dictionary close"); warn("unexpected dictionary close token"); return {QPDFObject::create()}; @@ -230,7 +227,6 @@ QPDFParser::parse(bool& empty, bool content_stream) empty = true; return {QPDFObject::create()}; } else { - QTC::TC("qpdf", "QPDFParser treat word as string"); warn("unknown token while reading object; treating as string"); return withDescription(value); } @@ -283,8 +279,7 @@ QPDFParser::parseRemainder(bool content_stream) } else if ( int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && tokenizer.getValue() == "R") { - if (context == nullptr) { - QTC::TC("qpdf", "QPDFParser indirect without context"); + if (!context) { throw std::logic_error( "QPDFParser::parse called without context on an object " "with indirect references"); @@ -294,7 +289,6 @@ QPDFParser::parseRemainder(bool content_stream) if (!(id < 1 || gen < 0 || gen >= 65535)) { add(ParseGuard::getObject(context, id, gen, parse_pdf)); } else { - QTC::TC("qpdf", "QPDFParser invalid objgen"); addNull(); } int_count = 0; @@ -317,12 +311,10 @@ QPDFParser::parseRemainder(bool content_stream) // In content stream mode, leave object uninitialized to indicate EOF return {}; } - QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); warn("unexpected EOF"); return {QPDFObject::create()}; case QPDFTokenizer::tt_bad: - QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); if (tooManyBadTokens()) { return {QPDFObject::create()}; } @@ -331,7 +323,6 @@ QPDFParser::parseRemainder(bool content_stream) case QPDFTokenizer::tt_brace_open: case QPDFTokenizer::tt_brace_close: - QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); warn("treating unexpected brace token as null"); if (tooManyBadTokens()) { return {QPDFObject::create()}; @@ -361,7 +352,6 @@ QPDFParser::parseRemainder(bool content_stream) frame = &stack.back(); add(std::move(object)); } else { - QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); if (sanity_checks) { // During sanity checks, assume nesting of containers is corrupt and object is // unusable. @@ -387,7 +377,6 @@ QPDFParser::parseRemainder(bool content_stream) auto& dict = frame->dict; if (frame->state == st_dictionary_value) { - QTC::TC("qpdf", "QPDFParser no val for last key"); warn( frame->offset, "dictionary ended prematurely; using null as value for last key"); @@ -438,8 +427,7 @@ QPDFParser::parseRemainder(bool content_stream) case QPDFTokenizer::tt_array_open: case QPDFTokenizer::tt_dict_open: - if (stack.size() > 499) { - QTC::TC("qpdf", "QPDFParser too deep"); + if (stack.size() > max_nesting) { warn("ignoring excessively deeply nested data structure"); return {QPDFObject::create()}; } else { @@ -510,7 +498,6 @@ QPDFParser::parseRemainder(bool content_stream) continue; } - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); warn("unknown token while reading object; treating as string"); if (tooManyBadTokens()) { return {QPDFObject::create()}; @@ -592,8 +579,8 @@ template void QPDFParser::addScalar(Args&&... args) { - if ((bad_count || sanity_checks) && - (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); + if (frame->olist.size() > limit || frame->dict.size() > limit) { // Stop adding scalars. We are going to abort when the close token or a bad token is // encountered. max_bad_count = 0; @@ -650,16 +637,17 @@ QPDFParser::fixMissingKeys() bool QPDFParser::tooManyBadTokens() { - if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); + if (frame->olist.size() > limit || frame->dict.size() > limit) { if (bad_count) { warn( - "encountered errors while parsing an array or dictionary with more than 5000 " - "elements; giving up on reading object"); + "encountered errors while parsing an array or dictionary with more than " + + std::to_string(limit) + " elements; giving up on reading object"); return true; } warn( - "encountered an array or dictionary with more than 5000 elements during xref recovery; " - "giving up on reading object"); + "encountered an array or dictionary with more than " + std::to_string(limit) + + " elements during xref recovery; giving up on reading object"); } if (max_bad_count && --max_bad_count > 0 && good_count > 4) { good_count = 0; @@ -693,7 +681,6 @@ QPDFParser::warn(QPDFExc const& e) const void QPDFParser::warnDuplicateKey() { - QTC::TC("qpdf", "QPDFParser duplicate dict key"); warn( frame->offset, "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); diff --git a/libqpdf/global.cc b/libqpdf/global.cc new file mode 100644 index 0000000..33f778d --- /dev/null +++ b/libqpdf/global.cc @@ -0,0 +1,5 @@ +#include + +using namespace qpdf; + +global::Limits global::Limits::l; diff --git a/libqpdf/qpdf/QPDFObject_private.hh b/libqpdf/qpdf/QPDFObject_private.hh index 43ad341..36f973a 100644 --- a/libqpdf/qpdf/QPDFObject_private.hh +++ b/libqpdf/qpdf/QPDFObject_private.hh @@ -5,11 +5,12 @@ // include/qpdf/QPDFObject.hh. See comments there for an explanation. #include +#include + #include #include #include #include -#include #include #include diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index bd8d6dd..22d24db 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -5,10 +5,14 @@ #include #include #include +#include #include #include +using namespace qpdf; +using namespace qpdf::global; + class QPDFParser { public: @@ -136,7 +140,7 @@ class QPDFParser // it only gets incremented or reset when a bad token is encountered. int bad_count{0}; // Number of bad tokens (remaining) before giving up. - int max_bad_count{15}; + uint32_t max_bad_count{Limits::objects_max_errors()}; // Number of good tokens since last bad token. Irrelevant if bad_count == 0. int good_count{0}; // Start offset including any leading whitespace. diff --git a/libqpdf/qpdf/global_private.hh b/libqpdf/qpdf/global_private.hh new file mode 100644 index 0000000..334f351 --- /dev/null +++ b/libqpdf/qpdf/global_private.hh @@ -0,0 +1,57 @@ + +#ifndef GLOBAL_PRIVATE_HH +#define GLOBAL_PRIVATE_HH + +#include + +#include +#include + +namespace qpdf +{ + namespace global + { + class Limits + { + public: + Limits(Limits const&) = delete; + Limits(Limits&&) = delete; + Limits& operator=(Limits const&) = delete; + Limits& operator=(Limits&&) = delete; + + static uint32_t const& + objects_max_nesting() + { + return l.objects_max_nesting_; + } + + static uint32_t const& + objects_max_errors() + { + return l.objects_max_errors_; + } + + static uint32_t const& + objects_max_container_size(bool damaged) + { + return damaged ? l.objects_max_container_size_damaged_ + : l.objects_max_container_size_; + } + + private: + Limits() = default; + ~Limits() = default; + + static Limits l; + + uint32_t objects_max_nesting_{499}; + uint32_t objects_max_errors_{15}; + uint32_t objects_max_container_size_{std::numeric_limits::max()}; + uint32_t objects_max_container_size_damaged_{5'000}; + }; + + } // namespace global + +} // namespace qpdf + +#endif // GLOBAL_PRIVATE_HH diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 7ef90c4..9749f07 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -27,11 +27,6 @@ main QTest stream 0 QPDF lin write nshared_total > nshared_first_page 1 QPDFWriter encrypted hint stream 0 QPDF xref gen > 0 1 -QPDFParser bad brace 0 -QPDFParser bad brace in parseRemainder 0 -QPDFParser bad array close 0 -QPDFParser bad array close in parseRemainder 0 -QPDFParser bad dictionary close 0 QPDFTokenizer bad ) 0 QPDFTokenizer bad > 0 QPDFTokenizer bad hexstring character 0 @@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0 QPDF_Stream unknown stream length 0 QPDF replaceReserved 0 QPDFWriter copy use_aes 1 -QPDFParser indirect without context 0 QPDFObjectHandle trailing data in parse 0 QPDFTokenizer EOF reading token 0 QPDFTokenizer EOF reading appendable token 0 @@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0 QPDFWriter standard deterministic ID 1 QPDFWriter linearized deterministic ID 1 qpdf-c called qpdf_set_deterministic_ID 0 -QPDFParser invalid objgen 0 -QPDFParser treat word as string 0 -QPDFParser treat word as string in parseRemainder 0 QPDFParser found fake 1 -QPDFParser no val for last key 0 QPDFObjectHandle errors in parsecontent 0 QPDFJob split-pages %d 0 QPDFJob split-pages .pdf 0 @@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0 QPDFObjectHandle coalesce called on stream 0 QPDFObjectHandle coalesce provide stream data 0 QPDF_Stream bad token at end during normalize 0 -QPDFParser bad token in parse 0 -QPDFParser bad token in parseRemainder 0 -QPDFParser eof in parse 0 -QPDFParser eof in parseRemainder 0 QPDFObjectHandle boolean returning false 0 QPDFObjectHandle real returning 0.0 0 QPDFObjectHandle operator returning fake value 0 @@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0 QPDFObjectHandle numeric non-numeric 0 QPDFObjectHandle erase array bounds 0 qpdf-c called qpdf_check_pdf 0 -QPDFParser too deep 0 QPDFFormFieldObjectHelper TU present 0 QPDFFormFieldObjectHelper TM present 0 QPDFFormFieldObjectHelper TU absent 0 @@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0 QPDF eof skipping spaces before xref 1 QPDF_encryption user matches owner V < 5 0 QPDF_encryption same password 1 -QPDFParser duplicate dict key 0 QPDFWriter no encryption sig contents 0 QPDFPageObjectHelper colorspace lookup 0 QPDFPageObjectHelper filter form xobject 0