Commit 37f7a734885f0d3c9dce64fbdb9a57192170686b
1 parent
29cd8f4f
In QPDFParser::parse refactor handling of bad tokens
Showing
2 changed files
with
42 additions
and
29 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 60 | 60 | state_stack.push_back(st_top); |
| 61 | 61 | qpdf_offset_t offset; |
| 62 | 62 | bool done = false; |
| 63 | - int bad_count = 0; | |
| 64 | - int good_count = 0; | |
| 65 | 63 | bool b_contents = false; |
| 66 | 64 | bool is_null = false; |
| 67 | 65 | |
| 68 | 66 | while (!done) { |
| 69 | - bool bad = false; | |
| 70 | 67 | bool indirect_ref = false; |
| 71 | 68 | is_null = false; |
| 72 | 69 | auto& frame = stack.back(); |
| ... | ... | @@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 80 | 77 | if (!tokenizer.nextToken(*input, object_description)) { |
| 81 | 78 | warn(tokenizer.getErrorMessage()); |
| 82 | 79 | } |
| 80 | + ++good_count; // optimistically | |
| 83 | 81 | |
| 84 | 82 | switch (tokenizer.getType()) { |
| 85 | 83 | case QPDFTokenizer::tt_eof: |
| ... | ... | @@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 87 | 85 | QTC::TC("qpdf", "QPDFParser eof in parse"); |
| 88 | 86 | warn("unexpected EOF"); |
| 89 | 87 | } |
| 90 | - bad = true; | |
| 91 | 88 | state = st_eof; |
| 92 | 89 | break; |
| 93 | 90 | |
| 94 | 91 | case QPDFTokenizer::tt_bad: |
| 95 | 92 | QTC::TC("qpdf", "QPDFParser bad token in parse"); |
| 96 | - bad = true; | |
| 93 | + if (tooManyBadTokens()) { | |
| 94 | + return {QPDF_Null::create()}; | |
| 95 | + } | |
| 97 | 96 | is_null = true; |
| 98 | 97 | break; |
| 99 | 98 | |
| ... | ... | @@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 101 | 100 | case QPDFTokenizer::tt_brace_close: |
| 102 | 101 | QTC::TC("qpdf", "QPDFParser bad brace"); |
| 103 | 102 | warn("treating unexpected brace token as null"); |
| 104 | - bad = true; | |
| 103 | + if (tooManyBadTokens()) { | |
| 104 | + return {QPDF_Null::create()}; | |
| 105 | + } | |
| 105 | 106 | is_null = true; |
| 106 | 107 | break; |
| 107 | 108 | |
| ... | ... | @@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 111 | 112 | } else { |
| 112 | 113 | QTC::TC("qpdf", "QPDFParser bad array close"); |
| 113 | 114 | warn("treating unexpected array close token as null"); |
| 114 | - bad = true; | |
| 115 | + if (tooManyBadTokens()) { | |
| 116 | + return {QPDF_Null::create()}; | |
| 117 | + } | |
| 115 | 118 | is_null = true; |
| 116 | 119 | } |
| 117 | 120 | break; |
| ... | ... | @@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 122 | 125 | } else { |
| 123 | 126 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
| 124 | 127 | warn("unexpected dictionary close token"); |
| 125 | - bad = true; | |
| 128 | + if (tooManyBadTokens()) { | |
| 129 | + return {QPDF_Null::create()}; | |
| 130 | + } | |
| 126 | 131 | is_null = true; |
| 127 | 132 | } |
| 128 | 133 | break; |
| ... | ... | @@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 132 | 137 | if (stack.size() > 500) { |
| 133 | 138 | QTC::TC("qpdf", "QPDFParser too deep"); |
| 134 | 139 | warn("ignoring excessively deeply nested data structure"); |
| 135 | - bad = true; | |
| 140 | + if (tooManyBadTokens()) { | |
| 141 | + return {QPDF_Null::create()}; | |
| 142 | + } | |
| 136 | 143 | is_null = true; |
| 137 | 144 | state = st_top; |
| 138 | 145 | } else { |
| ... | ... | @@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 217 | 224 | } else { |
| 218 | 225 | QTC::TC("qpdf", "QPDFParser treat word as string"); |
| 219 | 226 | warn("unknown token while reading object; treating as string"); |
| 220 | - bad = true; | |
| 227 | + if (tooManyBadTokens()) { | |
| 228 | + return {QPDF_Null::create()}; | |
| 229 | + } | |
| 221 | 230 | object = QPDF_String::create(value); |
| 222 | 231 | } |
| 223 | 232 | } |
| ... | ... | @@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 239 | 248 | object = QPDF_String::create(val); |
| 240 | 249 | } |
| 241 | 250 | } |
| 242 | - | |
| 243 | 251 | break; |
| 244 | 252 | |
| 245 | 253 | default: |
| 246 | 254 | warn("treating unknown token type as null while reading object"); |
| 247 | - bad = true; | |
| 255 | + if (tooManyBadTokens()) { | |
| 256 | + return {QPDF_Null::create()}; | |
| 257 | + } | |
| 248 | 258 | is_null = true; |
| 249 | 259 | break; |
| 250 | 260 | } |
| ... | ... | @@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 255 | 265 | is_null = true; |
| 256 | 266 | } |
| 257 | 267 | |
| 258 | - if (bad) { | |
| 259 | - ++bad_count; | |
| 260 | - good_count = 0; | |
| 261 | - } else { | |
| 262 | - ++good_count; | |
| 263 | - if (good_count > 3) { | |
| 264 | - bad_count = 0; | |
| 265 | - } | |
| 266 | - } | |
| 267 | - if (bad_count > 5) { | |
| 268 | - // We had too many consecutive errors without enough intervening successful objects. | |
| 269 | - // Give up. | |
| 270 | - warn("too many errors; giving up on reading object"); | |
| 271 | - state = st_top; | |
| 272 | - is_null = true; | |
| 273 | - } | |
| 274 | - | |
| 275 | 268 | switch (state) { |
| 276 | 269 | case st_eof: |
| 277 | 270 | if (state_stack.size() > 1) { |
| ... | ... | @@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse |
| 412 | 405 | } |
| 413 | 406 | } |
| 414 | 407 | |
| 408 | +bool | |
| 409 | +QPDFParser::tooManyBadTokens() | |
| 410 | +{ | |
| 411 | + if (good_count <= 4) { | |
| 412 | + if (++bad_count > 5) { | |
| 413 | + warn("too many errors; giving up on reading object"); | |
| 414 | + return true; | |
| 415 | + } | |
| 416 | + } else { | |
| 417 | + bad_count = 1; | |
| 418 | + } | |
| 419 | + good_count = 0; | |
| 420 | + return false; | |
| 421 | +} | |
| 422 | + | |
| 415 | 423 | void |
| 416 | 424 | QPDFParser::warn(QPDFExc const& e) const |
| 417 | 425 | { | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -33,6 +33,7 @@ class QPDFParser |
| 33 | 33 | private: |
| 34 | 34 | enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; |
| 35 | 35 | |
| 36 | + bool tooManyBadTokens(); | |
| 36 | 37 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 37 | 38 | void warn(std::string const& msg) const; |
| 38 | 39 | void warn(QPDFExc const&) const; |
| ... | ... | @@ -43,6 +44,10 @@ class QPDFParser |
| 43 | 44 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 44 | 45 | QPDF* context; |
| 45 | 46 | std::shared_ptr<QPDFValue::Description> description; |
| 47 | + // Number of recent bad tokens. | |
| 48 | + int bad_count = 0; | |
| 49 | + // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | |
| 50 | + int good_count = 0; | |
| 46 | 51 | }; |
| 47 | 52 | |
| 48 | 53 | #endif // QPDFPARSER_HH | ... | ... |