Commit 37f7a734885f0d3c9dce64fbdb9a57192170686b
1 parent 29cd8f4f
In QPDFParser::parse refactor handling of bad tokens
Showing 2 changed files with 42 additions and 29 deletions
libqpdf/QPDFParser.cc
| @@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 60 | state_stack.push_back(st_top); | 60 | state_stack.push_back(st_top); |
| 61 | qpdf_offset_t offset; | 61 | qpdf_offset_t offset; |
| 62 | bool done = false; | 62 | bool done = false; |
| 63 | - int bad_count = 0; | ||
| 64 | - int good_count = 0; | ||
| 65 | bool b_contents = false; | 63 | bool b_contents = false; |
| 66 | bool is_null = false; | 64 | bool is_null = false; |
| 67 | 65 | ||
| 68 | while (!done) { | 66 | while (!done) { |
| 69 | - bool bad = false; | ||
| 70 | bool indirect_ref = false; | 67 | bool indirect_ref = false; |
| 71 | is_null = false; | 68 | is_null = false; |
| 72 | auto& frame = stack.back(); | 69 | auto& frame = stack.back(); |
| @@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 80 | if (!tokenizer.nextToken(*input, object_description)) { | 77 | if (!tokenizer.nextToken(*input, object_description)) { |
| 81 | warn(tokenizer.getErrorMessage()); | 78 | warn(tokenizer.getErrorMessage()); |
| 82 | } | 79 | } |
| 80 | + ++good_count; // optimistically | ||
| 83 | 81 | ||
| 84 | switch (tokenizer.getType()) { | 82 | switch (tokenizer.getType()) { |
| 85 | case QPDFTokenizer::tt_eof: | 83 | case QPDFTokenizer::tt_eof: |
| @@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 87 | QTC::TC("qpdf", "QPDFParser eof in parse"); | 85 | QTC::TC("qpdf", "QPDFParser eof in parse"); |
| 88 | warn("unexpected EOF"); | 86 | warn("unexpected EOF"); |
| 89 | } | 87 | } |
| 90 | - bad = true; | ||
| 91 | state = st_eof; | 88 | state = st_eof; |
| 92 | break; | 89 | break; |
| 93 | 90 | ||
| 94 | case QPDFTokenizer::tt_bad: | 91 | case QPDFTokenizer::tt_bad: |
| 95 | QTC::TC("qpdf", "QPDFParser bad token in parse"); | 92 | QTC::TC("qpdf", "QPDFParser bad token in parse"); |
| 96 | - bad = true; | 93 | + if (tooManyBadTokens()) { |
| 94 | + return {QPDF_Null::create()}; | ||
| 95 | + } | ||
| 97 | is_null = true; | 96 | is_null = true; |
| 98 | break; | 97 | break; |
| 99 | 98 | ||
| @@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 101 | case QPDFTokenizer::tt_brace_close: | 100 | case QPDFTokenizer::tt_brace_close: |
| 102 | QTC::TC("qpdf", "QPDFParser bad brace"); | 101 | QTC::TC("qpdf", "QPDFParser bad brace"); |
| 103 | warn("treating unexpected brace token as null"); | 102 | warn("treating unexpected brace token as null"); |
| 104 | - bad = true; | 103 | + if (tooManyBadTokens()) { |
| 104 | + return {QPDF_Null::create()}; | ||
| 105 | + } | ||
| 105 | is_null = true; | 106 | is_null = true; |
| 106 | break; | 107 | break; |
| 107 | 108 | ||
| @@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 111 | } else { | 112 | } else { |
| 112 | QTC::TC("qpdf", "QPDFParser bad array close"); | 113 | QTC::TC("qpdf", "QPDFParser bad array close"); |
| 113 | warn("treating unexpected array close token as null"); | 114 | warn("treating unexpected array close token as null"); |
| 114 | - bad = true; | 115 | + if (tooManyBadTokens()) { |
| 116 | + return {QPDF_Null::create()}; | ||
| 117 | + } | ||
| 115 | is_null = true; | 118 | is_null = true; |
| 116 | } | 119 | } |
| 117 | break; | 120 | break; |
| @@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 122 | } else { | 125 | } else { |
| 123 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); | 126 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
| 124 | warn("unexpected dictionary close token"); | 127 | warn("unexpected dictionary close token"); |
| 125 | - bad = true; | 128 | + if (tooManyBadTokens()) { |
| 129 | + return {QPDF_Null::create()}; | ||
| 130 | + } | ||
| 126 | is_null = true; | 131 | is_null = true; |
| 127 | } | 132 | } |
| 128 | break; | 133 | break; |
| @@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 132 | if (stack.size() > 500) { | 137 | if (stack.size() > 500) { |
| 133 | QTC::TC("qpdf", "QPDFParser too deep"); | 138 | QTC::TC("qpdf", "QPDFParser too deep"); |
| 134 | warn("ignoring excessively deeply nested data structure"); | 139 | warn("ignoring excessively deeply nested data structure"); |
| 135 | - bad = true; | 140 | + if (tooManyBadTokens()) { |
| 141 | + return {QPDF_Null::create()}; | ||
| 142 | + } | ||
| 136 | is_null = true; | 143 | is_null = true; |
| 137 | state = st_top; | 144 | state = st_top; |
| 138 | } else { | 145 | } else { |
| @@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 217 | } else { | 224 | } else { |
| 218 | QTC::TC("qpdf", "QPDFParser treat word as string"); | 225 | QTC::TC("qpdf", "QPDFParser treat word as string"); |
| 219 | warn("unknown token while reading object; treating as string"); | 226 | warn("unknown token while reading object; treating as string"); |
| 220 | - bad = true; | 227 | + if (tooManyBadTokens()) { |
| 228 | + return {QPDF_Null::create()}; | ||
| 229 | + } | ||
| 221 | object = QPDF_String::create(value); | 230 | object = QPDF_String::create(value); |
| 222 | } | 231 | } |
| 223 | } | 232 | } |
| @@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 239 | object = QPDF_String::create(val); | 248 | object = QPDF_String::create(val); |
| 240 | } | 249 | } |
| 241 | } | 250 | } |
| 242 | - | ||
| 243 | break; | 251 | break; |
| 244 | 252 | ||
| 245 | default: | 253 | default: |
| 246 | warn("treating unknown token type as null while reading object"); | 254 | warn("treating unknown token type as null while reading object"); |
| 247 | - bad = true; | 255 | + if (tooManyBadTokens()) { |
| 256 | + return {QPDF_Null::create()}; | ||
| 257 | + } | ||
| 248 | is_null = true; | 258 | is_null = true; |
| 249 | break; | 259 | break; |
| 250 | } | 260 | } |
| @@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 255 | is_null = true; | 265 | is_null = true; |
| 256 | } | 266 | } |
| 257 | 267 | ||
| 258 | - if (bad) { | ||
| 259 | - ++bad_count; | ||
| 260 | - good_count = 0; | ||
| 261 | - } else { | ||
| 262 | - ++good_count; | ||
| 263 | - if (good_count > 3) { | ||
| 264 | - bad_count = 0; | ||
| 265 | - } | ||
| 266 | - } | ||
| 267 | - if (bad_count > 5) { | ||
| 268 | - // We had too many consecutive errors without enough intervening successful objects. | ||
| 269 | - // Give up. | ||
| 270 | - warn("too many errors; giving up on reading object"); | ||
| 271 | - state = st_top; | ||
| 272 | - is_null = true; | ||
| 273 | - } | ||
| 274 | - | ||
| 275 | switch (state) { | 268 | switch (state) { |
| 276 | case st_eof: | 269 | case st_eof: |
| 277 | if (state_stack.size() > 1) { | 270 | if (state_stack.size() > 1) { |
| @@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse | @@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse | ||
| 412 | } | 405 | } |
| 413 | } | 406 | } |
| 414 | 407 | ||
| 408 | +bool | ||
| 409 | +QPDFParser::tooManyBadTokens() | ||
| 410 | +{ | ||
| 411 | + if (good_count <= 4) { | ||
| 412 | + if (++bad_count > 5) { | ||
| 413 | + warn("too many errors; giving up on reading object"); | ||
| 414 | + return true; | ||
| 415 | + } | ||
| 416 | + } else { | ||
| 417 | + bad_count = 1; | ||
| 418 | + } | ||
| 419 | + good_count = 0; | ||
| 420 | + return false; | ||
| 421 | +} | ||
| 422 | + | ||
| 415 | void | 423 | void |
| 416 | QPDFParser::warn(QPDFExc const& e) const | 424 | QPDFParser::warn(QPDFExc const& e) const |
| 417 | { | 425 | { |
libqpdf/qpdf/QPDFParser.hh
| @@ -33,6 +33,7 @@ class QPDFParser | @@ -33,6 +33,7 @@ class QPDFParser | ||
| 33 | private: | 33 | private: |
| 34 | enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; | 34 | enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; |
| 35 | 35 | ||
| 36 | + bool tooManyBadTokens(); | ||
| 36 | void warn(qpdf_offset_t offset, std::string const& msg) const; | 37 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 37 | void warn(std::string const& msg) const; | 38 | void warn(std::string const& msg) const; |
| 38 | void warn(QPDFExc const&) const; | 39 | void warn(QPDFExc const&) const; |
| @@ -43,6 +44,10 @@ class QPDFParser | @@ -43,6 +44,10 @@ class QPDFParser | ||
| 43 | QPDFObjectHandle::StringDecrypter* decrypter; | 44 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 44 | QPDF* context; | 45 | QPDF* context; |
| 45 | std::shared_ptr<QPDFValue::Description> description; | 46 | std::shared_ptr<QPDFValue::Description> description; |
| 47 | + // Number of recent bad tokens. | ||
| 48 | + int bad_count = 0; | ||
| 49 | + // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | ||
| 50 | + int good_count = 0; | ||
| 46 | }; | 51 | }; |
| 47 | 52 | ||
| 48 | #endif // QPDFPARSER_HH | 53 | #endif // QPDFPARSER_HH |