Commit 7061ee1ce14c33b64ab4e12b79882aa4cb43cfcf
1 parent
1a1c640a
Refactor `QPDFParser` internal `parse` methods to return uninitialized object handles on invalid input.
Showing 1 changed file with 67 additions and 50 deletions
libqpdf/QPDFParser.cc
| @@ -52,15 +52,18 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | @@ -52,15 +52,18 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | ||
| 52 | { | 52 | { |
| 53 | qpdf::Tokenizer tokenizer; | 53 | qpdf::Tokenizer tokenizer; |
| 54 | bool empty = false; | 54 | bool empty = false; |
| 55 | - return QPDFParser( | ||
| 56 | - input, | ||
| 57 | - make_description(input.getName(), object_description), | ||
| 58 | - object_description, | ||
| 59 | - tokenizer, | ||
| 60 | - nullptr, | ||
| 61 | - context, | ||
| 62 | - false) | ||
| 63 | - .parse(empty, false); | 55 | + if (auto result = QPDFParser( |
| 56 | + input, | ||
| 57 | + make_description(input.getName(), object_description), | ||
| 58 | + object_description, | ||
| 59 | + tokenizer, | ||
| 60 | + nullptr, | ||
| 61 | + context, | ||
| 62 | + false) | ||
| 63 | + .parse(empty, false)) { | ||
| 64 | + return result; | ||
| 65 | + } | ||
| 66 | + return {QPDFObject::create<QPDF_Null>()}; | ||
| 64 | } | 67 | } |
| 65 | 68 | ||
| 66 | QPDFObjectHandle | 69 | QPDFObjectHandle |
| @@ -71,18 +74,22 @@ QPDFParser::parse_content( | @@ -71,18 +74,22 @@ QPDFParser::parse_content( | ||
| 71 | QPDF* context) | 74 | QPDF* context) |
| 72 | { | 75 | { |
| 73 | bool empty = false; | 76 | bool empty = false; |
| 74 | - return QPDFParser( | ||
| 75 | - input, | ||
| 76 | - std::move(sp_description), | ||
| 77 | - "content", | ||
| 78 | - tokenizer, | ||
| 79 | - nullptr, | ||
| 80 | - context, | ||
| 81 | - true, | ||
| 82 | - 0, | ||
| 83 | - 0, | ||
| 84 | - context && context->doc().reconstructed_xref()) | ||
| 85 | - .parse(empty, true); | 77 | + if (auto result = QPDFParser( |
| 78 | + input, | ||
| 79 | + std::move(sp_description), | ||
| 80 | + "content", | ||
| 81 | + tokenizer, | ||
| 82 | + nullptr, | ||
| 83 | + context, | ||
| 84 | + true, | ||
| 85 | + 0, | ||
| 86 | + 0, | ||
| 87 | + context && context->doc().reconstructed_xref()) | ||
| 88 | + .parse(empty, true)) { | ||
| 89 | + return result; | ||
| 90 | + } | ||
| 91 | + // In content stream mode, leave object uninitialized to indicate EOF | ||
| 92 | + return {empty ? nullptr : QPDFObject::create<QPDF_Null>()}; | ||
| 86 | } | 93 | } |
| 87 | 94 | ||
| 88 | QPDFObjectHandle | 95 | QPDFObjectHandle |
| @@ -94,15 +101,18 @@ QPDFParser::parse( | @@ -94,15 +101,18 @@ QPDFParser::parse( | ||
| 94 | QPDFObjectHandle::StringDecrypter* decrypter, | 101 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 95 | QPDF* context) | 102 | QPDF* context) |
| 96 | { | 103 | { |
| 97 | - return QPDFParser( | ||
| 98 | - input, | ||
| 99 | - make_description(input.getName(), object_description), | ||
| 100 | - object_description, | ||
| 101 | - *tokenizer.m, | ||
| 102 | - decrypter, | ||
| 103 | - context, | ||
| 104 | - false) | ||
| 105 | - .parse(empty, false); | 104 | + if (auto result = QPDFParser( |
| 105 | + input, | ||
| 106 | + make_description(input.getName(), object_description), | ||
| 107 | + object_description, | ||
| 108 | + *tokenizer.m, | ||
| 109 | + decrypter, | ||
| 110 | + context, | ||
| 111 | + false) | ||
| 112 | + .parse(empty, false)) { | ||
| 113 | + return result; | ||
| 114 | + } | ||
| 115 | + return {QPDFObject::create<QPDF_Null>()}; | ||
| 106 | } | 116 | } |
| 107 | 117 | ||
| 108 | std::pair<QPDFObjectHandle, bool> | 118 | std::pair<QPDFObjectHandle, bool> |
| @@ -127,7 +137,10 @@ QPDFParser::parse( | @@ -127,7 +137,10 @@ QPDFParser::parse( | ||
| 127 | 0, | 137 | 0, |
| 128 | sanity_checks) | 138 | sanity_checks) |
| 129 | .parse(empty, false); | 139 | .parse(empty, false); |
| 130 | - return {result, empty}; | 140 | + if (result) { |
| 141 | + return {result, empty}; | ||
| 142 | + } | ||
| 143 | + return {QPDFObject::create<QPDF_Null>(), empty}; | ||
| 131 | } | 144 | } |
| 132 | 145 | ||
| 133 | std::pair<QPDFObjectHandle, bool> | 146 | std::pair<QPDFObjectHandle, bool> |
| @@ -147,7 +160,11 @@ QPDFParser::parse( | @@ -147,7 +160,11 @@ QPDFParser::parse( | ||
| 147 | stream_id, | 160 | stream_id, |
| 148 | obj_id) | 161 | obj_id) |
| 149 | .parse(empty, false); | 162 | .parse(empty, false); |
| 150 | - return {result, empty}; | 163 | + |
| 164 | + if (result) { | ||
| 165 | + return {result, empty}; | ||
| 166 | + } | ||
| 167 | + return {QPDFObject::create<QPDF_Null>(), empty}; | ||
| 151 | } | 168 | } |
| 152 | 169 | ||
| 153 | QPDFObjectHandle | 170 | QPDFObjectHandle |
| @@ -156,14 +173,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -156,14 +173,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 156 | try { | 173 | try { |
| 157 | return parse_first(empty, content_stream); | 174 | return parse_first(empty, content_stream); |
| 158 | } catch (Error& e) { | 175 | } catch (Error& e) { |
| 159 | - return {QPDFObject::create<QPDF_Null>()}; | 176 | + return {}; |
| 160 | } catch (QPDFExc& e) { | 177 | } catch (QPDFExc& e) { |
| 161 | throw e; | 178 | throw e; |
| 162 | } catch (std::logic_error& e) { | 179 | } catch (std::logic_error& e) { |
| 163 | throw e; | 180 | throw e; |
| 164 | } catch (std::exception& e) { | 181 | } catch (std::exception& e) { |
| 165 | warn("treating object as null because of error during parsing : "s + e.what()); | 182 | warn("treating object as null because of error during parsing : "s + e.what()); |
| 166 | - return {QPDFObject::create<QPDF_Null>()}; | 183 | + return {}; |
| 167 | } | 184 | } |
| 168 | } | 185 | } |
| 169 | 186 | ||
| @@ -186,26 +203,27 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | @@ -186,26 +203,27 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 186 | case QPDFTokenizer::tt_eof: | 203 | case QPDFTokenizer::tt_eof: |
| 187 | if (content_stream) { | 204 | if (content_stream) { |
| 188 | // In content stream mode, leave object uninitialized to indicate EOF | 205 | // In content stream mode, leave object uninitialized to indicate EOF |
| 206 | + empty = true; | ||
| 189 | return {}; | 207 | return {}; |
| 190 | } | 208 | } |
| 191 | warn("unexpected EOF"); | 209 | warn("unexpected EOF"); |
| 192 | - return {QPDFObject::create<QPDF_Null>()}; | 210 | + return {}; |
| 193 | 211 | ||
| 194 | case QPDFTokenizer::tt_bad: | 212 | case QPDFTokenizer::tt_bad: |
| 195 | - return {QPDFObject::create<QPDF_Null>()}; | 213 | + return {}; |
| 196 | 214 | ||
| 197 | case QPDFTokenizer::tt_brace_open: | 215 | case QPDFTokenizer::tt_brace_open: |
| 198 | case QPDFTokenizer::tt_brace_close: | 216 | case QPDFTokenizer::tt_brace_close: |
| 199 | warn("treating unexpected brace token as null"); | 217 | warn("treating unexpected brace token as null"); |
| 200 | - return {QPDFObject::create<QPDF_Null>()}; | 218 | + return {}; |
| 201 | 219 | ||
| 202 | case QPDFTokenizer::tt_array_close: | 220 | case QPDFTokenizer::tt_array_close: |
| 203 | warn("treating unexpected array close token as null"); | 221 | warn("treating unexpected array close token as null"); |
| 204 | - return {QPDFObject::create<QPDF_Null>()}; | 222 | + return {}; |
| 205 | 223 | ||
| 206 | case QPDFTokenizer::tt_dict_close: | 224 | case QPDFTokenizer::tt_dict_close: |
| 207 | warn("unexpected dictionary close token"); | 225 | warn("unexpected dictionary close token"); |
| 208 | - return {QPDFObject::create<QPDF_Null>()}; | 226 | + return {}; |
| 209 | 227 | ||
| 210 | case QPDFTokenizer::tt_array_open: | 228 | case QPDFTokenizer::tt_array_open: |
| 211 | case QPDFTokenizer::tt_dict_open: | 229 | case QPDFTokenizer::tt_dict_open: |
| @@ -241,7 +259,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | @@ -241,7 +259,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 241 | // not move the input source's offset. | 259 | // not move the input source's offset. |
| 242 | input.seek(input.getLastOffset(), SEEK_SET); | 260 | input.seek(input.getLastOffset(), SEEK_SET); |
| 243 | empty = true; | 261 | empty = true; |
| 244 | - return {QPDFObject::create<QPDF_Null>()}; | 262 | + return {}; |
| 245 | } else { | 263 | } else { |
| 246 | warn("unknown token while reading object; treating as string"); | 264 | warn("unknown token while reading object; treating as string"); |
| 247 | return withDescription<QPDF_String>(value); | 265 | return withDescription<QPDF_String>(value); |
| @@ -259,7 +277,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | @@ -259,7 +277,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 259 | 277 | ||
| 260 | default: | 278 | default: |
| 261 | warn("treating unknown token type as null while reading object"); | 279 | warn("treating unknown token type as null while reading object"); |
| 262 | - return {QPDFObject::create<QPDF_Null>()}; | 280 | + return {}; |
| 263 | } | 281 | } |
| 264 | } | 282 | } |
| 265 | 283 | ||
| @@ -297,8 +315,8 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -297,8 +315,8 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 297 | tokenizer.getValue() == "R") { | 315 | tokenizer.getValue() == "R") { |
| 298 | if (!context) { | 316 | if (!context) { |
| 299 | throw std::logic_error( | 317 | throw std::logic_error( |
| 300 | - "QPDFParser::parse called without context on an object " | ||
| 301 | - "with indirect references"); | 318 | + "QPDFParser::parse called without context on an object with indirect " |
| 319 | + "references"); | ||
| 302 | } | 320 | } |
| 303 | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); | 321 | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); |
| 304 | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); | 322 | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); |
| @@ -328,7 +346,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -328,7 +346,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 328 | return {}; | 346 | return {}; |
| 329 | } | 347 | } |
| 330 | warn("unexpected EOF"); | 348 | warn("unexpected EOF"); |
| 331 | - return {QPDFObject::create<QPDF_Null>()}; | 349 | + return {}; |
| 332 | 350 | ||
| 333 | case QPDFTokenizer::tt_bad: | 351 | case QPDFTokenizer::tt_bad: |
| 334 | check_too_many_bad_tokens(); | 352 | check_too_many_bad_tokens(); |
| @@ -361,7 +379,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -361,7 +379,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 361 | // During sanity checks, assume nesting of containers is corrupt and object is | 379 | // During sanity checks, assume nesting of containers is corrupt and object is |
| 362 | // unusable. | 380 | // unusable. |
| 363 | warn("unexpected array close token; giving up on reading object"); | 381 | warn("unexpected array close token; giving up on reading object"); |
| 364 | - return {QPDFObject::create<QPDF_Null>()}; | 382 | + return {}; |
| 365 | } | 383 | } |
| 366 | add_bad_null("treating unexpected array close token as null"); | 384 | add_bad_null("treating unexpected array close token as null"); |
| 367 | } | 385 | } |
| @@ -411,7 +429,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -411,7 +429,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 411 | // During sanity checks, assume nesting of containers is corrupt and object is | 429 | // During sanity checks, assume nesting of containers is corrupt and object is |
| 412 | // unusable. | 430 | // unusable. |
| 413 | warn("unexpected dictionary close token; giving up on reading object"); | 431 | warn("unexpected dictionary close token; giving up on reading object"); |
| 414 | - return {QPDFObject::create<QPDF_Null>()}; | 432 | + return {}; |
| 415 | } | 433 | } |
| 416 | add_bad_null("unexpected dictionary close token"); | 434 | add_bad_null("unexpected dictionary close token"); |
| 417 | } | 435 | } |
| @@ -421,7 +439,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -421,7 +439,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 421 | case QPDFTokenizer::tt_dict_open: | 439 | case QPDFTokenizer::tt_dict_open: |
| 422 | if (stack.size() > max_nesting) { | 440 | if (stack.size() > max_nesting) { |
| 423 | warn("ignoring excessively deeply nested data structure"); | 441 | warn("ignoring excessively deeply nested data structure"); |
| 424 | - return {QPDFObject::create<QPDF_Null>()}; | 442 | + return {}; |
| 425 | } else { | 443 | } else { |
| 426 | b_contents = false; | 444 | b_contents = false; |
| 427 | stack.emplace_back( | 445 | stack.emplace_back( |
| @@ -479,7 +497,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -479,7 +497,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 479 | warn( | 497 | warn( |
| 480 | "unexpected 'endobj' or 'endstream' while reading object; giving up on " | 498 | "unexpected 'endobj' or 'endstream' while reading object; giving up on " |
| 481 | "reading object"); | 499 | "reading object"); |
| 482 | - return {QPDFObject::create<QPDF_Null>()}; | 500 | + return {}; |
| 483 | } | 501 | } |
| 484 | 502 | ||
| 485 | add_bad_null("unknown token while reading object; treating as null"); | 503 | add_bad_null("unknown token while reading object; treating as null"); |
| @@ -574,8 +592,7 @@ QPDFParser::addScalar(Args&&... args) | @@ -574,8 +592,7 @@ QPDFParser::addScalar(Args&&... args) | ||
| 574 | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 575 | // encountered. | 593 | // encountered. |
| 576 | max_bad_count = 0; | 594 | max_bad_count = 0; |
| 577 | - check_too_many_bad_tokens(); | ||
| 578 | - return; // unreachable | 595 | + check_too_many_bad_tokens(); // always throws Error() |
| 579 | } | 596 | } |
| 580 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 581 | obj->setDescription(context, description, input.getLastOffset()); | 598 | obj->setDescription(context, description, input.getLastOffset()); |