Commit cb0f390cc1f98a8e82b27259f8f3cd5f162992eb
1 parent
7caa9ddf
Handle parse errors in stream data (fixes #1123)
A parse error in stream data, where the stream data contained a nested object, would cause a crash because qpdf was not correctly updating its internal state. Rework the QPDF JSON reactor so that it is not sensitive to parse errors in this way.
Showing 14 changed files with 311 additions and 151 deletions
libqpdf/QPDF_json.cc
| ... | ... | @@ -14,7 +14,7 @@ |
| 14 | 14 | |
| 15 | 15 | // This chart shows an example of the state transitions that would occur in parsing a minimal file. |
| 16 | 16 | |
| 17 | -// | st_initial | |
| 17 | +// | | |
| 18 | 18 | // { | -> st_top |
| 19 | 19 | // "qpdf": [ | -> st_qpdf |
| 20 | 20 | // { | -> st_qpdf_meta |
| ... | ... | @@ -47,7 +47,7 @@ |
| 47 | 47 | // } | <- st_objects |
| 48 | 48 | // } | <- st_qpdf |
| 49 | 49 | // ] | <- st_top |
| 50 | -// } | <- st_initial | |
| 50 | +// } | | |
| 51 | 51 | |
| 52 | 52 | static char const* JSON_PDF = ( |
| 53 | 53 | // force line break |
| ... | ... | @@ -99,7 +99,7 @@ is_indirect_object(std::string const& v, int& obj, int& gen) |
| 99 | 99 | } |
| 100 | 100 | obj = QUtil::string_to_int(o_str.c_str()); |
| 101 | 101 | gen = QUtil::string_to_int(g_str.c_str()); |
| 102 | - return true; | |
| 102 | + return obj > 0; | |
| 103 | 103 | } |
| 104 | 104 | |
| 105 | 105 | static bool |
| ... | ... | @@ -256,7 +256,6 @@ class QPDF::JSONReactor: public JSON::Reactor |
| 256 | 256 | |
| 257 | 257 | private: |
| 258 | 258 | enum state_e { |
| 259 | - st_initial, | |
| 260 | 259 | st_top, |
| 261 | 260 | st_qpdf, |
| 262 | 261 | st_qpdf_meta, |
| ... | ... | @@ -268,28 +267,35 @@ class QPDF::JSONReactor: public JSON::Reactor |
| 268 | 267 | st_ignore, |
| 269 | 268 | }; |
| 270 | 269 | |
| 270 | + struct StackFrame | |
| 271 | + { | |
| 272 | + StackFrame(state_e state) : | |
| 273 | + state(state){}; | |
| 274 | + StackFrame(state_e state, QPDFObjectHandle&& object) : | |
| 275 | + state(state), | |
| 276 | + object(object){}; | |
| 277 | + state_e state; | |
| 278 | + QPDFObjectHandle object; | |
| 279 | + }; | |
| 280 | + | |
| 271 | 281 | void containerStart(); |
| 272 | - void nestedState(std::string const& key, JSON const& value, state_e); | |
| 282 | + bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e); | |
| 273 | 283 | void setObjectDescription(QPDFObjectHandle& oh, JSON const& value); |
| 274 | 284 | QPDFObjectHandle makeObject(JSON const& value); |
| 275 | 285 | void error(qpdf_offset_t offset, std::string const& message); |
| 276 | - void | |
| 277 | - replaceObject(QPDFObjectHandle to_replace, QPDFObjectHandle replacement, JSON const& value); | |
| 286 | + void replaceObject(QPDFObjectHandle&& replacement, JSON const& value); | |
| 278 | 287 | |
| 279 | 288 | QPDF& pdf; |
| 280 | 289 | std::shared_ptr<InputSource> is; |
| 281 | 290 | bool must_be_complete{true}; |
| 282 | 291 | std::shared_ptr<QPDFValue::Description> descr; |
| 283 | 292 | bool errors{false}; |
| 284 | - bool parse_error{false}; | |
| 285 | 293 | bool saw_qpdf{false}; |
| 286 | 294 | bool saw_qpdf_meta{false}; |
| 287 | 295 | bool saw_objects{false}; |
| 288 | 296 | bool saw_json_version{false}; |
| 289 | 297 | bool saw_pdf_version{false}; |
| 290 | 298 | bool saw_trailer{false}; |
| 291 | - state_e state{st_initial}; | |
| 292 | - state_e next_state{st_top}; | |
| 293 | 299 | std::string cur_object; |
| 294 | 300 | bool saw_value{false}; |
| 295 | 301 | bool saw_stream{false}; |
| ... | ... | @@ -297,9 +303,10 @@ class QPDF::JSONReactor: public JSON::Reactor |
| 297 | 303 | bool saw_data{false}; |
| 298 | 304 | bool saw_datafile{false}; |
| 299 | 305 | bool this_stream_needs_data{false}; |
| 300 | - std::vector<state_e> state_stack{st_initial}; | |
| 301 | - std::vector<QPDFObjectHandle> object_stack; | |
| 302 | 306 | std::set<QPDFObjGen> reserved; |
| 307 | + std::vector<StackFrame> stack; | |
| 308 | + QPDFObjectHandle next_obj; | |
| 309 | + state_e next_state{st_top}; | |
| 303 | 310 | }; |
| 304 | 311 | |
| 305 | 312 | void |
| ... | ... | @@ -322,8 +329,12 @@ QPDF::JSONReactor::anyErrors() const |
| 322 | 329 | void |
| 323 | 330 | QPDF::JSONReactor::containerStart() |
| 324 | 331 | { |
| 325 | - state_stack.push_back(state); | |
| 326 | - state = next_state; | |
| 332 | + if (next_obj.isInitialized()) { | |
| 333 | + stack.emplace_back(next_state, std::move(next_obj)); | |
| 334 | + next_obj = QPDFObjectHandle(); | |
| 335 | + } else { | |
| 336 | + stack.emplace_back(next_state); | |
| 337 | + } | |
| 327 | 338 | } |
| 328 | 339 | |
| 329 | 340 | void |
| ... | ... | @@ -335,20 +346,19 @@ QPDF::JSONReactor::dictionaryStart() |
| 335 | 346 | void |
| 336 | 347 | QPDF::JSONReactor::arrayStart() |
| 337 | 348 | { |
| 338 | - containerStart(); | |
| 339 | - if (state == st_top) { | |
| 349 | + if (stack.empty()) { | |
| 340 | 350 | QTC::TC("qpdf", "QPDF_json top-level array"); |
| 341 | 351 | throw std::runtime_error("QPDF JSON must be a dictionary"); |
| 342 | 352 | } |
| 353 | + containerStart(); | |
| 343 | 354 | } |
| 344 | 355 | |
| 345 | 356 | void |
| 346 | 357 | QPDF::JSONReactor::containerEnd(JSON const& value) |
| 347 | 358 | { |
| 348 | - auto from_state = state; | |
| 349 | - state = state_stack.back(); | |
| 350 | - state_stack.pop_back(); | |
| 351 | - if (state == st_initial) { | |
| 359 | + auto from_state = stack.back().state; | |
| 360 | + stack.pop_back(); | |
| 361 | + if (stack.empty()) { | |
| 352 | 362 | if (!this->saw_qpdf) { |
| 353 | 363 | QTC::TC("qpdf", "QPDF_json missing qpdf"); |
| 354 | 364 | error(0, "\"qpdf\" object was not seen"); |
| ... | ... | @@ -371,26 +381,16 @@ QPDF::JSONReactor::containerEnd(JSON const& value) |
| 371 | 381 | } |
| 372 | 382 | } |
| 373 | 383 | } |
| 374 | - } else if (state == st_objects) { | |
| 375 | - if (parse_error) { | |
| 376 | - QTC::TC("qpdf", "QPDF_json don't check object after parse error"); | |
| 377 | - } else if (cur_object == "trailer") { | |
| 378 | - if (!saw_value) { | |
| 379 | - QTC::TC("qpdf", "QPDF_json trailer no value"); | |
| 380 | - error(value.getStart(), "\"trailer\" is missing \"value\""); | |
| 381 | - } | |
| 382 | - } else if (saw_value == saw_stream) { | |
| 384 | + } else if (from_state == st_trailer) { | |
| 385 | + if (!saw_value) { | |
| 386 | + QTC::TC("qpdf", "QPDF_json trailer no value"); | |
| 387 | + error(value.getStart(), "\"trailer\" is missing \"value\""); | |
| 388 | + } | |
| 389 | + } else if (from_state == st_object_top) { | |
| 390 | + if (saw_value == saw_stream) { | |
| 383 | 391 | QTC::TC("qpdf", "QPDF_json value stream both or neither"); |
| 384 | 392 | error(value.getStart(), "object must have exactly one of \"value\" or \"stream\""); |
| 385 | 393 | } |
| 386 | - object_stack.clear(); | |
| 387 | - this->cur_object = ""; | |
| 388 | - this->saw_dict = false; | |
| 389 | - this->saw_data = false; | |
| 390 | - this->saw_datafile = false; | |
| 391 | - this->saw_value = false; | |
| 392 | - this->saw_stream = false; | |
| 393 | - } else if (state == st_object_top) { | |
| 394 | 394 | if (saw_stream) { |
| 395 | 395 | if (!saw_dict) { |
| 396 | 396 | QTC::TC("qpdf", "QPDF_json stream no dict"); |
| ... | ... | @@ -414,11 +414,7 @@ QPDF::JSONReactor::containerEnd(JSON const& value) |
| 414 | 414 | } |
| 415 | 415 | } |
| 416 | 416 | } |
| 417 | - } else if ((state == st_stream) || (state == st_object)) { | |
| 418 | - if (!parse_error) { | |
| 419 | - object_stack.pop_back(); | |
| 420 | - } | |
| 421 | - } else if ((state == st_top) && (from_state == st_qpdf)) { | |
| 417 | + } else if (from_state == st_qpdf) { | |
| 422 | 418 | // Handle dangling indirect object references which the PDF spec says to treat as nulls. |
| 423 | 419 | // It's tempting to make this an error, but that would be wrong since valid input files may |
| 424 | 420 | // have these. |
| ... | ... | @@ -429,16 +425,27 @@ QPDF::JSONReactor::containerEnd(JSON const& value) |
| 429 | 425 | } |
| 430 | 426 | } |
| 431 | 427 | } |
| 428 | + if (!stack.empty()) { | |
| 429 | + auto state = stack.back().state; | |
| 430 | + if (state == st_objects) { | |
| 431 | + this->cur_object = ""; | |
| 432 | + this->saw_dict = false; | |
| 433 | + this->saw_data = false; | |
| 434 | + this->saw_datafile = false; | |
| 435 | + this->saw_value = false; | |
| 436 | + this->saw_stream = false; | |
| 437 | + } | |
| 438 | + } | |
| 432 | 439 | } |
| 433 | 440 | |
| 434 | 441 | void |
| 435 | -QPDF::JSONReactor::replaceObject( | |
| 436 | - QPDFObjectHandle to_replace, QPDFObjectHandle replacement, JSON const& value) | |
| 442 | +QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value) | |
| 437 | 443 | { |
| 438 | - auto og = to_replace.getObjGen(); | |
| 444 | + auto& tos = stack.back(); | |
| 445 | + auto og = tos.object.getObjGen(); | |
| 439 | 446 | this->pdf.replaceObject(og, replacement); |
| 440 | - auto oh = pdf.getObject(og); | |
| 441 | - setObjectDescription(oh, value); | |
| 447 | + next_obj = pdf.getObject(og); | |
| 448 | + setObjectDescription(tos.object, value); | |
| 442 | 449 | } |
| 443 | 450 | |
| 444 | 451 | void |
| ... | ... | @@ -448,22 +455,26 @@ QPDF::JSONReactor::topLevelScalar() |
| 448 | 455 | throw std::runtime_error("QPDF JSON must be a dictionary"); |
| 449 | 456 | } |
| 450 | 457 | |
| 451 | -void | |
| 452 | -QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) | |
| 458 | +bool | |
| 459 | +QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next) | |
| 453 | 460 | { |
| 454 | 461 | // Use this method when the next state is for processing a nested dictionary. |
| 455 | 462 | if (value.isDictionary()) { |
| 456 | 463 | this->next_state = next; |
| 457 | - } else { | |
| 458 | - error(value.getStart(), "\"" + key + "\" must be a dictionary"); | |
| 459 | - this->next_state = st_ignore; | |
| 460 | - this->parse_error = true; | |
| 464 | + return true; | |
| 461 | 465 | } |
| 466 | + error(value.getStart(), "\"" + key + "\" must be a dictionary"); | |
| 467 | + return false; | |
| 462 | 468 | } |
| 463 | 469 | |
| 464 | 470 | bool |
| 465 | 471 | QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 466 | 472 | { |
| 473 | + if (stack.empty()) { | |
| 474 | + throw std::logic_error("stack is empty in dictionaryItem"); | |
| 475 | + } | |
| 476 | + next_state = st_ignore; | |
| 477 | + auto state = stack.back().state; | |
| 467 | 478 | if (state == st_ignore) { |
| 468 | 479 | QTC::TC("qpdf", "QPDF_json ignoring in st_ignore"); |
| 469 | 480 | // ignore |
| ... | ... | @@ -473,51 +484,48 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 473 | 484 | if (!value.isArray()) { |
| 474 | 485 | QTC::TC("qpdf", "QPDF_json qpdf not array"); |
| 475 | 486 | error(value.getStart(), "\"qpdf\" must be an array"); |
| 476 | - next_state = st_ignore; | |
| 477 | - parse_error = true; | |
| 478 | 487 | } else { |
| 479 | 488 | next_state = st_qpdf; |
| 480 | 489 | } |
| 481 | 490 | } else { |
| 482 | 491 | // Ignore all other fields. |
| 483 | 492 | QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key"); |
| 484 | - next_state = st_ignore; | |
| 485 | 493 | } |
| 486 | 494 | } else if (state == st_qpdf_meta) { |
| 487 | 495 | if (key == "pdfversion") { |
| 488 | 496 | this->saw_pdf_version = true; |
| 489 | - bool version_okay = false; | |
| 490 | 497 | std::string v; |
| 498 | + bool okay = false; | |
| 491 | 499 | if (value.getString(v)) { |
| 492 | 500 | std::string version; |
| 493 | 501 | char const* p = v.c_str(); |
| 494 | 502 | if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { |
| 495 | - version_okay = true; | |
| 496 | 503 | this->pdf.m->pdf_version = version; |
| 504 | + okay = true; | |
| 497 | 505 | } |
| 498 | 506 | } |
| 499 | - if (!version_okay) { | |
| 507 | + if (!okay) { | |
| 500 | 508 | QTC::TC("qpdf", "QPDF_json bad pdf version"); |
| 501 | - error(value.getStart(), "invalid PDF version (must be x.y)"); | |
| 509 | + error(value.getStart(), "invalid PDF version (must be \"x.y\")"); | |
| 502 | 510 | } |
| 503 | 511 | } else if (key == "jsonversion") { |
| 504 | 512 | this->saw_json_version = true; |
| 505 | - bool version_okay = false; | |
| 506 | 513 | std::string v; |
| 514 | + bool okay = false; | |
| 507 | 515 | if (value.getNumber(v)) { |
| 508 | 516 | std::string version; |
| 509 | 517 | if (QUtil::string_to_int(v.c_str()) == 2) { |
| 510 | - version_okay = true; | |
| 518 | + okay = true; | |
| 511 | 519 | } |
| 512 | 520 | } |
| 513 | - if (!version_okay) { | |
| 521 | + if (!okay) { | |
| 514 | 522 | QTC::TC("qpdf", "QPDF_json bad json version"); |
| 515 | - error(value.getStart(), "invalid JSON version (must be 2)"); | |
| 523 | + error(value.getStart(), "invalid JSON version (must be numeric value 2)"); | |
| 516 | 524 | } |
| 517 | 525 | } else if (key == "pushedinheritedpageresources") { |
| 518 | 526 | bool v; |
| 519 | 527 | if (value.getBool(v)) { |
| 520 | - if ((!this->must_be_complete) && v) { | |
| 528 | + if (!this->must_be_complete && v) { | |
| 521 | 529 | this->pdf.pushInheritedAttributesToPage(); |
| 522 | 530 | } |
| 523 | 531 | } else { |
| ... | ... | @@ -527,7 +535,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 527 | 535 | } else if (key == "calledgetallpages") { |
| 528 | 536 | bool v; |
| 529 | 537 | if (value.getBool(v)) { |
| 530 | - if ((!this->must_be_complete) && v) { | |
| 538 | + if (!this->must_be_complete && v) { | |
| 531 | 539 | this->pdf.getAllPages(); |
| 532 | 540 | } |
| 533 | 541 | } else { |
| ... | ... | @@ -538,103 +546,95 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 538 | 546 | // ignore unknown keys for forward compatibility and to skip keys we don't care about |
| 539 | 547 | // like "maxobjectid". |
| 540 | 548 | QTC::TC("qpdf", "QPDF_json ignore second-level key"); |
| 541 | - next_state = st_ignore; | |
| 542 | 549 | } |
| 543 | 550 | } else if (state == st_objects) { |
| 544 | 551 | int obj = 0; |
| 545 | 552 | int gen = 0; |
| 546 | 553 | if (key == "trailer") { |
| 547 | 554 | this->saw_trailer = true; |
| 548 | - nestedState(key, value, st_trailer); | |
| 549 | 555 | this->cur_object = "trailer"; |
| 556 | + setNextStateIfDictionary(key, value, st_trailer); | |
| 550 | 557 | } else if (is_obj_key(key, obj, gen)) { |
| 551 | 558 | this->cur_object = key; |
| 552 | - auto oh = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen)); | |
| 553 | - object_stack.push_back(oh); | |
| 554 | - nestedState(key, value, st_object_top); | |
| 559 | + if (setNextStateIfDictionary(key, value, st_object_top)) { | |
| 560 | + next_obj = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen)); | |
| 561 | + } | |
| 555 | 562 | } else { |
| 556 | 563 | QTC::TC("qpdf", "QPDF_json bad object key"); |
| 557 | 564 | error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\""); |
| 558 | - next_state = st_ignore; | |
| 559 | - parse_error = true; | |
| 560 | 565 | } |
| 561 | 566 | } else if (state == st_object_top) { |
| 562 | - if (object_stack.size() == 0) { | |
| 563 | - throw std::logic_error("no object on stack in st_object_top"); | |
| 567 | + if (stack.empty()) { | |
| 568 | + throw std::logic_error("stack empty in st_object_top"); | |
| 569 | + } | |
| 570 | + auto& tos = stack.back(); | |
| 571 | + if (!tos.object.isInitialized()) { | |
| 572 | + throw std::logic_error("current object uninitialized in st_object_top"); | |
| 564 | 573 | } |
| 565 | - auto tos = object_stack.back(); | |
| 566 | - QPDFObjectHandle replacement; | |
| 567 | 574 | if (key == "value") { |
| 568 | - // Don't use nestedState since this can have any type. | |
| 575 | + // Don't use setNextStateIfDictionary since this can have any type. | |
| 569 | 576 | this->saw_value = true; |
| 577 | + replaceObject(makeObject(value), value); | |
| 570 | 578 | next_state = st_object; |
| 571 | - replacement = makeObject(value); | |
| 572 | - replaceObject(tos, replacement, value); | |
| 573 | 579 | } else if (key == "stream") { |
| 574 | 580 | this->saw_stream = true; |
| 575 | - nestedState(key, value, st_stream); | |
| 576 | - this->this_stream_needs_data = false; | |
| 577 | - if (tos.isStream()) { | |
| 578 | - QTC::TC("qpdf", "QPDF_json updating existing stream"); | |
| 581 | + if (setNextStateIfDictionary(key, value, st_stream)) { | |
| 582 | + this->this_stream_needs_data = false; | |
| 583 | + if (tos.object.isStream()) { | |
| 584 | + QTC::TC("qpdf", "QPDF_json updating existing stream"); | |
| 585 | + } else { | |
| 586 | + this->this_stream_needs_data = true; | |
| 587 | + replaceObject(pdf.reserveStream(tos.object.getObjGen()), value); | |
| 588 | + } | |
| 589 | + next_obj = tos.object; | |
| 579 | 590 | } else { |
| 580 | - this->this_stream_needs_data = true; | |
| 581 | - replacement = pdf.reserveStream(tos.getObjGen()); | |
| 582 | - replaceObject(tos, replacement, value); | |
| 591 | + // Error message already given above | |
| 592 | + QTC::TC("qpdf", "QPDF_json stream not a dictionary"); | |
| 583 | 593 | } |
| 584 | 594 | } else { |
| 585 | 595 | // Ignore unknown keys for forward compatibility |
| 586 | 596 | QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top"); |
| 587 | - next_state = st_ignore; | |
| 588 | - } | |
| 589 | - if (replacement.isInitialized()) { | |
| 590 | - object_stack.pop_back(); | |
| 591 | - object_stack.push_back(replacement); | |
| 592 | 597 | } |
| 593 | 598 | } else if (state == st_trailer) { |
| 594 | 599 | if (key == "value") { |
| 595 | 600 | this->saw_value = true; |
| 596 | - // The trailer must be a dictionary, so we can use nestedState. | |
| 597 | - nestedState("trailer.value", value, st_object); | |
| 598 | - this->pdf.m->trailer = makeObject(value); | |
| 599 | - setObjectDescription(this->pdf.m->trailer, value); | |
| 601 | + // The trailer must be a dictionary, so we can use setNextStateIfDictionary. | |
| 602 | + if (setNextStateIfDictionary("trailer.value", value, st_object)) { | |
| 603 | + this->pdf.m->trailer = makeObject(value); | |
| 604 | + setObjectDescription(this->pdf.m->trailer, value); | |
| 605 | + } | |
| 600 | 606 | } else if (key == "stream") { |
| 601 | 607 | // Don't need to set saw_stream here since there's already an error. |
| 602 | 608 | QTC::TC("qpdf", "QPDF_json trailer stream"); |
| 603 | 609 | error(value.getStart(), "the trailer may not be a stream"); |
| 604 | - next_state = st_ignore; | |
| 605 | - parse_error = true; | |
| 606 | 610 | } else { |
| 607 | 611 | // Ignore unknown keys for forward compatibility |
| 608 | 612 | QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer"); |
| 609 | - next_state = st_ignore; | |
| 610 | 613 | } |
| 611 | 614 | } else if (state == st_stream) { |
| 612 | - if (object_stack.size() == 0) { | |
| 613 | - throw std::logic_error("no object on stack in st_stream"); | |
| 615 | + if (stack.empty()) { | |
| 616 | + throw std::logic_error("stack empty in st_stream"); | |
| 614 | 617 | } |
| 615 | - auto tos = object_stack.back(); | |
| 616 | - if (!tos.isStream()) { | |
| 617 | - throw std::logic_error("top of stack is not stream in st_stream"); | |
| 618 | + auto& tos = stack.back(); | |
| 619 | + if (!tos.object.isStream()) { | |
| 620 | + throw std::logic_error("current object is not stream in st_stream"); | |
| 618 | 621 | } |
| 619 | 622 | auto uninitialized = QPDFObjectHandle(); |
| 620 | 623 | if (key == "dict") { |
| 621 | 624 | this->saw_dict = true; |
| 622 | - // Since a stream dictionary must be a dictionary, we can use nestedState to transition | |
| 623 | - // to st_value. | |
| 624 | - nestedState("stream.dict", value, st_object); | |
| 625 | - auto dict = makeObject(value); | |
| 626 | - if (dict.isDictionary()) { | |
| 627 | - tos.replaceDict(dict); | |
| 625 | + if (setNextStateIfDictionary("stream.dict", value, st_object)) { | |
| 626 | + tos.object.replaceDict(makeObject(value)); | |
| 628 | 627 | } else { |
| 629 | - // An error had already been given by nestedState | |
| 628 | + // An error had already been given by setNextStateIfDictionary | |
| 630 | 629 | QTC::TC("qpdf", "QPDF_json stream dict not dict"); |
| 631 | - parse_error = true; | |
| 632 | 630 | } |
| 633 | 631 | } else if (key == "data") { |
| 634 | 632 | this->saw_data = true; |
| 635 | 633 | std::string v; |
| 636 | 634 | if (!value.getString(v)) { |
| 635 | + QTC::TC("qpdf", "QPDF_json stream data not string"); | |
| 637 | 636 | error(value.getStart(), "\"stream.data\" must be a string"); |
| 637 | + tos.object.replaceStreamData("", uninitialized, uninitialized); | |
| 638 | 638 | } else { |
| 639 | 639 | // The range includes the quotes. |
| 640 | 640 | auto start = value.getStart() + 1; |
| ... | ... | @@ -642,34 +642,42 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 642 | 642 | if (end < start) { |
| 643 | 643 | throw std::logic_error("QPDF_json: JSON string length < 0"); |
| 644 | 644 | } |
| 645 | - tos.replaceStreamData(provide_data(is, start, end), uninitialized, uninitialized); | |
| 645 | + tos.object.replaceStreamData( | |
| 646 | + provide_data(is, start, end), uninitialized, uninitialized); | |
| 646 | 647 | } |
| 647 | 648 | } else if (key == "datafile") { |
| 648 | 649 | this->saw_datafile = true; |
| 649 | 650 | std::string filename; |
| 650 | - if (value.getString(filename)) { | |
| 651 | - tos.replaceStreamData(QUtil::file_provider(filename), uninitialized, uninitialized); | |
| 652 | - } else { | |
| 651 | + if (!value.getString(filename)) { | |
| 652 | + QTC::TC("qpdf", "QPDF_json stream datafile not string"); | |
| 653 | 653 | error( |
| 654 | 654 | value.getStart(), |
| 655 | - "\"stream.datafile\" must be a string containing a file " | |
| 656 | - "name"); | |
| 655 | + "\"stream.datafile\" must be a string containing a file name"); | |
| 656 | + tos.object.replaceStreamData("", uninitialized, uninitialized); | |
| 657 | + } else { | |
| 658 | + tos.object.replaceStreamData( | |
| 659 | + QUtil::file_provider(filename), uninitialized, uninitialized); | |
| 657 | 660 | } |
| 658 | 661 | } else { |
| 659 | 662 | // Ignore unknown keys for forward compatibility. |
| 660 | 663 | QTC::TC("qpdf", "QPDF_json ignore unknown key in stream"); |
| 661 | - next_state = st_ignore; | |
| 662 | 664 | } |
| 663 | 665 | } else if (state == st_object) { |
| 664 | - if (!parse_error) { | |
| 665 | - auto dict = object_stack.back(); | |
| 666 | - if (dict.isStream()) { | |
| 667 | - dict = dict.getDict(); | |
| 668 | - } | |
| 669 | - dict.replaceKey( | |
| 670 | - is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key, | |
| 671 | - makeObject(value)); | |
| 666 | + if (stack.empty()) { | |
| 667 | + throw std::logic_error("stack empty in st_object"); | |
| 668 | + } | |
| 669 | + auto& tos = stack.back(); | |
| 670 | + auto dict = tos.object; | |
| 671 | + if (dict.isStream()) { | |
| 672 | + dict = dict.getDict(); | |
| 673 | + } | |
| 674 | + if (!dict.isDictionary()) { | |
| 675 | + throw std::logic_error( | |
| 676 | + "current object is not stream or dictionary in st_object dictionary item"); | |
| 672 | 677 | } |
| 678 | + dict.replaceKey( | |
| 679 | + is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key, | |
| 680 | + makeObject(value)); | |
| 673 | 681 | } else { |
| 674 | 682 | throw std::logic_error("QPDF_json: unknown state " + std::to_string(state)); |
| 675 | 683 | } |
| ... | ... | @@ -679,25 +687,24 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 679 | 687 | bool |
| 680 | 688 | QPDF::JSONReactor::arrayItem(JSON const& value) |
| 681 | 689 | { |
| 690 | + if (stack.empty()) { | |
| 691 | + throw std::logic_error("stack is empty in arrayItem"); | |
| 692 | + } | |
| 693 | + next_state = st_ignore; | |
| 694 | + auto state = stack.back().state; | |
| 682 | 695 | if (state == st_qpdf) { |
| 683 | 696 | if (!this->saw_qpdf_meta) { |
| 684 | 697 | this->saw_qpdf_meta = true; |
| 685 | - nestedState("qpdf[0]", value, st_qpdf_meta); | |
| 698 | + setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta); | |
| 686 | 699 | } else if (!this->saw_objects) { |
| 687 | 700 | this->saw_objects = true; |
| 688 | - nestedState("qpdf[1]", value, st_objects); | |
| 701 | + setNextStateIfDictionary("qpdf[1]", value, st_objects); | |
| 689 | 702 | } else { |
| 690 | 703 | QTC::TC("qpdf", "QPDF_json more than two qpdf elements"); |
| 691 | 704 | error(value.getStart(), "\"qpdf\" must have two elements"); |
| 692 | - next_state = st_ignore; | |
| 693 | - parse_error = true; | |
| 694 | - } | |
| 695 | - } | |
| 696 | - if (state == st_object) { | |
| 697 | - if (!parse_error) { | |
| 698 | - auto tos = object_stack.back(); | |
| 699 | - tos.appendItem(makeObject(value)); | |
| 700 | 705 | } |
| 706 | + } else if (state == st_object) { | |
| 707 | + stack.back().object.appendItem(makeObject(value)); | |
| 701 | 708 | } |
| 702 | 709 | return true; |
| 703 | 710 | } |
| ... | ... | @@ -722,10 +729,12 @@ QPDF::JSONReactor::makeObject(JSON const& value) |
| 722 | 729 | bool bool_v = false; |
| 723 | 730 | if (value.isDictionary()) { |
| 724 | 731 | result = QPDFObjectHandle::newDictionary(); |
| 725 | - object_stack.push_back(result); | |
| 732 | + next_obj = result; | |
| 733 | + next_state = st_object; | |
| 726 | 734 | } else if (value.isArray()) { |
| 727 | 735 | result = QPDFObjectHandle::newArray(); |
| 728 | - object_stack.push_back(result); | |
| 736 | + next_obj = result; | |
| 737 | + next_state = st_object; | |
| 729 | 738 | } else if (value.isNull()) { |
| 730 | 739 | result = QPDFObjectHandle::newNull(); |
| 731 | 740 | } else if (value.getBool(bool_v)) { | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -668,7 +668,6 @@ QPDF_json value stream both or neither 0 |
| 668 | 668 | QPDFJob need json-stream-prefix for stdout 0 |
| 669 | 669 | QPDFJob write json to stdout 0 |
| 670 | 670 | QPDFJob write json to file 0 |
| 671 | -QPDF_json don't check object after parse error 0 | |
| 672 | 671 | QPDF_json ignoring unknown top-level key 0 |
| 673 | 672 | QPDF_json ignore second-level key 0 |
| 674 | 673 | QPDF_json ignore unknown key in object_top 0 |
| ... | ... | @@ -694,3 +693,6 @@ QPDFJob misplaced page range 0 |
| 694 | 693 | QPDFJob duplicated range 0 |
| 695 | 694 | QPDFJob json over/under no file 0 |
| 696 | 695 | QPDF_Array copy 1 |
| 696 | +QPDF_json stream data not string 0 | |
| 697 | +QPDF_json stream datafile not string 0 | |
| 698 | +QPDF_json stream not a dictionary 0 | ... | ... |
qpdf/qtest/qpdf-json.test
qpdf/qtest/qpdf/qjson-bad-data2.json
0 → 100644
| 1 | +{ | |
| 2 | + "qpdf": [ | |
| 3 | + { | |
| 4 | + "jsonversion": 2, | |
| 5 | + "pdfversion": "1.3", | |
| 6 | + "maxobjectid": 6 | |
| 7 | + }, | |
| 8 | + { | |
| 9 | + "obj:1 0 R": { | |
| 10 | + "value": { | |
| 11 | + "/Pages": "2 0 R", | |
| 12 | + "/Type": "/Catalog" | |
| 13 | + } | |
| 14 | + }, | |
| 15 | + "obj:2 0 R": { | |
| 16 | + "value": { | |
| 17 | + "/Count": 1, | |
| 18 | + "/Kids": [ | |
| 19 | + "3 0 R" | |
| 20 | + ], | |
| 21 | + "/Type": "/Pages" | |
| 22 | + } | |
| 23 | + }, | |
| 24 | + "obj:3 0 R": { | |
| 25 | + "value": { | |
| 26 | + "/Contents": ["4 0 R", "7 0 R"], | |
| 27 | + "/MediaBox": [ | |
| 28 | + 0, | |
| 29 | + 0, | |
| 30 | + 612, | |
| 31 | + 792 | |
| 32 | + ], | |
| 33 | + "/Parent": "2 0 R", | |
| 34 | + "/Resources": { | |
| 35 | + "/Font": { | |
| 36 | + "/F1": "6 0 R" | |
| 37 | + }, | |
| 38 | + "/ProcSet": "5 0 R" | |
| 39 | + }, | |
| 40 | + "/Type": "/Page" | |
| 41 | + } | |
| 42 | + }, | |
| 43 | + "obj:4 0 R": { | |
| 44 | + "stream": { | |
| 45 | + "data": [[]], | |
| 46 | + "dict": {} | |
| 47 | + } | |
| 48 | + }, | |
| 49 | + "obj:5 0 R": { | |
| 50 | + "value": [ | |
| 51 | + "/PDF", | |
| 52 | + "/Text" | |
| 53 | + ] | |
| 54 | + }, | |
| 55 | + "obj:6 0 R": { | |
| 56 | + "value": { | |
| 57 | + "/BaseFont": "/Helvetica", | |
| 58 | + "/Encoding": "/WinAnsiEncoding", | |
| 59 | + "/Subtype": "/Type1", | |
| 60 | + "/Type": "/Font" | |
| 61 | + } | |
| 62 | + }, | |
| 63 | + "trailer": { | |
| 64 | + "value": { | |
| 65 | + "/Root": "1 0 R", | |
| 66 | + "/Size": 7 | |
| 67 | + } | |
| 68 | + } | |
| 69 | + } | |
| 70 | + ] | |
| 71 | +} | ... | ... |
qpdf/qtest/qpdf/qjson-bad-data2.out
0 → 100644
qpdf/qtest/qpdf/qjson-bad-datafile2.json
0 → 100644
| 1 | +{ | |
| 2 | + "qpdf": [ | |
| 3 | + { | |
| 4 | + "jsonversion": 2, | |
| 5 | + "pdfversion": "1.3", | |
| 6 | + "maxobjectid": 6 | |
| 7 | + }, | |
| 8 | + { | |
| 9 | + "obj:1 0 R": { | |
| 10 | + "value": { | |
| 11 | + "/Pages": "2 0 R", | |
| 12 | + "/Type": "/Catalog" | |
| 13 | + } | |
| 14 | + }, | |
| 15 | + "obj:2 0 R": { | |
| 16 | + "value": { | |
| 17 | + "/Count": 1, | |
| 18 | + "/Kids": [ | |
| 19 | + "3 0 R" | |
| 20 | + ], | |
| 21 | + "/Type": "/Pages" | |
| 22 | + } | |
| 23 | + }, | |
| 24 | + "obj:3 0 R": { | |
| 25 | + "value": { | |
| 26 | + "/Contents": ["4 0 R", "7 0 R"], | |
| 27 | + "/MediaBox": [ | |
| 28 | + 0, | |
| 29 | + 0, | |
| 30 | + 612, | |
| 31 | + 792 | |
| 32 | + ], | |
| 33 | + "/Parent": "2 0 R", | |
| 34 | + "/Resources": { | |
| 35 | + "/Font": { | |
| 36 | + "/F1": "6 0 R" | |
| 37 | + }, | |
| 38 | + "/ProcSet": "5 0 R" | |
| 39 | + }, | |
| 40 | + "/Type": "/Page" | |
| 41 | + } | |
| 42 | + }, | |
| 43 | + "obj:4 0 R": { | |
| 44 | + "stream": { | |
| 45 | + "datafile": [[]], | |
| 46 | + "dict": {} | |
| 47 | + } | |
| 48 | + }, | |
| 49 | + "obj:5 0 R": { | |
| 50 | + "value": [ | |
| 51 | + "/PDF", | |
| 52 | + "/Text" | |
| 53 | + ] | |
| 54 | + }, | |
| 55 | + "obj:6 0 R": { | |
| 56 | + "value": { | |
| 57 | + "/BaseFont": "/Helvetica", | |
| 58 | + "/Encoding": "/WinAnsiEncoding", | |
| 59 | + "/Subtype": "/Type1", | |
| 60 | + "/Type": "/Font" | |
| 61 | + } | |
| 62 | + }, | |
| 63 | + "trailer": { | |
| 64 | + "value": { | |
| 65 | + "/Root": "1 0 R", | |
| 66 | + "/Size": 7 | |
| 67 | + } | |
| 68 | + } | |
| 69 | + } | |
| 70 | + ] | |
| 71 | +} | ... | ... |
qpdf/qtest/qpdf/qjson-bad-datafile2.out
0 → 100644
qpdf/qtest/qpdf/qjson-bad-pdf-version1.out
| 1 | -WARNING: qjson-bad-pdf-version1.json (offset 41): invalid JSON version (must be 2) | |
| 2 | -WARNING: qjson-bad-pdf-version1.json (offset 70): invalid PDF version (must be x.y) | |
| 1 | +WARNING: qjson-bad-pdf-version1.json (offset 41): invalid JSON version (must be numeric value 2) | |
| 2 | +WARNING: qjson-bad-pdf-version1.json (offset 70): invalid PDF version (must be "x.y") | |
| 3 | 3 | qpdf: qjson-bad-pdf-version1.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-bad-pdf-version2.out
| 1 | -WARNING: qjson-bad-pdf-version2.json (offset 41): invalid JSON version (must be 2) | |
| 2 | -WARNING: qjson-bad-pdf-version2.json (offset 66): invalid PDF version (must be x.y) | |
| 1 | +WARNING: qjson-bad-pdf-version2.json (offset 41): invalid JSON version (must be numeric value 2) | |
| 2 | +WARNING: qjson-bad-pdf-version2.json (offset 66): invalid PDF version (must be "x.y") | |
| 3 | 3 | WARNING: qjson-bad-pdf-version2.json (offset 97): calledgetallpages must be a boolean |
| 4 | 4 | WARNING: qjson-bad-pdf-version2.json (offset 138): pushedinheritedpageresources must be a boolean |
| 5 | 5 | qpdf: qjson-bad-pdf-version2.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-obj-key-errors.out
| 1 | 1 | WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 244): object must have exactly one of "value" or "stream" |
| 2 | 2 | WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 542): object must have exactly one of "value" or "stream" |
| 3 | -WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 710): "stream" is missing "dict" | |
| 4 | -WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 710): new "stream" must have exactly one of "data" or "datafile" | |
| 5 | -WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 800): new "stream" must have exactly one of "data" or "datafile" | |
| 3 | +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 690): "stream" is missing "dict" | |
| 4 | +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 690): new "stream" must have exactly one of "data" or "datafile" | |
| 5 | +WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 780): new "stream" must have exactly one of "data" or "datafile" | |
| 6 | 6 | WARNING: qjson-obj-key-errors.json (trailer, offset 1178): "trailer" is missing "value" |
| 7 | 7 | qpdf: qjson-obj-key-errors.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-stream-dict-not-dict.out
| 1 | 1 | WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 142): "stream.dict" must be a dictionary |
| 2 | -WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 142): unrecognized string value | |
| 3 | -WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 122): new "stream" must have exactly one of "data" or "datafile" | |
| 2 | +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 102): new "stream" must have exactly one of "data" or "datafile" | |
| 4 | 3 | WARNING: qjson-stream-dict-not-dict.json: "qpdf[1].trailer" was not seen |
| 5 | 4 | qpdf: qjson-stream-dict-not-dict.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-stream-not-dict.out
| 1 | 1 | WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary |
| 2 | +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 102): "stream" is missing "dict" | |
| 2 | 3 | WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen |
| 3 | 4 | qpdf: qjson-stream-not-dict.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-trailer-stream.out
qpdf/qtest/qpdf/update-from-json-errors.out
| 1 | -WARNING: good13.pdf (obj:4 0 R from qpdf-json-update-errors.json, offset 95): existing "stream" may at most one of "data" or "datafile" | |
| 1 | +WARNING: good13.pdf (obj:4 0 R from qpdf-json-update-errors.json, offset 75): existing "stream" may at most one of "data" or "datafile" | |
| 2 | 2 | WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 335): unrecognized string value |
| 3 | -WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 293): new "stream" must have exactly one of "data" or "datafile" | |
| 3 | +WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 273): new "stream" must have exactly one of "data" or "datafile" | |
| 4 | 4 | qpdf: qpdf-json-update-errors.json: errors found in JSON | ... | ... |