Commit cb0f390cc1f98a8e82b27259f8f3cd5f162992eb

Authored by Jay Berkenbilt
1 parent 7caa9ddf

Handle parse error in stream data (fixes #1123)

A parse error in which stream data contained a nested object would
cause a crash because qpdf was not correctly updating its internal
state. Rework the QPDF JSON reactor so that it is not sensitive to
parse errors in this way.
libqpdf/QPDF_json.cc
@@ -14,7 +14,7 @@ @@ -14,7 +14,7 @@
14 14
15 // This chart shows an example of the state transitions that would occur in parsing a minimal file. 15 // This chart shows an example of the state transitions that would occur in parsing a minimal file.
16 16
17 -// | st_initial 17 +// |
18 // { | -> st_top 18 // { | -> st_top
19 // "qpdf": [ | -> st_qpdf 19 // "qpdf": [ | -> st_qpdf
20 // { | -> st_qpdf_meta 20 // { | -> st_qpdf_meta
@@ -47,7 +47,7 @@ @@ -47,7 +47,7 @@
47 // } | <- st_objects 47 // } | <- st_objects
48 // } | <- st_qpdf 48 // } | <- st_qpdf
49 // ] | <- st_top 49 // ] | <- st_top
50 -// } | <- st_initial 50 +// } |
51 51
52 static char const* JSON_PDF = ( 52 static char const* JSON_PDF = (
53 // force line break 53 // force line break
@@ -99,7 +99,7 @@ is_indirect_object(std::string const& v, int& obj, int& gen) @@ -99,7 +99,7 @@ is_indirect_object(std::string const& v, int& obj, int& gen)
99 } 99 }
100 obj = QUtil::string_to_int(o_str.c_str()); 100 obj = QUtil::string_to_int(o_str.c_str());
101 gen = QUtil::string_to_int(g_str.c_str()); 101 gen = QUtil::string_to_int(g_str.c_str());
102 - return true; 102 + return obj > 0;
103 } 103 }
104 104
105 static bool 105 static bool
@@ -256,7 +256,6 @@ class QPDF::JSONReactor: public JSON::Reactor @@ -256,7 +256,6 @@ class QPDF::JSONReactor: public JSON::Reactor
256 256
257 private: 257 private:
258 enum state_e { 258 enum state_e {
259 - st_initial,  
260 st_top, 259 st_top,
261 st_qpdf, 260 st_qpdf,
262 st_qpdf_meta, 261 st_qpdf_meta,
@@ -268,28 +267,35 @@ class QPDF::JSONReactor: public JSON::Reactor @@ -268,28 +267,35 @@ class QPDF::JSONReactor: public JSON::Reactor
268 st_ignore, 267 st_ignore,
269 }; 268 };
270 269
  270 + struct StackFrame
  271 + {
  272 + StackFrame(state_e state) :
  273 + state(state){};
  274 + StackFrame(state_e state, QPDFObjectHandle&& object) :
  275 + state(state),
  276 + object(object){};
  277 + state_e state;
  278 + QPDFObjectHandle object;
  279 + };
  280 +
271 void containerStart(); 281 void containerStart();
272 - void nestedState(std::string const& key, JSON const& value, state_e); 282 + bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
273 void setObjectDescription(QPDFObjectHandle& oh, JSON const& value); 283 void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
274 QPDFObjectHandle makeObject(JSON const& value); 284 QPDFObjectHandle makeObject(JSON const& value);
275 void error(qpdf_offset_t offset, std::string const& message); 285 void error(qpdf_offset_t offset, std::string const& message);
276 - void  
277 - replaceObject(QPDFObjectHandle to_replace, QPDFObjectHandle replacement, JSON const& value); 286 + void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);
278 287
279 QPDF& pdf; 288 QPDF& pdf;
280 std::shared_ptr<InputSource> is; 289 std::shared_ptr<InputSource> is;
281 bool must_be_complete{true}; 290 bool must_be_complete{true};
282 std::shared_ptr<QPDFValue::Description> descr; 291 std::shared_ptr<QPDFValue::Description> descr;
283 bool errors{false}; 292 bool errors{false};
284 - bool parse_error{false};  
285 bool saw_qpdf{false}; 293 bool saw_qpdf{false};
286 bool saw_qpdf_meta{false}; 294 bool saw_qpdf_meta{false};
287 bool saw_objects{false}; 295 bool saw_objects{false};
288 bool saw_json_version{false}; 296 bool saw_json_version{false};
289 bool saw_pdf_version{false}; 297 bool saw_pdf_version{false};
290 bool saw_trailer{false}; 298 bool saw_trailer{false};
291 - state_e state{st_initial};  
292 - state_e next_state{st_top};  
293 std::string cur_object; 299 std::string cur_object;
294 bool saw_value{false}; 300 bool saw_value{false};
295 bool saw_stream{false}; 301 bool saw_stream{false};
@@ -297,9 +303,10 @@ class QPDF::JSONReactor: public JSON::Reactor @@ -297,9 +303,10 @@ class QPDF::JSONReactor: public JSON::Reactor
297 bool saw_data{false}; 303 bool saw_data{false};
298 bool saw_datafile{false}; 304 bool saw_datafile{false};
299 bool this_stream_needs_data{false}; 305 bool this_stream_needs_data{false};
300 - std::vector<state_e> state_stack{st_initial};  
301 - std::vector<QPDFObjectHandle> object_stack;  
302 std::set<QPDFObjGen> reserved; 306 std::set<QPDFObjGen> reserved;
  307 + std::vector<StackFrame> stack;
  308 + QPDFObjectHandle next_obj;
  309 + state_e next_state{st_top};
303 }; 310 };
304 311
305 void 312 void
@@ -322,8 +329,12 @@ QPDF::JSONReactor::anyErrors() const @@ -322,8 +329,12 @@ QPDF::JSONReactor::anyErrors() const
322 void 329 void
323 QPDF::JSONReactor::containerStart() 330 QPDF::JSONReactor::containerStart()
324 { 331 {
325 - state_stack.push_back(state);  
326 - state = next_state; 332 + if (next_obj.isInitialized()) {
  333 + stack.emplace_back(next_state, std::move(next_obj));
  334 + next_obj = QPDFObjectHandle();
  335 + } else {
  336 + stack.emplace_back(next_state);
  337 + }
327 } 338 }
328 339
329 void 340 void
@@ -335,20 +346,19 @@ QPDF::JSONReactor::dictionaryStart() @@ -335,20 +346,19 @@ QPDF::JSONReactor::dictionaryStart()
335 void 346 void
336 QPDF::JSONReactor::arrayStart() 347 QPDF::JSONReactor::arrayStart()
337 { 348 {
338 - containerStart();  
339 - if (state == st_top) { 349 + if (stack.empty()) {
340 QTC::TC("qpdf", "QPDF_json top-level array"); 350 QTC::TC("qpdf", "QPDF_json top-level array");
341 throw std::runtime_error("QPDF JSON must be a dictionary"); 351 throw std::runtime_error("QPDF JSON must be a dictionary");
342 } 352 }
  353 + containerStart();
343 } 354 }
344 355
345 void 356 void
346 QPDF::JSONReactor::containerEnd(JSON const& value) 357 QPDF::JSONReactor::containerEnd(JSON const& value)
347 { 358 {
348 - auto from_state = state;  
349 - state = state_stack.back();  
350 - state_stack.pop_back();  
351 - if (state == st_initial) { 359 + auto from_state = stack.back().state;
  360 + stack.pop_back();
  361 + if (stack.empty()) {
352 if (!this->saw_qpdf) { 362 if (!this->saw_qpdf) {
353 QTC::TC("qpdf", "QPDF_json missing qpdf"); 363 QTC::TC("qpdf", "QPDF_json missing qpdf");
354 error(0, "\"qpdf\" object was not seen"); 364 error(0, "\"qpdf\" object was not seen");
@@ -371,26 +381,16 @@ QPDF::JSONReactor::containerEnd(JSON const& value) @@ -371,26 +381,16 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
371 } 381 }
372 } 382 }
373 } 383 }
374 - } else if (state == st_objects) {  
375 - if (parse_error) {  
376 - QTC::TC("qpdf", "QPDF_json don't check object after parse error");  
377 - } else if (cur_object == "trailer") {  
378 - if (!saw_value) {  
379 - QTC::TC("qpdf", "QPDF_json trailer no value");  
380 - error(value.getStart(), "\"trailer\" is missing \"value\"");  
381 - }  
382 - } else if (saw_value == saw_stream) { 384 + } else if (from_state == st_trailer) {
  385 + if (!saw_value) {
  386 + QTC::TC("qpdf", "QPDF_json trailer no value");
  387 + error(value.getStart(), "\"trailer\" is missing \"value\"");
  388 + }
  389 + } else if (from_state == st_object_top) {
  390 + if (saw_value == saw_stream) {
383 QTC::TC("qpdf", "QPDF_json value stream both or neither"); 391 QTC::TC("qpdf", "QPDF_json value stream both or neither");
384 error(value.getStart(), "object must have exactly one of \"value\" or \"stream\""); 392 error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
385 } 393 }
386 - object_stack.clear();  
387 - this->cur_object = "";  
388 - this->saw_dict = false;  
389 - this->saw_data = false;  
390 - this->saw_datafile = false;  
391 - this->saw_value = false;  
392 - this->saw_stream = false;  
393 - } else if (state == st_object_top) {  
394 if (saw_stream) { 394 if (saw_stream) {
395 if (!saw_dict) { 395 if (!saw_dict) {
396 QTC::TC("qpdf", "QPDF_json stream no dict"); 396 QTC::TC("qpdf", "QPDF_json stream no dict");
@@ -414,11 +414,7 @@ QPDF::JSONReactor::containerEnd(JSON const& value) @@ -414,11 +414,7 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
414 } 414 }
415 } 415 }
416 } 416 }
417 - } else if ((state == st_stream) || (state == st_object)) {  
418 - if (!parse_error) {  
419 - object_stack.pop_back();  
420 - }  
421 - } else if ((state == st_top) && (from_state == st_qpdf)) { 417 + } else if (from_state == st_qpdf) {
422 // Handle dangling indirect object references which the PDF spec says to treat as nulls. 418 // Handle dangling indirect object references which the PDF spec says to treat as nulls.
423 // It's tempting to make this an error, but that would be wrong since valid input files may 419 // It's tempting to make this an error, but that would be wrong since valid input files may
424 // have these. 420 // have these.
@@ -429,16 +425,27 @@ QPDF::JSONReactor::containerEnd(JSON const& value) @@ -429,16 +425,27 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
429 } 425 }
430 } 426 }
431 } 427 }
  428 + if (!stack.empty()) {
  429 + auto state = stack.back().state;
  430 + if (state == st_objects) {
  431 + this->cur_object = "";
  432 + this->saw_dict = false;
  433 + this->saw_data = false;
  434 + this->saw_datafile = false;
  435 + this->saw_value = false;
  436 + this->saw_stream = false;
  437 + }
  438 + }
432 } 439 }
433 440
434 void 441 void
435 -QPDF::JSONReactor::replaceObject(  
436 - QPDFObjectHandle to_replace, QPDFObjectHandle replacement, JSON const& value) 442 +QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
437 { 443 {
438 - auto og = to_replace.getObjGen(); 444 + auto& tos = stack.back();
  445 + auto og = tos.object.getObjGen();
439 this->pdf.replaceObject(og, replacement); 446 this->pdf.replaceObject(og, replacement);
440 - auto oh = pdf.getObject(og);  
441 - setObjectDescription(oh, value); 447 + next_obj = pdf.getObject(og);
  448 + setObjectDescription(tos.object, value);
442 } 449 }
443 450
444 void 451 void
@@ -448,22 +455,26 @@ QPDF::JSONReactor::topLevelScalar() @@ -448,22 +455,26 @@ QPDF::JSONReactor::topLevelScalar()
448 throw std::runtime_error("QPDF JSON must be a dictionary"); 455 throw std::runtime_error("QPDF JSON must be a dictionary");
449 } 456 }
450 457
451 -void  
452 -QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) 458 +bool
  459 +QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
453 { 460 {
454 // Use this method when the next state is for processing a nested dictionary. 461 // Use this method when the next state is for processing a nested dictionary.
455 if (value.isDictionary()) { 462 if (value.isDictionary()) {
456 this->next_state = next; 463 this->next_state = next;
457 - } else {  
458 - error(value.getStart(), "\"" + key + "\" must be a dictionary");  
459 - this->next_state = st_ignore;  
460 - this->parse_error = true; 464 + return true;
461 } 465 }
  466 + error(value.getStart(), "\"" + key + "\" must be a dictionary");
  467 + return false;
462 } 468 }
463 469
464 bool 470 bool
465 QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) 471 QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
466 { 472 {
  473 + if (stack.empty()) {
  474 + throw std::logic_error("stack is empty in dictionaryItem");
  475 + }
  476 + next_state = st_ignore;
  477 + auto state = stack.back().state;
467 if (state == st_ignore) { 478 if (state == st_ignore) {
468 QTC::TC("qpdf", "QPDF_json ignoring in st_ignore"); 479 QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
469 // ignore 480 // ignore
@@ -473,51 +484,48 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -473,51 +484,48 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
473 if (!value.isArray()) { 484 if (!value.isArray()) {
474 QTC::TC("qpdf", "QPDF_json qpdf not array"); 485 QTC::TC("qpdf", "QPDF_json qpdf not array");
475 error(value.getStart(), "\"qpdf\" must be an array"); 486 error(value.getStart(), "\"qpdf\" must be an array");
476 - next_state = st_ignore;  
477 - parse_error = true;  
478 } else { 487 } else {
479 next_state = st_qpdf; 488 next_state = st_qpdf;
480 } 489 }
481 } else { 490 } else {
482 // Ignore all other fields. 491 // Ignore all other fields.
483 QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key"); 492 QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key");
484 - next_state = st_ignore;  
485 } 493 }
486 } else if (state == st_qpdf_meta) { 494 } else if (state == st_qpdf_meta) {
487 if (key == "pdfversion") { 495 if (key == "pdfversion") {
488 this->saw_pdf_version = true; 496 this->saw_pdf_version = true;
489 - bool version_okay = false;  
490 std::string v; 497 std::string v;
  498 + bool okay = false;
491 if (value.getString(v)) { 499 if (value.getString(v)) {
492 std::string version; 500 std::string version;
493 char const* p = v.c_str(); 501 char const* p = v.c_str();
494 if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { 502 if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
495 - version_okay = true;  
496 this->pdf.m->pdf_version = version; 503 this->pdf.m->pdf_version = version;
  504 + okay = true;
497 } 505 }
498 } 506 }
499 - if (!version_okay) { 507 + if (!okay) {
500 QTC::TC("qpdf", "QPDF_json bad pdf version"); 508 QTC::TC("qpdf", "QPDF_json bad pdf version");
501 - error(value.getStart(), "invalid PDF version (must be x.y)"); 509 + error(value.getStart(), "invalid PDF version (must be \"x.y\")");
502 } 510 }
503 } else if (key == "jsonversion") { 511 } else if (key == "jsonversion") {
504 this->saw_json_version = true; 512 this->saw_json_version = true;
505 - bool version_okay = false;  
506 std::string v; 513 std::string v;
  514 + bool okay = false;
507 if (value.getNumber(v)) { 515 if (value.getNumber(v)) {
508 std::string version; 516 std::string version;
509 if (QUtil::string_to_int(v.c_str()) == 2) { 517 if (QUtil::string_to_int(v.c_str()) == 2) {
510 - version_okay = true; 518 + okay = true;
511 } 519 }
512 } 520 }
513 - if (!version_okay) { 521 + if (!okay) {
514 QTC::TC("qpdf", "QPDF_json bad json version"); 522 QTC::TC("qpdf", "QPDF_json bad json version");
515 - error(value.getStart(), "invalid JSON version (must be 2)"); 523 + error(value.getStart(), "invalid JSON version (must be numeric value 2)");
516 } 524 }
517 } else if (key == "pushedinheritedpageresources") { 525 } else if (key == "pushedinheritedpageresources") {
518 bool v; 526 bool v;
519 if (value.getBool(v)) { 527 if (value.getBool(v)) {
520 - if ((!this->must_be_complete) && v) { 528 + if (!this->must_be_complete && v) {
521 this->pdf.pushInheritedAttributesToPage(); 529 this->pdf.pushInheritedAttributesToPage();
522 } 530 }
523 } else { 531 } else {
@@ -527,7 +535,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -527,7 +535,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
527 } else if (key == "calledgetallpages") { 535 } else if (key == "calledgetallpages") {
528 bool v; 536 bool v;
529 if (value.getBool(v)) { 537 if (value.getBool(v)) {
530 - if ((!this->must_be_complete) && v) { 538 + if (!this->must_be_complete && v) {
531 this->pdf.getAllPages(); 539 this->pdf.getAllPages();
532 } 540 }
533 } else { 541 } else {
@@ -538,103 +546,95 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -538,103 +546,95 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
538 // ignore unknown keys for forward compatibility and to skip keys we don't care about 546 // ignore unknown keys for forward compatibility and to skip keys we don't care about
539 // like "maxobjectid". 547 // like "maxobjectid".
540 QTC::TC("qpdf", "QPDF_json ignore second-level key"); 548 QTC::TC("qpdf", "QPDF_json ignore second-level key");
541 - next_state = st_ignore;  
542 } 549 }
543 } else if (state == st_objects) { 550 } else if (state == st_objects) {
544 int obj = 0; 551 int obj = 0;
545 int gen = 0; 552 int gen = 0;
546 if (key == "trailer") { 553 if (key == "trailer") {
547 this->saw_trailer = true; 554 this->saw_trailer = true;
548 - nestedState(key, value, st_trailer);  
549 this->cur_object = "trailer"; 555 this->cur_object = "trailer";
  556 + setNextStateIfDictionary(key, value, st_trailer);
550 } else if (is_obj_key(key, obj, gen)) { 557 } else if (is_obj_key(key, obj, gen)) {
551 this->cur_object = key; 558 this->cur_object = key;
552 - auto oh = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen));  
553 - object_stack.push_back(oh);  
554 - nestedState(key, value, st_object_top); 559 + if (setNextStateIfDictionary(key, value, st_object_top)) {
  560 + next_obj = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen));
  561 + }
555 } else { 562 } else {
556 QTC::TC("qpdf", "QPDF_json bad object key"); 563 QTC::TC("qpdf", "QPDF_json bad object key");
557 error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\""); 564 error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
558 - next_state = st_ignore;  
559 - parse_error = true;  
560 } 565 }
561 } else if (state == st_object_top) { 566 } else if (state == st_object_top) {
562 - if (object_stack.size() == 0) {  
563 - throw std::logic_error("no object on stack in st_object_top"); 567 + if (stack.empty()) {
  568 + throw std::logic_error("stack empty in st_object_top");
  569 + }
  570 + auto& tos = stack.back();
  571 + if (!tos.object.isInitialized()) {
  572 + throw std::logic_error("current object uninitialized in st_object_top");
564 } 573 }
565 - auto tos = object_stack.back();  
566 - QPDFObjectHandle replacement;  
567 if (key == "value") { 574 if (key == "value") {
568 - // Don't use nestedState since this can have any type. 575 + // Don't use setNextStateIfDictionary since this can have any type.
569 this->saw_value = true; 576 this->saw_value = true;
  577 + replaceObject(makeObject(value), value);
570 next_state = st_object; 578 next_state = st_object;
571 - replacement = makeObject(value);  
572 - replaceObject(tos, replacement, value);  
573 } else if (key == "stream") { 579 } else if (key == "stream") {
574 this->saw_stream = true; 580 this->saw_stream = true;
575 - nestedState(key, value, st_stream);  
576 - this->this_stream_needs_data = false;  
577 - if (tos.isStream()) {  
578 - QTC::TC("qpdf", "QPDF_json updating existing stream"); 581 + if (setNextStateIfDictionary(key, value, st_stream)) {
  582 + this->this_stream_needs_data = false;
  583 + if (tos.object.isStream()) {
  584 + QTC::TC("qpdf", "QPDF_json updating existing stream");
  585 + } else {
  586 + this->this_stream_needs_data = true;
  587 + replaceObject(pdf.reserveStream(tos.object.getObjGen()), value);
  588 + }
  589 + next_obj = tos.object;
579 } else { 590 } else {
580 - this->this_stream_needs_data = true;  
581 - replacement = pdf.reserveStream(tos.getObjGen());  
582 - replaceObject(tos, replacement, value); 591 + // Error message already given above
  592 + QTC::TC("qpdf", "QPDF_json stream not a dictionary");
583 } 593 }
584 } else { 594 } else {
585 // Ignore unknown keys for forward compatibility 595 // Ignore unknown keys for forward compatibility
586 QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top"); 596 QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top");
587 - next_state = st_ignore;  
588 - }  
589 - if (replacement.isInitialized()) {  
590 - object_stack.pop_back();  
591 - object_stack.push_back(replacement);  
592 } 597 }
593 } else if (state == st_trailer) { 598 } else if (state == st_trailer) {
594 if (key == "value") { 599 if (key == "value") {
595 this->saw_value = true; 600 this->saw_value = true;
596 - // The trailer must be a dictionary, so we can use nestedState.  
597 - nestedState("trailer.value", value, st_object);  
598 - this->pdf.m->trailer = makeObject(value);  
599 - setObjectDescription(this->pdf.m->trailer, value); 601 + // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
  602 + if (setNextStateIfDictionary("trailer.value", value, st_object)) {
  603 + this->pdf.m->trailer = makeObject(value);
  604 + setObjectDescription(this->pdf.m->trailer, value);
  605 + }
600 } else if (key == "stream") { 606 } else if (key == "stream") {
601 // Don't need to set saw_stream here since there's already an error. 607 // Don't need to set saw_stream here since there's already an error.
602 QTC::TC("qpdf", "QPDF_json trailer stream"); 608 QTC::TC("qpdf", "QPDF_json trailer stream");
603 error(value.getStart(), "the trailer may not be a stream"); 609 error(value.getStart(), "the trailer may not be a stream");
604 - next_state = st_ignore;  
605 - parse_error = true;  
606 } else { 610 } else {
607 // Ignore unknown keys for forward compatibility 611 // Ignore unknown keys for forward compatibility
608 QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer"); 612 QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer");
609 - next_state = st_ignore;  
610 } 613 }
611 } else if (state == st_stream) { 614 } else if (state == st_stream) {
612 - if (object_stack.size() == 0) {  
613 - throw std::logic_error("no object on stack in st_stream"); 615 + if (stack.empty()) {
  616 + throw std::logic_error("stack empty in st_stream");
614 } 617 }
615 - auto tos = object_stack.back();  
616 - if (!tos.isStream()) {  
617 - throw std::logic_error("top of stack is not stream in st_stream"); 618 + auto& tos = stack.back();
  619 + if (!tos.object.isStream()) {
  620 + throw std::logic_error("current object is not stream in st_stream");
618 } 621 }
619 auto uninitialized = QPDFObjectHandle(); 622 auto uninitialized = QPDFObjectHandle();
620 if (key == "dict") { 623 if (key == "dict") {
621 this->saw_dict = true; 624 this->saw_dict = true;
622 - // Since a stream dictionary must be a dictionary, we can use nestedState to transition  
623 - // to st_value.  
624 - nestedState("stream.dict", value, st_object);  
625 - auto dict = makeObject(value);  
626 - if (dict.isDictionary()) {  
627 - tos.replaceDict(dict); 625 + if (setNextStateIfDictionary("stream.dict", value, st_object)) {
  626 + tos.object.replaceDict(makeObject(value));
628 } else { 627 } else {
629 - // An error had already been given by nestedState 628 + // An error had already been given by setNextStateIfDictionary
630 QTC::TC("qpdf", "QPDF_json stream dict not dict"); 629 QTC::TC("qpdf", "QPDF_json stream dict not dict");
631 - parse_error = true;  
632 } 630 }
633 } else if (key == "data") { 631 } else if (key == "data") {
634 this->saw_data = true; 632 this->saw_data = true;
635 std::string v; 633 std::string v;
636 if (!value.getString(v)) { 634 if (!value.getString(v)) {
  635 + QTC::TC("qpdf", "QPDF_json stream data not string");
637 error(value.getStart(), "\"stream.data\" must be a string"); 636 error(value.getStart(), "\"stream.data\" must be a string");
  637 + tos.object.replaceStreamData("", uninitialized, uninitialized);
638 } else { 638 } else {
639 // The range includes the quotes. 639 // The range includes the quotes.
640 auto start = value.getStart() + 1; 640 auto start = value.getStart() + 1;
@@ -642,34 +642,42 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -642,34 +642,42 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
642 if (end < start) { 642 if (end < start) {
643 throw std::logic_error("QPDF_json: JSON string length < 0"); 643 throw std::logic_error("QPDF_json: JSON string length < 0");
644 } 644 }
645 - tos.replaceStreamData(provide_data(is, start, end), uninitialized, uninitialized); 645 + tos.object.replaceStreamData(
  646 + provide_data(is, start, end), uninitialized, uninitialized);
646 } 647 }
647 } else if (key == "datafile") { 648 } else if (key == "datafile") {
648 this->saw_datafile = true; 649 this->saw_datafile = true;
649 std::string filename; 650 std::string filename;
650 - if (value.getString(filename)) {  
651 - tos.replaceStreamData(QUtil::file_provider(filename), uninitialized, uninitialized);  
652 - } else { 651 + if (!value.getString(filename)) {
  652 + QTC::TC("qpdf", "QPDF_json stream datafile not string");
653 error( 653 error(
654 value.getStart(), 654 value.getStart(),
655 - "\"stream.datafile\" must be a string containing a file "  
656 - "name"); 655 + "\"stream.datafile\" must be a string containing a file name");
  656 + tos.object.replaceStreamData("", uninitialized, uninitialized);
  657 + } else {
  658 + tos.object.replaceStreamData(
  659 + QUtil::file_provider(filename), uninitialized, uninitialized);
657 } 660 }
658 } else { 661 } else {
659 // Ignore unknown keys for forward compatibility. 662 // Ignore unknown keys for forward compatibility.
660 QTC::TC("qpdf", "QPDF_json ignore unknown key in stream"); 663 QTC::TC("qpdf", "QPDF_json ignore unknown key in stream");
661 - next_state = st_ignore;  
662 } 664 }
663 } else if (state == st_object) { 665 } else if (state == st_object) {
664 - if (!parse_error) {  
665 - auto dict = object_stack.back();  
666 - if (dict.isStream()) {  
667 - dict = dict.getDict();  
668 - }  
669 - dict.replaceKey(  
670 - is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,  
671 - makeObject(value)); 666 + if (stack.empty()) {
  667 + throw std::logic_error("stack empty in st_object");
  668 + }
  669 + auto& tos = stack.back();
  670 + auto dict = tos.object;
  671 + if (dict.isStream()) {
  672 + dict = dict.getDict();
  673 + }
  674 + if (!dict.isDictionary()) {
  675 + throw std::logic_error(
  676 + "current object is not stream or dictionary in st_object dictionary item");
672 } 677 }
  678 + dict.replaceKey(
  679 + is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
  680 + makeObject(value));
673 } else { 681 } else {
674 throw std::logic_error("QPDF_json: unknown state " + std::to_string(state)); 682 throw std::logic_error("QPDF_json: unknown state " + std::to_string(state));
675 } 683 }
@@ -679,25 +687,24 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -679,25 +687,24 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
679 bool 687 bool
680 QPDF::JSONReactor::arrayItem(JSON const& value) 688 QPDF::JSONReactor::arrayItem(JSON const& value)
681 { 689 {
  690 + if (stack.empty()) {
  691 + throw std::logic_error("stack is empty in arrayItem");
  692 + }
  693 + next_state = st_ignore;
  694 + auto state = stack.back().state;
682 if (state == st_qpdf) { 695 if (state == st_qpdf) {
683 if (!this->saw_qpdf_meta) { 696 if (!this->saw_qpdf_meta) {
684 this->saw_qpdf_meta = true; 697 this->saw_qpdf_meta = true;
685 - nestedState("qpdf[0]", value, st_qpdf_meta); 698 + setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
686 } else if (!this->saw_objects) { 699 } else if (!this->saw_objects) {
687 this->saw_objects = true; 700 this->saw_objects = true;
688 - nestedState("qpdf[1]", value, st_objects); 701 + setNextStateIfDictionary("qpdf[1]", value, st_objects);
689 } else { 702 } else {
690 QTC::TC("qpdf", "QPDF_json more than two qpdf elements"); 703 QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
691 error(value.getStart(), "\"qpdf\" must have two elements"); 704 error(value.getStart(), "\"qpdf\" must have two elements");
692 - next_state = st_ignore;  
693 - parse_error = true;  
694 - }  
695 - }  
696 - if (state == st_object) {  
697 - if (!parse_error) {  
698 - auto tos = object_stack.back();  
699 - tos.appendItem(makeObject(value));  
700 } 705 }
  706 + } else if (state == st_object) {
  707 + stack.back().object.appendItem(makeObject(value));
701 } 708 }
702 return true; 709 return true;
703 } 710 }
@@ -722,10 +729,12 @@ QPDF::JSONReactor::makeObject(JSON const& value) @@ -722,10 +729,12 @@ QPDF::JSONReactor::makeObject(JSON const& value)
722 bool bool_v = false; 729 bool bool_v = false;
723 if (value.isDictionary()) { 730 if (value.isDictionary()) {
724 result = QPDFObjectHandle::newDictionary(); 731 result = QPDFObjectHandle::newDictionary();
725 - object_stack.push_back(result); 732 + next_obj = result;
  733 + next_state = st_object;
726 } else if (value.isArray()) { 734 } else if (value.isArray()) {
727 result = QPDFObjectHandle::newArray(); 735 result = QPDFObjectHandle::newArray();
728 - object_stack.push_back(result); 736 + next_obj = result;
  737 + next_state = st_object;
729 } else if (value.isNull()) { 738 } else if (value.isNull()) {
730 result = QPDFObjectHandle::newNull(); 739 result = QPDFObjectHandle::newNull();
731 } else if (value.getBool(bool_v)) { 740 } else if (value.getBool(bool_v)) {
qpdf/qpdf.testcov
@@ -668,7 +668,6 @@ QPDF_json value stream both or neither 0 @@ -668,7 +668,6 @@ QPDF_json value stream both or neither 0
668 QPDFJob need json-stream-prefix for stdout 0 668 QPDFJob need json-stream-prefix for stdout 0
669 QPDFJob write json to stdout 0 669 QPDFJob write json to stdout 0
670 QPDFJob write json to file 0 670 QPDFJob write json to file 0
671 -QPDF_json don't check object after parse error 0  
672 QPDF_json ignoring unknown top-level key 0 671 QPDF_json ignoring unknown top-level key 0
673 QPDF_json ignore second-level key 0 672 QPDF_json ignore second-level key 0
674 QPDF_json ignore unknown key in object_top 0 673 QPDF_json ignore unknown key in object_top 0
@@ -694,3 +693,6 @@ QPDFJob misplaced page range 0 @@ -694,3 +693,6 @@ QPDFJob misplaced page range 0
694 QPDFJob duplicated range 0 693 QPDFJob duplicated range 0
695 QPDFJob json over/under no file 0 694 QPDFJob json over/under no file 0
696 QPDF_Array copy 1 695 QPDF_Array copy 1
  696 +QPDF_json stream data not string 0
  697 +QPDF_json stream datafile not string 0
  698 +QPDF_json stream not a dictionary 0
qpdf/qtest/qpdf-json.test
@@ -37,6 +37,8 @@ my @badfiles = ( @@ -37,6 +37,8 @@ my @badfiles = (
37 'obj-key-errors', 37 'obj-key-errors',
38 'bad-data', 38 'bad-data',
39 'bad-datafile', 39 'bad-datafile',
  40 + 'bad-data2',
  41 + 'bad-datafile2',
40 ); 42 );
41 43
42 $n_tests += scalar(@badfiles); 44 $n_tests += scalar(@badfiles);
qpdf/qtest/qpdf/qjson-bad-data2.json 0 → 100644
  1 +{
  2 + "qpdf": [
  3 + {
  4 + "jsonversion": 2,
  5 + "pdfversion": "1.3",
  6 + "maxobjectid": 6
  7 + },
  8 + {
  9 + "obj:1 0 R": {
  10 + "value": {
  11 + "/Pages": "2 0 R",
  12 + "/Type": "/Catalog"
  13 + }
  14 + },
  15 + "obj:2 0 R": {
  16 + "value": {
  17 + "/Count": 1,
  18 + "/Kids": [
  19 + "3 0 R"
  20 + ],
  21 + "/Type": "/Pages"
  22 + }
  23 + },
  24 + "obj:3 0 R": {
  25 + "value": {
  26 + "/Contents": ["4 0 R", "7 0 R"],
  27 + "/MediaBox": [
  28 + 0,
  29 + 0,
  30 + 612,
  31 + 792
  32 + ],
  33 + "/Parent": "2 0 R",
  34 + "/Resources": {
  35 + "/Font": {
  36 + "/F1": "6 0 R"
  37 + },
  38 + "/ProcSet": "5 0 R"
  39 + },
  40 + "/Type": "/Page"
  41 + }
  42 + },
  43 + "obj:4 0 R": {
  44 + "stream": {
  45 + "data": [[]],
  46 + "dict": {}
  47 + }
  48 + },
  49 + "obj:5 0 R": {
  50 + "value": [
  51 + "/PDF",
  52 + "/Text"
  53 + ]
  54 + },
  55 + "obj:6 0 R": {
  56 + "value": {
  57 + "/BaseFont": "/Helvetica",
  58 + "/Encoding": "/WinAnsiEncoding",
  59 + "/Subtype": "/Type1",
  60 + "/Type": "/Font"
  61 + }
  62 + },
  63 + "trailer": {
  64 + "value": {
  65 + "/Root": "1 0 R",
  66 + "/Size": 7
  67 + }
  68 + }
  69 + }
  70 + ]
  71 +}
qpdf/qtest/qpdf/qjson-bad-data2.out 0 → 100644
  1 +WARNING: qjson-bad-data2.json (obj:4 0 R, offset 846): "stream.data" must be a string
  2 +qpdf: qjson-bad-data2.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-datafile2.json 0 → 100644
  1 +{
  2 + "qpdf": [
  3 + {
  4 + "jsonversion": 2,
  5 + "pdfversion": "1.3",
  6 + "maxobjectid": 6
  7 + },
  8 + {
  9 + "obj:1 0 R": {
  10 + "value": {
  11 + "/Pages": "2 0 R",
  12 + "/Type": "/Catalog"
  13 + }
  14 + },
  15 + "obj:2 0 R": {
  16 + "value": {
  17 + "/Count": 1,
  18 + "/Kids": [
  19 + "3 0 R"
  20 + ],
  21 + "/Type": "/Pages"
  22 + }
  23 + },
  24 + "obj:3 0 R": {
  25 + "value": {
  26 + "/Contents": ["4 0 R", "7 0 R"],
  27 + "/MediaBox": [
  28 + 0,
  29 + 0,
  30 + 612,
  31 + 792
  32 + ],
  33 + "/Parent": "2 0 R",
  34 + "/Resources": {
  35 + "/Font": {
  36 + "/F1": "6 0 R"
  37 + },
  38 + "/ProcSet": "5 0 R"
  39 + },
  40 + "/Type": "/Page"
  41 + }
  42 + },
  43 + "obj:4 0 R": {
  44 + "stream": {
  45 + "datafile": [[]],
  46 + "dict": {}
  47 + }
  48 + },
  49 + "obj:5 0 R": {
  50 + "value": [
  51 + "/PDF",
  52 + "/Text"
  53 + ]
  54 + },
  55 + "obj:6 0 R": {
  56 + "value": {
  57 + "/BaseFont": "/Helvetica",
  58 + "/Encoding": "/WinAnsiEncoding",
  59 + "/Subtype": "/Type1",
  60 + "/Type": "/Font"
  61 + }
  62 + },
  63 + "trailer": {
  64 + "value": {
  65 + "/Root": "1 0 R",
  66 + "/Size": 7
  67 + }
  68 + }
  69 + }
  70 + ]
  71 +}
qpdf/qtest/qpdf/qjson-bad-datafile2.out 0 → 100644
  1 +WARNING: qjson-bad-datafile2.json (obj:4 0 R, offset 850): "stream.datafile" must be a string containing a file name
  2 +qpdf: qjson-bad-datafile2.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-pdf-version1.out
1 -WARNING: qjson-bad-pdf-version1.json (offset 41): invalid JSON version (must be 2)  
2 -WARNING: qjson-bad-pdf-version1.json (offset 70): invalid PDF version (must be x.y) 1 +WARNING: qjson-bad-pdf-version1.json (offset 41): invalid JSON version (must be numeric value 2)
  2 +WARNING: qjson-bad-pdf-version1.json (offset 70): invalid PDF version (must be "x.y")
3 qpdf: qjson-bad-pdf-version1.json: errors found in JSON 3 qpdf: qjson-bad-pdf-version1.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-pdf-version2.out
1 -WARNING: qjson-bad-pdf-version2.json (offset 41): invalid JSON version (must be 2)  
2 -WARNING: qjson-bad-pdf-version2.json (offset 66): invalid PDF version (must be x.y) 1 +WARNING: qjson-bad-pdf-version2.json (offset 41): invalid JSON version (must be numeric value 2)
  2 +WARNING: qjson-bad-pdf-version2.json (offset 66): invalid PDF version (must be "x.y")
3 WARNING: qjson-bad-pdf-version2.json (offset 97): calledgetallpages must be a boolean 3 WARNING: qjson-bad-pdf-version2.json (offset 97): calledgetallpages must be a boolean
4 WARNING: qjson-bad-pdf-version2.json (offset 138): pushedinheritedpageresources must be a boolean 4 WARNING: qjson-bad-pdf-version2.json (offset 138): pushedinheritedpageresources must be a boolean
5 qpdf: qjson-bad-pdf-version2.json: errors found in JSON 5 qpdf: qjson-bad-pdf-version2.json: errors found in JSON
qpdf/qtest/qpdf/qjson-obj-key-errors.out
1 WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 244): object must have exactly one of "value" or "stream" 1 WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 244): object must have exactly one of "value" or "stream"
2 WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 542): object must have exactly one of "value" or "stream" 2 WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 542): object must have exactly one of "value" or "stream"
3 -WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 710): "stream" is missing "dict"  
4 -WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 710): new "stream" must have exactly one of "data" or "datafile"  
5 -WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 800): new "stream" must have exactly one of "data" or "datafile" 3 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 690): "stream" is missing "dict"
  4 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 690): new "stream" must have exactly one of "data" or "datafile"
  5 +WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 780): new "stream" must have exactly one of "data" or "datafile"
6 WARNING: qjson-obj-key-errors.json (trailer, offset 1178): "trailer" is missing "value" 6 WARNING: qjson-obj-key-errors.json (trailer, offset 1178): "trailer" is missing "value"
7 qpdf: qjson-obj-key-errors.json: errors found in JSON 7 qpdf: qjson-obj-key-errors.json: errors found in JSON
qpdf/qtest/qpdf/qjson-stream-dict-not-dict.out
1 WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 142): "stream.dict" must be a dictionary 1 WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 142): "stream.dict" must be a dictionary
2 -WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 142): unrecognized string value  
3 -WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 122): new "stream" must have exactly one of "data" or "datafile" 2 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 102): new "stream" must have exactly one of "data" or "datafile"
4 WARNING: qjson-stream-dict-not-dict.json: "qpdf[1].trailer" was not seen 3 WARNING: qjson-stream-dict-not-dict.json: "qpdf[1].trailer" was not seen
5 qpdf: qjson-stream-dict-not-dict.json: errors found in JSON 4 qpdf: qjson-stream-dict-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-stream-not-dict.out
1 WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary 1 WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary
  2 +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 102): "stream" is missing "dict"
2 WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen 3 WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen
3 qpdf: qjson-stream-not-dict.json: errors found in JSON 4 qpdf: qjson-stream-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-trailer-stream.out
1 WARNING: qjson-trailer-stream.json (trailer, offset 1269): the trailer may not be a stream 1 WARNING: qjson-trailer-stream.json (trailer, offset 1269): the trailer may not be a stream
  2 +WARNING: qjson-trailer-stream.json (trailer, offset 1249): "trailer" is missing "value"
2 qpdf: qjson-trailer-stream.json: errors found in JSON 3 qpdf: qjson-trailer-stream.json: errors found in JSON
qpdf/qtest/qpdf/update-from-json-errors.out
1 -WARNING: good13.pdf (obj:4 0 R from qpdf-json-update-errors.json, offset 95): existing "stream" may at most one of "data" or "datafile" 1 +WARNING: good13.pdf (obj:4 0 R from qpdf-json-update-errors.json, offset 75): existing "stream" may at most one of "data" or "datafile"
2 WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 335): unrecognized string value 2 WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 335): unrecognized string value
3 -WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 293): new "stream" must have exactly one of "data" or "datafile" 3 +WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 273): new "stream" must have exactly one of "data" or "datafile"
4 qpdf: qpdf-json-update-errors.json: errors found in JSON 4 qpdf: qpdf-json-update-errors.json: errors found in JSON