Commit 31372edce0b60211c7af98340b3afa054f414ca4

Authored by Jay Berkenbilt
1 parent c1363563

Inline image token value ends with EI, not delimiter

The inline image token erroneously included the delimiter that
followed EI. The ObjectHandle created from it was correct.
ChangeLog
1 1 2019-01-29 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Bug fix: when returning an inline image token, the tokenizer no
  4 + longer includes the delimiter that follows EI. The
  5 + QPDFObjectHandle created from the token was correct.
  6 +
3 7 * Handle files with direct page objects, which is not allowed by
4 8 the PDF spec but has been seen in the wild. Fixes #164.
5 9  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1571,9 +1571,9 @@ QPDFObjectHandle::parseContentStream_data(
1571 1571 else
1572 1572 {
1573 1573 // Skip back over EI
1574   - input->seek(-3, SEEK_CUR);
  1574 + input->seek(-2, SEEK_CUR);
1575 1575 std::string inline_image = t.getRawValue();
1576   - for (int i = 0; i < 4; ++i)
  1576 + for (int i = 0; i < 3; ++i)
1577 1577 {
1578 1578 if (inline_image.length() > 0)
1579 1579 {
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -468,6 +468,7 @@ QPDFTokenizer::presentCharacter(char ch)
468 468 }
469 469 else if (this->m->state == st_inline_image)
470 470 {
  471 + this->m->val += ch;
471 472 size_t len = this->m->val.length();
472 473 if ((len >= 4) &&
473 474 isDelimiter(this->m->val.at(len-4)) &&
... ... @@ -475,22 +476,18 @@ QPDFTokenizer::presentCharacter(char ch)
475 476 (this->m->val.at(len-2) == 'I') &&
476 477 isDelimiter(this->m->val.at(len-1)))
477 478 {
  479 + this->m->val.erase(len - 1);
478 480 this->m->type = tt_inline_image;
479 481 this->m->unread_char = true;
480 482 this->m->char_to_unread = ch;
481 483 this->m->state = st_token_ready;
482 484 }
483   - else
484   - {
485   - this->m->val += ch;
486   - }
487 485 }
488 486 else
489 487 {
490 488 handled = false;
491 489 }
492 490  
493   -
494 491 if (handled)
495 492 {
496 493 // okay
... ...
qpdf/qtest/qpdf/tokens-maxlen.out
... ... @@ -599,7 +599,8 @@ skipping to endstream
599 599 103: dict_close: >>
600 600 105: space: \x0a
601 601 106: word: ID
602   -108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
  602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  603 +354: space: \x0a
603 604 355: word: BT
604 605 357: space: \x0a
605 606 360: name: /F1
... ...
qpdf/qtest/qpdf/tokens-no-ignorable.out
... ... @@ -291,7 +291,7 @@ skipping to endstream
291 291 101: integer: 66
292 292 103: dict_close: >>
293 293 106: word: ID
294   -108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
  294 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
295 295 355: word: BT
296 296 360: name: /F1
297 297 364: integer: 24
... ...
qpdf/qtest/qpdf/tokens.out
... ... @@ -599,7 +599,8 @@ skipping to endstream
599 599 103: dict_close: >>
600 600 105: space: \x0a
601 601 106: word: ID
602   -108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
  602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  603 +354: space: \x0a
603 604 355: word: BT
604 605 357: space: \x0a
605 606 360: name: /F1
... ...