Commit b389268f16fcd571bbc57ef848caba25490a1b86
1 parent: a1368242
Better handle split content streams (fixes #73)
When parsing content streams, allow content to be split arbitrarily across stream boundaries.
Showing 11 changed files with 8793 additions and 20 deletions
ChangeLog
| 1 | +2017-07-29 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Fix content stream parsing to handle cases of structures within | ||
| 4 | + the stream split across stream boundaries. Fixes #73. | ||
| 5 | + | ||
| 1 | 2017-07-28 Jay Berkenbilt <ejb@ql.org> | 6 | 2017-07-28 Jay Berkenbilt <ejb@ql.org> |
| 2 | 7 | ||
| 3 | * Add --preserve-unreferenced command-line option and | 8 | * Add --preserve-unreferenced command-line option and |
include/qpdf/QPDFObjectHandle.hh
| @@ -623,7 +623,9 @@ class QPDFObjectHandle | @@ -623,7 +623,9 @@ class QPDFObjectHandle | ||
| 623 | bool in_array, bool in_dictionary, | 623 | bool in_array, bool in_dictionary, |
| 624 | bool content_stream); | 624 | bool content_stream); |
| 625 | static void parseContentStream_internal( | 625 | static void parseContentStream_internal( |
| 626 | - QPDFObjectHandle stream, ParserCallbacks* callbacks); | 626 | + PointerHolder<Buffer> stream_data, |
| 627 | + std::string const& description, | ||
| 628 | + ParserCallbacks* callbacks); | ||
| 627 | 629 | ||
| 628 | // Other methods | 630 | // Other methods |
| 629 | static void warn(QPDF*, QPDFExc const&); | 631 | static void warn(QPDF*, QPDFExc const&); |
libqpdf/QPDFObjectHandle.cc
| @@ -13,6 +13,7 @@ | @@ -13,6 +13,7 @@ | ||
| 13 | #include <qpdf/QPDF_Dictionary.hh> | 13 | #include <qpdf/QPDF_Dictionary.hh> |
| 14 | #include <qpdf/QPDF_Stream.hh> | 14 | #include <qpdf/QPDF_Stream.hh> |
| 15 | #include <qpdf/QPDF_Reserved.hh> | 15 | #include <qpdf/QPDF_Reserved.hh> |
| 16 | +#include <qpdf/Pl_Buffer.hh> | ||
| 16 | #include <qpdf/BufferInputSource.hh> | 17 | #include <qpdf/BufferInputSource.hh> |
| 17 | #include <qpdf/QPDFExc.hh> | 18 | #include <qpdf/QPDFExc.hh> |
| 18 | 19 | ||
| @@ -739,37 +740,63 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, | @@ -739,37 +740,63 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, | ||
| 739 | { | 740 | { |
| 740 | streams.push_back(stream_or_array); | 741 | streams.push_back(stream_or_array); |
| 741 | } | 742 | } |
| 743 | + Pl_Buffer buf("concatenated stream data buffer"); | ||
| 744 | + std::string all_description = "content stream objects"; | ||
| 745 | + bool first = true; | ||
| 742 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); | 746 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); |
| 743 | iter != streams.end(); ++iter) | 747 | iter != streams.end(); ++iter) |
| 744 | { | 748 | { |
| 745 | QPDFObjectHandle stream = *iter; | 749 | QPDFObjectHandle stream = *iter; |
| 746 | if (! stream.isStream()) | 750 | if (! stream.isStream()) |
| 747 | { | 751 | { |
| 748 | - throw std::logic_error( | ||
| 749 | - "QPDFObjectHandle: parseContentStream called on non-stream"); | 752 | + QTC::TC("qpdf", "QPDFObjectHandle non-stream in parsecontent"); |
| 753 | + warn(stream.getOwningQPDF(), | ||
| 754 | + QPDFExc(qpdf_e_damaged_pdf, "content stream", | ||
| 755 | + "", 0, | ||
| 756 | + "ignoring non-stream while parsing content streams")); | ||
| 750 | } | 757 | } |
| 751 | - try | ||
| 752 | - { | ||
| 753 | - parseContentStream_internal(stream, callbacks); | ||
| 754 | - } | ||
| 755 | - catch (TerminateParsing&) | 758 | + else |
| 756 | { | 759 | { |
| 757 | - return; | 760 | + std::string og = QUtil::int_to_string(stream.getObjectID()) + " " + |
| 761 | + QUtil::int_to_string(stream.getGeneration()); | ||
| 762 | + std::string description = "content stream object " + og; | ||
| 763 | + if (first) | ||
| 764 | + { | ||
| 765 | + first = false; | ||
| 766 | + } | ||
| 767 | + else | ||
| 768 | + { | ||
| 769 | + all_description += ","; | ||
| 770 | + } | ||
| 771 | + all_description += " " + og; | ||
| 772 | + if (! stream.pipeStreamData(&buf, true, false, false, false)) | ||
| 773 | + { | ||
| 774 | + QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); | ||
| 775 | + warn(stream.getOwningQPDF(), | ||
| 776 | + QPDFExc(qpdf_e_damaged_pdf, "content stream", | ||
| 777 | + description, 0, | ||
| 778 | + "errors while decoding content stream")); | ||
| 779 | + } | ||
| 758 | } | 780 | } |
| 759 | } | 781 | } |
| 782 | + PointerHolder<Buffer> stream_data = buf.getBuffer(); | ||
| 783 | + try | ||
| 784 | + { | ||
| 785 | + parseContentStream_internal(stream_data, all_description, callbacks); | ||
| 786 | + } | ||
| 787 | + catch (TerminateParsing&) | ||
| 788 | + { | ||
| 789 | + return; | ||
| 790 | + } | ||
| 760 | callbacks->handleEOF(); | 791 | callbacks->handleEOF(); |
| 761 | } | 792 | } |
| 762 | 793 | ||
| 763 | void | 794 | void |
| 764 | -QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream, | 795 | +QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data, |
| 796 | + std::string const& description, | ||
| 765 | ParserCallbacks* callbacks) | 797 | ParserCallbacks* callbacks) |
| 766 | { | 798 | { |
| 767 | - stream.assertStream(); | ||
| 768 | - PointerHolder<Buffer> stream_data = stream.getStreamData(); | ||
| 769 | size_t length = stream_data->getSize(); | 799 | size_t length = stream_data->getSize(); |
| 770 | - std::string description = "content stream object " + | ||
| 771 | - QUtil::int_to_string(stream.getObjectID()) + " " + | ||
| 772 | - QUtil::int_to_string(stream.getGeneration()); | ||
| 773 | PointerHolder<InputSource> input = | 800 | PointerHolder<InputSource> input = |
| 774 | new BufferInputSource(description, stream_data.getPointer()); | 801 | new BufferInputSource(description, stream_data.getPointer()); |
| 775 | QPDFTokenizer tokenizer; | 802 | QPDFTokenizer tokenizer; |
qpdf/qpdf.testcov
| @@ -281,3 +281,5 @@ QPDFObjectHandle no val for last key 0 | @@ -281,3 +281,5 @@ QPDFObjectHandle no val for last key 0 | ||
| 281 | QPDF resolve failure to null 0 | 281 | QPDF resolve failure to null 0 |
| 282 | QPDFWriter precheck stream 0 | 282 | QPDFWriter precheck stream 0 |
| 283 | QPDFWriter preserve unreferenced standard 0 | 283 | QPDFWriter preserve unreferenced standard 0 |
| 284 | +QPDFObjectHandle non-stream in parsecontent 0 | ||
| 285 | +QPDFObjectHandle errors in parsecontent 0 |
qpdf/qtest/qpdf.test
| @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", | @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", | ||
| 206 | show_ntests(); | 206 | show_ntests(); |
| 207 | # ---------- | 207 | # ---------- |
| 208 | $td->notify("--- Miscellaneous Tests ---"); | 208 | $td->notify("--- Miscellaneous Tests ---"); |
| 209 | -$n_tests += 86; | 209 | +$n_tests += 88; |
| 210 | 210 | ||
| 211 | $td->runtest("qpdf version", | 211 | $td->runtest("qpdf version", |
| 212 | {$td->COMMAND => "qpdf --version"}, | 212 | {$td->COMMAND => "qpdf --version"}, |
| @@ -604,6 +604,20 @@ $td->runtest("no trailing space in xref table", | @@ -604,6 +604,20 @@ $td->runtest("no trailing space in xref table", | ||
| 604 | {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0}, | 604 | {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0}, |
| 605 | $td->NORMALIZE_NEWLINES); | 605 | $td->NORMALIZE_NEWLINES); |
| 606 | 606 | ||
| 607 | +# An array is split across multiple content streams starting at object | ||
| 608 | +# 42. This was reported in github issue 73. The file is modified from | ||
| 609 | +# that example. | ||
| 610 | +$td->runtest("parse split content stream", | ||
| 611 | + {$td->COMMAND => "qpdf --check split-content-stream.pdf"}, | ||
| 612 | + {$td->FILE => "split-content-stream.out", $td->EXIT_STATUS => 0}, | ||
| 613 | + $td->NORMALIZE_NEWLINES); | ||
| 614 | +$td->runtest("split content stream errors", | ||
| 615 | + {$td->COMMAND => "qpdf --check split-content-stream-errors.pdf"}, | ||
| 616 | + {$td->FILE => "split-content-stream-errors.out", | ||
| 617 | + $td->EXIT_STATUS => 3}, | ||
| 618 | + $td->NORMALIZE_NEWLINES); | ||
| 619 | + | ||
| 620 | + | ||
| 607 | show_ntests(); | 621 | show_ntests(); |
| 608 | # ---------- | 622 | # ---------- |
| 609 | $td->notify("--- Numeric range parsing tests ---"); | 623 | $td->notify("--- Numeric range parsing tests ---"); |
qpdf/qtest/qpdf/content-stream-errors.out
| @@ -2,6 +2,6 @@ checking content-stream-errors.pdf | @@ -2,6 +2,6 @@ checking content-stream-errors.pdf | ||
| 2 | PDF Version: 1.3 | 2 | PDF Version: 1.3 |
| 3 | File is not encrypted | 3 | File is not encrypted |
| 4 | File is not linearized | 4 | File is not linearized |
| 5 | -page 1: content stream object 7 0 (content, file position 52): parse error while reading object | ||
| 6 | -page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image | ||
| 7 | -page 4: content stream object 19 0 (content, file position 53): parse error while reading object | 5 | +page 1: content stream objects 7 0 (content, file position 52): parse error while reading object |
| 6 | +page 3: content stream objects 15 0 (stream data, file position 117): EOF found while reading inline image | ||
| 7 | +page 4: content stream objects 19 0 (content, file position 53): parse error while reading object |
qpdf/qtest/qpdf/eof-in-inline-image.out
| @@ -22,4 +22,4 @@ name: /Fl | @@ -22,4 +22,4 @@ name: /Fl | ||
| 22 | name: /DP | 22 | name: /DP |
| 23 | dictionary: << /Columns 1 /Predictor 15 >> | 23 | dictionary: << /Columns 1 /Predictor 15 >> |
| 24 | operator: ID | 24 | operator: ID |
| 25 | -content stream object 4 0 (stream data, file position 139): EOF found while reading inline image | 25 | +content stream objects 4 0 (stream data, file position 139): EOF found while reading inline image |
qpdf/qtest/qpdf/split-content-stream-errors.out
0 → 100644
| 1 | +WARNING: split-content-stream-errors.pdf: file is damaged | ||
| 2 | +WARNING: split-content-stream-errors.pdf (file position 802): xref not found | ||
| 3 | +WARNING: split-content-stream-errors.pdf: Attempting to reconstruct cross-reference table | ||
| 4 | +checking split-content-stream-errors.pdf | ||
| 5 | +PDF Version: 1.3 | ||
| 6 | +File is not encrypted | ||
| 7 | +File is not linearized | ||
| 8 | +WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received | ||
| 9 | +WARNING: content stream: ignoring non-stream while parsing content streams | ||
| 10 | +WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received | ||
| 11 | +WARNING: content stream (content stream object 6 0): errors while decoding content stream |
qpdf/qtest/qpdf/split-content-stream-errors.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +1 0 obj | ||
| 6 | +<< | ||
| 7 | + /Pages 2 0 R | ||
| 8 | + /Type /Catalog | ||
| 9 | +>> | ||
| 10 | +endobj | ||
| 11 | + | ||
| 12 | +2 0 obj | ||
| 13 | +<< | ||
| 14 | + /Count 1 | ||
| 15 | + /Kids [ | ||
| 16 | + 3 0 R | ||
| 17 | + ] | ||
| 18 | + /Type /Pages | ||
| 19 | +>> | ||
| 20 | +endobj | ||
| 21 | + | ||
| 22 | +%% Page 1 | ||
| 23 | +3 0 obj | ||
| 24 | +<< | ||
| 25 | + /Contents [ | ||
| 26 | + 4 0 R | ||
| 27 | + 6 0 R | ||
| 28 | + ] | ||
| 29 | + /MediaBox [ | ||
| 30 | + 0 | ||
| 31 | + 0 | ||
| 32 | + 612 | ||
| 33 | + 792 | ||
| 34 | + ] | ||
| 35 | + /Parent 2 0 R | ||
| 36 | + /Resources << | ||
| 37 | + /Font << | ||
| 38 | + /F1 8 0 R | ||
| 39 | + >> | ||
| 40 | + /ProcSet 9 0 R | ||
| 41 | + >> | ||
| 42 | + /Type /Page | ||
| 43 | +>> | ||
| 44 | +endobj | ||
| 45 | + | ||
| 46 | +%% Contents for page 1 | ||
| 47 | +4 0 obj | ||
| 48 | +<< | ||
| 49 | + /Length 5 0 R | ||
| 50 | + /Oops (Not a stream) | ||
| 51 | +>> | ||
| 52 | +endobj | ||
| 53 | + | ||
| 54 | +5 0 obj | ||
| 55 | +44 | ||
| 56 | +endobj | ||
| 57 | + | ||
| 58 | +%% Contents for page 1 | ||
| 59 | +6 0 obj | ||
| 60 | +<< | ||
| 61 | + /Length 7 0 R | ||
| 62 | + /Filter /LZWDecode | ||
| 63 | +>> | ||
| 64 | +stream | ||
| 65 | +BT | ||
| 66 | + /F1 24 Tf | ||
| 67 | + 72 720 Td | ||
| 68 | + (Encoding errors) Tj | ||
| 69 | +ET | ||
| 70 | +endstream | ||
| 71 | +endobj | ||
| 72 | + | ||
| 73 | +7 0 obj | ||
| 74 | +53 | ||
| 75 | +endobj | ||
| 76 | + | ||
| 77 | +8 0 obj | ||
| 78 | +<< | ||
| 79 | + /BaseFont /Helvetica | ||
| 80 | + /Encoding /WinAnsiEncoding | ||
| 81 | + /Name /F1 | ||
| 82 | + /Subtype /Type1 | ||
| 83 | + /Type /Font | ||
| 84 | +>> | ||
| 85 | +endobj | ||
| 86 | + | ||
| 87 | +9 0 obj | ||
| 88 | +[ | ||
| 89 | + /PDF | ||
| 90 | + /Text | ||
| 91 | +] | ||
| 92 | +endobj | ||
| 93 | + | ||
| 94 | +xref | ||
| 95 | +0 10 | ||
| 96 | +0000000000 65535 f | ||
| 97 | +0000000025 00000 n | ||
| 98 | +0000000079 00000 n | ||
| 99 | +0000000161 00000 n | ||
| 100 | +0000000396 00000 n | ||
| 101 | +0000000457 00000 n | ||
| 102 | +0000000499 00000 n | ||
| 103 | +0000000630 00000 n | ||
| 104 | +0000000649 00000 n | ||
| 105 | +0000000767 00000 n | ||
| 106 | +trailer << | ||
| 107 | + /Root 1 0 R | ||
| 108 | + /Size 10 | ||
| 109 | + /ID [<cbdd966f9b7b2bb31ad606c532d7cce5><e5f7cff7a542641606230aadd53106a4>] | ||
| 110 | +>> | ||
| 111 | +startxref | ||
| 112 | +802 | ||
| 113 | +%%EOF |
qpdf/qtest/qpdf/split-content-stream.out
0 → 100644
qpdf/qtest/qpdf/split-content-stream.pdf
0 → 100644
No preview for this file type