Commit b389268f16fcd571bbc57ef848caba25490a1b86
1 parent
a1368242
Better handle split content streams (fixes #73)
When parsing content streams, allow content to be split arbitrarily across stream boundaries.
Showing
11 changed files
with
8793 additions
and
20 deletions
ChangeLog
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -623,7 +623,9 @@ class QPDFObjectHandle |
| 623 | 623 | bool in_array, bool in_dictionary, |
| 624 | 624 | bool content_stream); |
| 625 | 625 | static void parseContentStream_internal( |
| 626 | - QPDFObjectHandle stream, ParserCallbacks* callbacks); | |
| 626 | + PointerHolder<Buffer> stream_data, | |
| 627 | + std::string const& description, | |
| 628 | + ParserCallbacks* callbacks); | |
| 627 | 629 | |
| 628 | 630 | // Other methods |
| 629 | 631 | static void warn(QPDF*, QPDFExc const&); | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -13,6 +13,7 @@ |
| 13 | 13 | #include <qpdf/QPDF_Dictionary.hh> |
| 14 | 14 | #include <qpdf/QPDF_Stream.hh> |
| 15 | 15 | #include <qpdf/QPDF_Reserved.hh> |
| 16 | +#include <qpdf/Pl_Buffer.hh> | |
| 16 | 17 | #include <qpdf/BufferInputSource.hh> |
| 17 | 18 | #include <qpdf/QPDFExc.hh> |
| 18 | 19 | |
| ... | ... | @@ -739,37 +740,63 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, |
| 739 | 740 | { |
| 740 | 741 | streams.push_back(stream_or_array); |
| 741 | 742 | } |
| 743 | + Pl_Buffer buf("concatenated stream data buffer"); | |
| 744 | + std::string all_description = "content stream objects"; | |
| 745 | + bool first = true; | |
| 742 | 746 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); |
| 743 | 747 | iter != streams.end(); ++iter) |
| 744 | 748 | { |
| 745 | 749 | QPDFObjectHandle stream = *iter; |
| 746 | 750 | if (! stream.isStream()) |
| 747 | 751 | { |
| 748 | - throw std::logic_error( | |
| 749 | - "QPDFObjectHandle: parseContentStream called on non-stream"); | |
| 752 | + QTC::TC("qpdf", "QPDFObjectHandle non-stream in parsecontent"); | |
| 753 | + warn(stream.getOwningQPDF(), | |
| 754 | + QPDFExc(qpdf_e_damaged_pdf, "content stream", | |
| 755 | + "", 0, | |
| 756 | + "ignoring non-stream while parsing content streams")); | |
| 750 | 757 | } |
| 751 | - try | |
| 752 | - { | |
| 753 | - parseContentStream_internal(stream, callbacks); | |
| 754 | - } | |
| 755 | - catch (TerminateParsing&) | |
| 758 | + else | |
| 756 | 759 | { |
| 757 | - return; | |
| 760 | + std::string og = QUtil::int_to_string(stream.getObjectID()) + " " + | |
| 761 | + QUtil::int_to_string(stream.getGeneration()); | |
| 762 | + std::string description = "content stream object " + og; | |
| 763 | + if (first) | |
| 764 | + { | |
| 765 | + first = false; | |
| 766 | + } | |
| 767 | + else | |
| 768 | + { | |
| 769 | + all_description += ","; | |
| 770 | + } | |
| 771 | + all_description += " " + og; | |
| 772 | + if (! stream.pipeStreamData(&buf, true, false, false, false)) | |
| 773 | + { | |
| 774 | + QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); | |
| 775 | + warn(stream.getOwningQPDF(), | |
| 776 | + QPDFExc(qpdf_e_damaged_pdf, "content stream", | |
| 777 | + description, 0, | |
| 778 | + "errors while decoding content stream")); | |
| 779 | + } | |
| 758 | 780 | } |
| 759 | 781 | } |
| 782 | + PointerHolder<Buffer> stream_data = buf.getBuffer(); | |
| 783 | + try | |
| 784 | + { | |
| 785 | + parseContentStream_internal(stream_data, all_description, callbacks); | |
| 786 | + } | |
| 787 | + catch (TerminateParsing&) | |
| 788 | + { | |
| 789 | + return; | |
| 790 | + } | |
| 760 | 791 | callbacks->handleEOF(); |
| 761 | 792 | } |
| 762 | 793 | |
| 763 | 794 | void |
| 764 | -QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream, | |
| 795 | +QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data, | |
| 796 | + std::string const& description, | |
| 765 | 797 | ParserCallbacks* callbacks) |
| 766 | 798 | { |
| 767 | - stream.assertStream(); | |
| 768 | - PointerHolder<Buffer> stream_data = stream.getStreamData(); | |
| 769 | 799 | size_t length = stream_data->getSize(); |
| 770 | - std::string description = "content stream object " + | |
| 771 | - QUtil::int_to_string(stream.getObjectID()) + " " + | |
| 772 | - QUtil::int_to_string(stream.getGeneration()); | |
| 773 | 800 | PointerHolder<InputSource> input = |
| 774 | 801 | new BufferInputSource(description, stream_data.getPointer()); |
| 775 | 802 | QPDFTokenizer tokenizer; | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -281,3 +281,5 @@ QPDFObjectHandle no val for last key 0 |
| 281 | 281 | QPDF resolve failure to null 0 |
| 282 | 282 | QPDFWriter precheck stream 0 |
| 283 | 283 | QPDFWriter preserve unreferenced standard 0 |
| 284 | +QPDFObjectHandle non-stream in parsecontent 0 | |
| 285 | +QPDFObjectHandle errors in parsecontent 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", |
| 206 | 206 | show_ntests(); |
| 207 | 207 | # ---------- |
| 208 | 208 | $td->notify("--- Miscellaneous Tests ---"); |
| 209 | -$n_tests += 86; | |
| 209 | +$n_tests += 88; | |
| 210 | 210 | |
| 211 | 211 | $td->runtest("qpdf version", |
| 212 | 212 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -604,6 +604,20 @@ $td->runtest("no trailing space in xref table", |
| 604 | 604 | {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0}, |
| 605 | 605 | $td->NORMALIZE_NEWLINES); |
| 606 | 606 | |
| 607 | +# An array is split across multiple content streams starting at object | |
| 608 | +# 42. This was reported in github issue 73. The file is modified from | |
| 609 | +# that example. | |
| 610 | +$td->runtest("parse split content stream", | |
| 611 | + {$td->COMMAND => "qpdf --check split-content-stream.pdf"}, | |
| 612 | + {$td->FILE => "split-content-stream.out", $td->EXIT_STATUS => 0}, | |
| 613 | + $td->NORMALIZE_NEWLINES); | |
| 614 | +$td->runtest("split content stream errors", | |
| 615 | + {$td->COMMAND => "qpdf --check split-content-stream-errors.pdf"}, | |
| 616 | + {$td->FILE => "split-content-stream-errors.out", | |
| 617 | + $td->EXIT_STATUS => 3}, | |
| 618 | + $td->NORMALIZE_NEWLINES); | |
| 619 | + | |
| 620 | + | |
| 607 | 621 | show_ntests(); |
| 608 | 622 | # ---------- |
| 609 | 623 | $td->notify("--- Numeric range parsing tests ---"); | ... | ... |
qpdf/qtest/qpdf/content-stream-errors.out
| ... | ... | @@ -2,6 +2,6 @@ checking content-stream-errors.pdf |
| 2 | 2 | PDF Version: 1.3 |
| 3 | 3 | File is not encrypted |
| 4 | 4 | File is not linearized |
| 5 | -page 1: content stream object 7 0 (content, file position 52): parse error while reading object | |
| 6 | -page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image | |
| 7 | -page 4: content stream object 19 0 (content, file position 53): parse error while reading object | |
| 5 | +page 1: content stream objects 7 0 (content, file position 52): parse error while reading object | |
| 6 | +page 3: content stream objects 15 0 (stream data, file position 117): EOF found while reading inline image | |
| 7 | +page 4: content stream objects 19 0 (content, file position 53): parse error while reading object | ... | ... |
qpdf/qtest/qpdf/eof-in-inline-image.out
| ... | ... | @@ -22,4 +22,4 @@ name: /Fl |
| 22 | 22 | name: /DP |
| 23 | 23 | dictionary: << /Columns 1 /Predictor 15 >> |
| 24 | 24 | operator: ID |
| 25 | -content stream object 4 0 (stream data, file position 139): EOF found while reading inline image | |
| 25 | +content stream objects 4 0 (stream data, file position 139): EOF found while reading inline image | ... | ... |
qpdf/qtest/qpdf/split-content-stream-errors.out
0 → 100644
| 1 | +WARNING: split-content-stream-errors.pdf: file is damaged | |
| 2 | +WARNING: split-content-stream-errors.pdf (file position 802): xref not found | |
| 3 | +WARNING: split-content-stream-errors.pdf: Attempting to reconstruct cross-reference table | |
| 4 | +checking split-content-stream-errors.pdf | |
| 5 | +PDF Version: 1.3 | |
| 6 | +File is not encrypted | |
| 7 | +File is not linearized | |
| 8 | +WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received | |
| 9 | +WARNING: content stream: ignoring non-stream while parsing content streams | |
| 10 | +WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received | |
| 11 | +WARNING: content stream (content stream object 6 0): errors while decoding content stream | ... | ... |
qpdf/qtest/qpdf/split-content-stream-errors.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +1 0 obj | |
| 6 | +<< | |
| 7 | + /Pages 2 0 R | |
| 8 | + /Type /Catalog | |
| 9 | +>> | |
| 10 | +endobj | |
| 11 | + | |
| 12 | +2 0 obj | |
| 13 | +<< | |
| 14 | + /Count 1 | |
| 15 | + /Kids [ | |
| 16 | + 3 0 R | |
| 17 | + ] | |
| 18 | + /Type /Pages | |
| 19 | +>> | |
| 20 | +endobj | |
| 21 | + | |
| 22 | +%% Page 1 | |
| 23 | +3 0 obj | |
| 24 | +<< | |
| 25 | + /Contents [ | |
| 26 | + 4 0 R | |
| 27 | + 6 0 R | |
| 28 | + ] | |
| 29 | + /MediaBox [ | |
| 30 | + 0 | |
| 31 | + 0 | |
| 32 | + 612 | |
| 33 | + 792 | |
| 34 | + ] | |
| 35 | + /Parent 2 0 R | |
| 36 | + /Resources << | |
| 37 | + /Font << | |
| 38 | + /F1 8 0 R | |
| 39 | + >> | |
| 40 | + /ProcSet 9 0 R | |
| 41 | + >> | |
| 42 | + /Type /Page | |
| 43 | +>> | |
| 44 | +endobj | |
| 45 | + | |
| 46 | +%% Contents for page 1 | |
| 47 | +4 0 obj | |
| 48 | +<< | |
| 49 | + /Length 5 0 R | |
| 50 | + /Oops (Not a stream) | |
| 51 | +>> | |
| 52 | +endobj | |
| 53 | + | |
| 54 | +5 0 obj | |
| 55 | +44 | |
| 56 | +endobj | |
| 57 | + | |
| 58 | +%% Contents for page 1 | |
| 59 | +6 0 obj | |
| 60 | +<< | |
| 61 | + /Length 7 0 R | |
| 62 | + /Filter /LZWDecode | |
| 63 | +>> | |
| 64 | +stream | |
| 65 | +BT | |
| 66 | + /F1 24 Tf | |
| 67 | + 72 720 Td | |
| 68 | + (Encoding errors) Tj | |
| 69 | +ET | |
| 70 | +endstream | |
| 71 | +endobj | |
| 72 | + | |
| 73 | +7 0 obj | |
| 74 | +53 | |
| 75 | +endobj | |
| 76 | + | |
| 77 | +8 0 obj | |
| 78 | +<< | |
| 79 | + /BaseFont /Helvetica | |
| 80 | + /Encoding /WinAnsiEncoding | |
| 81 | + /Name /F1 | |
| 82 | + /Subtype /Type1 | |
| 83 | + /Type /Font | |
| 84 | +>> | |
| 85 | +endobj | |
| 86 | + | |
| 87 | +9 0 obj | |
| 88 | +[ | |
| 89 | ||
| 90 | + /Text | |
| 91 | +] | |
| 92 | +endobj | |
| 93 | + | |
| 94 | +xref | |
| 95 | +0 10 | |
| 96 | +0000000000 65535 f | |
| 97 | +0000000025 00000 n | |
| 98 | +0000000079 00000 n | |
| 99 | +0000000161 00000 n | |
| 100 | +0000000396 00000 n | |
| 101 | +0000000457 00000 n | |
| 102 | +0000000499 00000 n | |
| 103 | +0000000630 00000 n | |
| 104 | +0000000649 00000 n | |
| 105 | +0000000767 00000 n | |
| 106 | +trailer << | |
| 107 | + /Root 1 0 R | |
| 108 | + /Size 10 | |
| 109 | + /ID [<cbdd966f9b7b2bb31ad606c532d7cce5><e5f7cff7a542641606230aadd53106a4>] | |
| 110 | +>> | |
| 111 | +startxref | |
| 112 | +802 | |
| 113 | +%%EOF | ... | ... |
qpdf/qtest/qpdf/split-content-stream.out
0 → 100644
qpdf/qtest/qpdf/split-content-stream.pdf
0 → 100644
No preview for this file type