Commit f81152311e5737e5e0de9dd9462311f306c6921b
1 parent
1d88955f
Add QPDFObjectHandle::parseContentStream method
This method allows parsing of the PDF objects in a content stream or array of content streams.
Showing
17 changed files
with
494 additions
and
12 deletions
ChangeLog
| 1 | 2013-01-20 Jay Berkenbilt <ejb@ql.org> | 1 | 2013-01-20 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Added QPDFObjectHandle::parseContentStream, which parses the | ||
| 4 | + objects in a content stream and calls handlers in a callback | ||
| 5 | + class. The example pdf-parse-content illustrates its use. | ||
| 6 | + | ||
| 3 | * Added QPDF_Keyword and QPDF_InlineImage types along with | 7 | * Added QPDF_Keyword and QPDF_InlineImage types along with |
| 4 | appropriate wrapper methods in QPDFObjectHandle. These new object | 8 | appropriate wrapper methods in QPDFObjectHandle. These new object |
| 5 | types are to facilitate content stream parsing. | 9 | types are to facilitate content stream parsing. |
examples/build.mk
| @@ -4,7 +4,8 @@ BINS_examples = \ | @@ -4,7 +4,8 @@ BINS_examples = \ | ||
| 4 | pdf-npages \ | 4 | pdf-npages \ |
| 5 | pdf-double-page-size \ | 5 | pdf-double-page-size \ |
| 6 | pdf-invert-images \ | 6 | pdf-invert-images \ |
| 7 | - pdf-create | 7 | + pdf-create \ |
| 8 | + pdf-parse-content | ||
| 8 | CBINS_examples = pdf-linearize | 9 | CBINS_examples = pdf-linearize |
| 9 | 10 | ||
| 10 | TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) | 11 | TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) |
examples/pdf-parse-content.cc
0 → 100644
| 1 | +#include <iostream> | ||
| 2 | +#include <string.h> | ||
| 3 | +#include <stdlib.h> | ||
| 4 | + | ||
| 5 | +#include <qpdf/QPDF.hh> | ||
| 6 | +#include <qpdf/QUtil.hh> | ||
| 7 | + | ||
| 8 | +static char const* whoami = 0; | ||
| 9 | + | ||
| 10 | +void usage() | ||
| 11 | +{ | ||
| 12 | + std::cerr << "Usage: " << whoami << " filename page-number" << std::endl | ||
| 13 | + << "Prints a dump of the objects in the content streams" | ||
| 14 | + << " of the given page." << std::endl | ||
| 15 | + << "Pages are numbered from 1." << std::endl; | ||
| 16 | + exit(2); | ||
| 17 | +} | ||
| 18 | + | ||
| 19 | +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks | ||
| 20 | +{ | ||
| 21 | + public: | ||
| 22 | + virtual ~ParserCallbacks() | ||
| 23 | + { | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + virtual void handleObject(QPDFObjectHandle); | ||
| 27 | + virtual void handleEOF(); | ||
| 28 | +}; | ||
| 29 | + | ||
| 30 | +void | ||
| 31 | +ParserCallbacks::handleObject(QPDFObjectHandle obj) | ||
| 32 | +{ | ||
| 33 | + if (obj.isInlineImage()) | ||
| 34 | + { | ||
| 35 | + std::string val = obj.getInlineImageValue(); | ||
| 36 | + std::cout << "inline image: "; | ||
| 37 | + char buf[3]; | ||
| 38 | + buf[2] = '\0'; | ||
| 39 | + for (size_t i = 0; i < val.length(); ++i) | ||
| 40 | + { | ||
| 41 | + sprintf(buf, "%02x", (unsigned char)(val[i])); | ||
| 42 | + std::cout << buf; | ||
| 43 | + } | ||
| 44 | + std::cout << std::endl; | ||
| 45 | + } | ||
| 46 | + else | ||
| 47 | + { | ||
| 48 | + std::cout << obj.unparse() << std::endl; | ||
| 49 | + } | ||
| 50 | +} | ||
| 51 | + | ||
| 52 | +void | ||
| 53 | +ParserCallbacks::handleEOF() | ||
| 54 | +{ | ||
| 55 | + std::cout << "-EOF-" << std::endl; | ||
| 56 | +} | ||
| 57 | + | ||
| 58 | +int main(int argc, char* argv[]) | ||
| 59 | +{ | ||
| 60 | + whoami = QUtil::getWhoami(argv[0]); | ||
| 61 | + | ||
| 62 | + // For libtool's sake.... | ||
| 63 | + if (strncmp(whoami, "lt-", 3) == 0) | ||
| 64 | + { | ||
| 65 | + whoami += 3; | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + if (argc != 3) | ||
| 69 | + { | ||
| 70 | + usage(); | ||
| 71 | + } | ||
| 72 | + char const* filename = argv[1]; | ||
| 73 | + int pageno = atoi(argv[2]); | ||
| 74 | + | ||
| 75 | + try | ||
| 76 | + { | ||
| 77 | + QPDF pdf; | ||
| 78 | + pdf.processFile(filename); | ||
| 79 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | ||
| 80 | + if ((pageno < 1) || (pageno > (int)pages.size())) | ||
| 81 | + { | ||
| 82 | + usage(); | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + QPDFObjectHandle page = pages[pageno-1]; | ||
| 86 | + QPDFObjectHandle contents = page.getKey("/Contents"); | ||
| 87 | + ParserCallbacks cb; | ||
| 88 | + QPDFObjectHandle::parseContentStream(contents, &cb); | ||
| 89 | + } | ||
| 90 | + catch (std::exception& e) | ||
| 91 | + { | ||
| 92 | + std::cerr << whoami << ": " << e.what() << std::endl; | ||
| 93 | + exit(2); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + return 0; | ||
| 97 | +} |
examples/qtest/parse-content.test
0 → 100644
| 1 | +#!/usr/bin/env perl | ||
| 2 | +require 5.008; | ||
| 3 | +BEGIN { $^W = 1; } | ||
| 4 | +use strict; | ||
| 5 | + | ||
| 6 | +chdir("parse-content"); | ||
| 7 | + | ||
| 8 | +require TestDriver; | ||
| 9 | + | ||
| 10 | +my $td = new TestDriver('pdf-parse-content'); | ||
| 11 | + | ||
| 12 | +$td->runtest("parse content", | ||
| 13 | + {$td->COMMAND => "pdf-parse-content input.pdf 1"}, | ||
| 14 | + {$td->FILE => "content.out", $td->EXIT_STATUS => 0}, | ||
| 15 | + $td->NORMALIZE_NEWLINES); | ||
| 16 | + | ||
| 17 | +$td->report(1); |
examples/qtest/parse-content/content.out
0 → 100644
examples/qtest/parse-content/input.pdf
0 → 100644
No preview for this file type
include/qpdf/QPDFObjectHandle.hh
| @@ -71,6 +71,21 @@ class QPDFObjectHandle | @@ -71,6 +71,21 @@ class QPDFObjectHandle | ||
| 71 | virtual void decryptString(std::string& val) = 0; | 71 | virtual void decryptString(std::string& val) = 0; |
| 72 | }; | 72 | }; |
| 73 | 73 | ||
| 74 | + // This class is used by parseContentStream. Callers must | ||
| 75 | + // instantiate a subclass of this with handlers defined to accept | ||
| 76 | + // QPDFObjectHandles that are parsed from the stream. | ||
| 77 | + class ParserCallbacks | ||
| 78 | + { | ||
| 79 | + public: | ||
| 80 | + QPDF_DLL | ||
| 81 | + virtual ~ParserCallbacks() | ||
| 82 | + { | ||
| 83 | + } | ||
| 84 | + virtual void handleObject(QPDFObjectHandle) = 0; | ||
| 85 | + virtual void handleEOF() = 0; | ||
| 86 | + }; | ||
| 87 | + | ||
| 88 | + | ||
| 74 | QPDF_DLL | 89 | QPDF_DLL |
| 75 | QPDFObjectHandle(); | 90 | QPDFObjectHandle(); |
| 76 | QPDF_DLL | 91 | QPDF_DLL |
| @@ -138,6 +153,11 @@ class QPDFObjectHandle | @@ -138,6 +153,11 @@ class QPDFObjectHandle | ||
| 138 | StringDecrypter* decrypter, | 153 | StringDecrypter* decrypter, |
| 139 | QPDF* context); | 154 | QPDF* context); |
| 140 | 155 | ||
| 156 | + // Helpers for parsing content streams | ||
| 157 | + QPDF_DLL | ||
| 158 | + static void parseContentStream(QPDFObjectHandle stream_or_array, | ||
| 159 | + ParserCallbacks* callbacks); | ||
| 160 | + | ||
| 141 | // Type-specific factories | 161 | // Type-specific factories |
| 142 | QPDF_DLL | 162 | QPDF_DLL |
| 143 | static QPDFObjectHandle newNull(); | 163 | static QPDFObjectHandle newNull(); |
| @@ -571,7 +591,10 @@ class QPDFObjectHandle | @@ -571,7 +591,10 @@ class QPDFObjectHandle | ||
| 571 | std::string const& object_description, | 591 | std::string const& object_description, |
| 572 | QPDFTokenizer& tokenizer, bool& empty, | 592 | QPDFTokenizer& tokenizer, bool& empty, |
| 573 | StringDecrypter* decrypter, QPDF* context, | 593 | StringDecrypter* decrypter, QPDF* context, |
| 574 | - bool in_array, bool in_dictionary); | 594 | + bool in_array, bool in_dictionary, |
| 595 | + bool content_stream); | ||
| 596 | + static void parseContentStream_internal( | ||
| 597 | + QPDFObjectHandle stream, ParserCallbacks* callbacks); | ||
| 575 | 598 | ||
| 576 | bool initialized; | 599 | bool initialized; |
| 577 | 600 |
include/qpdf/QPDFTokenizer.hh
| @@ -18,6 +18,8 @@ | @@ -18,6 +18,8 @@ | ||
| 18 | class QPDFTokenizer | 18 | class QPDFTokenizer |
| 19 | { | 19 | { |
| 20 | public: | 20 | public: |
| 21 | + // Token type tt_eof is only returned if allowEOF() is called on | ||
| 22 | + // the tokenizer. tt_eof was introduced in QPDF version 4.1. | ||
| 21 | enum token_type_e | 23 | enum token_type_e |
| 22 | { | 24 | { |
| 23 | tt_bad, | 25 | tt_bad, |
| @@ -34,6 +36,7 @@ class QPDFTokenizer | @@ -34,6 +36,7 @@ class QPDFTokenizer | ||
| 34 | tt_null, | 36 | tt_null, |
| 35 | tt_bool, | 37 | tt_bool, |
| 36 | tt_word, | 38 | tt_word, |
| 39 | + tt_eof, | ||
| 37 | }; | 40 | }; |
| 38 | 41 | ||
| 39 | class Token | 42 | class Token |
| @@ -97,6 +100,12 @@ class QPDFTokenizer | @@ -97,6 +100,12 @@ class QPDFTokenizer | ||
| 97 | QPDF_DLL | 100 | QPDF_DLL |
| 98 | void allowPoundAnywhereInName(); | 101 | void allowPoundAnywhereInName(); |
| 99 | 102 | ||
| 103 | + // If called, treat EOF as a separate token type instead of an | ||
| 104 | + // error. This was introduced in QPDF 4.1 to facilitate | ||
| 105 | + // tokenizing content streams. | ||
| 106 | + QPDF_DLL | ||
| 107 | + void allowEOF(); | ||
| 108 | + | ||
| 100 | // Mode of operation: | 109 | // Mode of operation: |
| 101 | 110 | ||
| 102 | // Keep presenting characters and calling getToken() until | 111 | // Keep presenting characters and calling getToken() until |
| @@ -140,6 +149,7 @@ class QPDFTokenizer | @@ -140,6 +149,7 @@ class QPDFTokenizer | ||
| 140 | st_literal, st_in_hexstring, st_token_ready } state; | 149 | st_literal, st_in_hexstring, st_token_ready } state; |
| 141 | 150 | ||
| 142 | bool pound_special_in_name; | 151 | bool pound_special_in_name; |
| 152 | + bool allow_eof; | ||
| 143 | 153 | ||
| 144 | // Current token accumulation | 154 | // Current token accumulation |
| 145 | token_type_e type; | 155 | token_type_e type; |
libqpdf/QPDFObjectHandle.cc
| @@ -680,6 +680,106 @@ QPDFObjectHandle::parse(std::string const& object_str, | @@ -680,6 +680,106 @@ QPDFObjectHandle::parse(std::string const& object_str, | ||
| 680 | return result; | 680 | return result; |
| 681 | } | 681 | } |
| 682 | 682 | ||
| 683 | +void | ||
| 684 | +QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, | ||
| 685 | + ParserCallbacks* callbacks) | ||
| 686 | +{ | ||
| 687 | + std::vector<QPDFObjectHandle> streams; | ||
| 688 | + if (stream_or_array.isArray()) | ||
| 689 | + { | ||
| 690 | + streams = stream_or_array.getArrayAsVector(); | ||
| 691 | + } | ||
| 692 | + else | ||
| 693 | + { | ||
| 694 | + streams.push_back(stream_or_array); | ||
| 695 | + } | ||
| 696 | + for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); | ||
| 697 | + iter != streams.end(); ++iter) | ||
| 698 | + { | ||
| 699 | + QPDFObjectHandle stream = *iter; | ||
| 700 | + if (! stream.isStream()) | ||
| 701 | + { | ||
| 702 | + throw std::logic_error( | ||
| 703 | + "QPDFObjectHandle: parseContentStream called on non-stream"); | ||
| 704 | + } | ||
| 705 | + parseContentStream_internal(stream, callbacks); | ||
| 706 | + } | ||
| 707 | + callbacks->handleEOF(); | ||
| 708 | +} | ||
| 709 | + | ||
| 710 | +void | ||
| 711 | +QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream, | ||
| 712 | + ParserCallbacks* callbacks) | ||
| 713 | +{ | ||
| 714 | + stream.assertStream(); | ||
| 715 | + PointerHolder<Buffer> stream_data = stream.getStreamData(); | ||
| 716 | + size_t length = stream_data->getSize(); | ||
| 717 | + std::string description = "content stream object " + | ||
| 718 | + QUtil::int_to_string(stream.getObjectID()) + " " + | ||
| 719 | + QUtil::int_to_string(stream.getGeneration()); | ||
| 720 | + PointerHolder<InputSource> input = | ||
| 721 | + new BufferInputSource(description, stream_data.getPointer()); | ||
| 722 | + QPDFTokenizer tokenizer; | ||
| 723 | + tokenizer.allowEOF(); | ||
| 724 | + bool empty = false; | ||
| 725 | + while ((size_t) input->tell() < length) | ||
| 726 | + { | ||
| 727 | + QPDFObjectHandle obj = | ||
| 728 | + parseInternal(input, "content", tokenizer, empty, | ||
| 729 | + 0, 0, false, false, true); | ||
| 730 | + if (! obj.isInitialized()) | ||
| 731 | + { | ||
| 732 | + // EOF | ||
| 733 | + break; | ||
| 734 | + } | ||
| 735 | + | ||
| 736 | + callbacks->handleObject(obj); | ||
| 737 | + if (obj.isKeyword() && (obj.getKeywordValue() == "ID")) | ||
| 738 | + { | ||
| 739 | + // Discard next character; it is the space after ID that | ||
| 740 | + // terminated the token. Read until end of inline image. | ||
| 741 | + char ch; | ||
| 742 | + input->read(&ch, 1); | ||
| 743 | + char buf[4]; | ||
| 744 | + memset(buf, '\0', sizeof(buf)); | ||
| 745 | + bool done = false; | ||
| 746 | + std::string inline_image; | ||
| 747 | + while (! done) | ||
| 748 | + { | ||
| 749 | + if (input->read(&ch, 1) == 0) | ||
| 750 | + { | ||
| 751 | + QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); | ||
| 752 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 753 | + "stream data", input->tell(), | ||
| 754 | + "EOF found while reading inline image"); | ||
| 755 | + } | ||
| 756 | + inline_image += ch; | ||
| 757 | + memmove(buf, buf + 1, sizeof(buf) - 1); | ||
| 758 | + buf[sizeof(buf) - 1] = ch; | ||
| 759 | + if (strchr(" \t\n\v\f\r", buf[0]) && | ||
| 760 | + (buf[1] == 'E') && | ||
| 761 | + (buf[2] == 'I') && | ||
| 762 | + strchr(" \t\n\v\f\r", buf[3])) | ||
| 763 | + { | ||
| 764 | + // We've found an EI operator. | ||
| 765 | + done = true; | ||
| 766 | + input->seek(-3, SEEK_CUR); | ||
| 767 | + for (int i = 0; i < 4; ++i) | ||
| 768 | + { | ||
| 769 | + if (inline_image.length() > 0) | ||
| 770 | + { | ||
| 771 | + inline_image.erase(inline_image.length() - 1); | ||
| 772 | + } | ||
| 773 | + } | ||
| 774 | + } | ||
| 775 | + } | ||
| 776 | + QTC::TC("qpdf", "QPDFObjectHandle inline image token"); | ||
| 777 | + callbacks->handleObject( | ||
| 778 | + QPDFObjectHandle::newInlineImage(inline_image)); | ||
| 779 | + } | ||
| 780 | + } | ||
| 781 | +} | ||
| 782 | + | ||
| 683 | QPDFObjectHandle | 783 | QPDFObjectHandle |
| 684 | QPDFObjectHandle::parse(PointerHolder<InputSource> input, | 784 | QPDFObjectHandle::parse(PointerHolder<InputSource> input, |
| 685 | std::string const& object_description, | 785 | std::string const& object_description, |
| @@ -687,7 +787,7 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input, | @@ -687,7 +787,7 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input, | ||
| 687 | StringDecrypter* decrypter, QPDF* context) | 787 | StringDecrypter* decrypter, QPDF* context) |
| 688 | { | 788 | { |
| 689 | return parseInternal(input, object_description, tokenizer, empty, | 789 | return parseInternal(input, object_description, tokenizer, empty, |
| 690 | - decrypter, context, false, false); | 790 | + decrypter, context, false, false, false); |
| 691 | } | 791 | } |
| 692 | 792 | ||
| 693 | QPDFObjectHandle | 793 | QPDFObjectHandle |
| @@ -695,7 +795,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | @@ -695,7 +795,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | ||
| 695 | std::string const& object_description, | 795 | std::string const& object_description, |
| 696 | QPDFTokenizer& tokenizer, bool& empty, | 796 | QPDFTokenizer& tokenizer, bool& empty, |
| 697 | StringDecrypter* decrypter, QPDF* context, | 797 | StringDecrypter* decrypter, QPDF* context, |
| 698 | - bool in_array, bool in_dictionary) | 798 | + bool in_array, bool in_dictionary, |
| 799 | + bool content_stream) | ||
| 699 | { | 800 | { |
| 700 | empty = false; | 801 | empty = false; |
| 701 | if (in_dictionary && in_array) | 802 | if (in_dictionary && in_array) |
| @@ -721,6 +822,21 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | @@ -721,6 +822,21 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | ||
| 721 | 822 | ||
| 722 | switch (token.getType()) | 823 | switch (token.getType()) |
| 723 | { | 824 | { |
| 825 | + case QPDFTokenizer::tt_eof: | ||
| 826 | + if (content_stream) | ||
| 827 | + { | ||
| 828 | + // Return uninitialized object to indicate EOF | ||
| 829 | + return object; | ||
| 830 | + } | ||
| 831 | + else | ||
| 832 | + { | ||
| 833 | + // When not in content stream mode, EOF is tt_bad and | ||
| 834 | + // throws an exception before we get here. | ||
| 835 | + throw std::logic_error( | ||
| 836 | + "EOF received while not in content stream mode"); | ||
| 837 | + } | ||
| 838 | + break; | ||
| 839 | + | ||
| 724 | case QPDFTokenizer::tt_brace_open: | 840 | case QPDFTokenizer::tt_brace_open: |
| 725 | case QPDFTokenizer::tt_brace_close: | 841 | case QPDFTokenizer::tt_brace_close: |
| 726 | // Don't know what to do with these for now | 842 | // Don't know what to do with these for now |
| @@ -764,13 +880,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | @@ -764,13 +880,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | ||
| 764 | case QPDFTokenizer::tt_array_open: | 880 | case QPDFTokenizer::tt_array_open: |
| 765 | object = parseInternal( | 881 | object = parseInternal( |
| 766 | input, object_description, tokenizer, empty, | 882 | input, object_description, tokenizer, empty, |
| 767 | - decrypter, context, true, false); | 883 | + decrypter, context, true, false, content_stream); |
| 768 | break; | 884 | break; |
| 769 | 885 | ||
| 770 | case QPDFTokenizer::tt_dict_open: | 886 | case QPDFTokenizer::tt_dict_open: |
| 771 | object = parseInternal( | 887 | object = parseInternal( |
| 772 | input, object_description, tokenizer, empty, | 888 | input, object_description, tokenizer, empty, |
| 773 | - decrypter, context, false, true); | 889 | + decrypter, context, false, true, content_stream); |
| 774 | break; | 890 | break; |
| 775 | 891 | ||
| 776 | case QPDFTokenizer::tt_bool: | 892 | case QPDFTokenizer::tt_bool: |
| @@ -826,6 +942,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | @@ -826,6 +942,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | ||
| 826 | input->seek(input->getLastOffset(), SEEK_SET); | 942 | input->seek(input->getLastOffset(), SEEK_SET); |
| 827 | empty = true; | 943 | empty = true; |
| 828 | } | 944 | } |
| 945 | + else if (content_stream) | ||
| 946 | + { | ||
| 947 | + object = QPDFObjectHandle::newKeyword(token.getValue()); | ||
| 948 | + } | ||
| 829 | else | 949 | else |
| 830 | { | 950 | { |
| 831 | throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | 951 | throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), |
libqpdf/QPDFTokenizer.cc
| @@ -22,7 +22,8 @@ static bool is_space(char ch) | @@ -22,7 +22,8 @@ static bool is_space(char ch) | ||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | QPDFTokenizer::QPDFTokenizer() : | 24 | QPDFTokenizer::QPDFTokenizer() : |
| 25 | - pound_special_in_name(true) | 25 | + pound_special_in_name(true), |
| 26 | + allow_eof(false) | ||
| 26 | { | 27 | { |
| 27 | reset(); | 28 | reset(); |
| 28 | } | 29 | } |
| @@ -35,6 +36,12 @@ QPDFTokenizer::allowPoundAnywhereInName() | @@ -35,6 +36,12 @@ QPDFTokenizer::allowPoundAnywhereInName() | ||
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | void | 38 | void |
| 39 | +QPDFTokenizer::allowEOF() | ||
| 40 | +{ | ||
| 41 | + this->allow_eof = true; | ||
| 42 | +} | ||
| 43 | + | ||
| 44 | +void | ||
| 38 | QPDFTokenizer::reset() | 45 | QPDFTokenizer::reset() |
| 39 | { | 46 | { |
| 40 | state = st_top; | 47 | state = st_top; |
| @@ -441,9 +448,17 @@ QPDFTokenizer::presentEOF() | @@ -441,9 +448,17 @@ QPDFTokenizer::presentEOF() | ||
| 441 | } | 448 | } |
| 442 | else if (state != st_token_ready) | 449 | else if (state != st_token_ready) |
| 443 | { | 450 | { |
| 444 | - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); | ||
| 445 | - type = tt_bad; | ||
| 446 | - error_message = "EOF while reading token"; | 451 | + QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token", |
| 452 | + this->allow_eof ? 1 : 0); | ||
| 453 | + if (this->allow_eof) | ||
| 454 | + { | ||
| 455 | + type = tt_eof; | ||
| 456 | + } | ||
| 457 | + else | ||
| 458 | + { | ||
| 459 | + type = tt_bad; | ||
| 460 | + error_message = "EOF while reading token"; | ||
| 461 | + } | ||
| 447 | } | 462 | } |
| 448 | 463 | ||
| 449 | state = st_token_ready; | 464 | state = st_token_ready; |
qpdf/qpdf.testcov
| @@ -236,7 +236,7 @@ QPDFWriter copy use_aes 1 | @@ -236,7 +236,7 @@ QPDFWriter copy use_aes 1 | ||
| 236 | QPDFObjectHandle indirect without context 0 | 236 | QPDFObjectHandle indirect without context 0 |
| 237 | QPDFObjectHandle trailing data in parse 0 | 237 | QPDFObjectHandle trailing data in parse 0 |
| 238 | qpdf pages encryption password 0 | 238 | qpdf pages encryption password 0 |
| 239 | -QPDF_Tokenizer EOF reading token 0 | 239 | +QPDF_Tokenizer EOF reading token 1 |
| 240 | QPDF_Tokenizer EOF reading appendable token 0 | 240 | QPDF_Tokenizer EOF reading appendable token 0 |
| 241 | QPDFWriter extra header text no newline 0 | 241 | QPDFWriter extra header text no newline 0 |
| 242 | QPDFWriter extra header text add newline 0 | 242 | QPDFWriter extra header text add newline 0 |
| @@ -259,3 +259,5 @@ QPDFWriter remove Crypt 0 | @@ -259,3 +259,5 @@ QPDFWriter remove Crypt 0 | ||
| 259 | qpdf-c called qpdf_get_pdf_extension_level 0 | 259 | qpdf-c called qpdf_get_pdf_extension_level 0 |
| 260 | qpdf-c called qpdf_set_r5_encryption_parameters 0 | 260 | qpdf-c called qpdf_set_r5_encryption_parameters 0 |
| 261 | qpdf-c called qpdf_set_r6_encryption_parameters 0 | 261 | qpdf-c called qpdf_set_r6_encryption_parameters 0 |
| 262 | +QPDFObjectHandle EOF in inline image 0 | ||
| 263 | +QPDFObjectHandle inline image token 0 |
qpdf/qtest/qpdf.test
| @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", | ||
| 199 | show_ntests(); | 199 | show_ntests(); |
| 200 | # ---------- | 200 | # ---------- |
| 201 | $td->notify("--- Miscellaneous Tests ---"); | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 57; | 202 | +$n_tests += 59; |
| 203 | 203 | ||
| 204 | $td->runtest("qpdf version", | 204 | $td->runtest("qpdf version", |
| 205 | {$td->COMMAND => "qpdf --version"}, | 205 | {$td->COMMAND => "qpdf --version"}, |
| @@ -468,6 +468,16 @@ $td->runtest("check file with leading junk", | @@ -468,6 +468,16 @@ $td->runtest("check file with leading junk", | ||
| 468 | {$td->COMMAND => "qpdf --check leading-junk.pdf"}, | 468 | {$td->COMMAND => "qpdf --check leading-junk.pdf"}, |
| 469 | {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, | 469 | {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, |
| 470 | $td->NORMALIZE_NEWLINES); | 470 | $td->NORMALIZE_NEWLINES); |
| 471 | +$td->runtest("EOF inside inline image", | ||
| 472 | + {$td->COMMAND => "test_driver 37 eof-in-inline-image.pdf"}, | ||
| 473 | + {$td->FILE => "eof-in-inline-image.out", | ||
| 474 | + $td->EXIT_STATUS => 2}, | ||
| 475 | + $td->NORMALIZE_NEWLINES); | ||
| 476 | +$td->runtest("tokenize content streams", | ||
| 477 | + {$td->COMMAND => "test_driver 37 tokenize-content-streams.pdf"}, | ||
| 478 | + {$td->FILE => "tokenize-content-streams.out", | ||
| 479 | + $td->EXIT_STATUS => 0}, | ||
| 480 | + $td->NORMALIZE_NEWLINES); | ||
| 471 | 481 | ||
| 472 | show_ntests(); | 482 | show_ntests(); |
| 473 | # ---------- | 483 | # ---------- |
qpdf/qtest/qpdf/eof-in-inline-image.out
0 → 100644
qpdf/qtest/qpdf/eof-in-inline-image.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/tokenize-content-streams.out
0 → 100644
| 1 | +BT | ||
| 2 | +/F1 | ||
| 3 | +24 | ||
| 4 | +Tf | ||
| 5 | +72 | ||
| 6 | +720 | ||
| 7 | +Td | ||
| 8 | +(Potato) | ||
| 9 | +Tj | ||
| 10 | +ET | ||
| 11 | +-EOF- | ||
| 12 | +0.1 | ||
| 13 | +0 | ||
| 14 | +0 | ||
| 15 | +0.1 | ||
| 16 | +0 | ||
| 17 | +0 | ||
| 18 | +cm | ||
| 19 | +q | ||
| 20 | +0 | ||
| 21 | +1.1999 | ||
| 22 | +-1.1999 | ||
| 23 | +0 | ||
| 24 | +121.19 | ||
| 25 | +150.009 | ||
| 26 | +cm | ||
| 27 | +BI | ||
| 28 | +/CS | ||
| 29 | +/G | ||
| 30 | +/W | ||
| 31 | +1 | ||
| 32 | +/H | ||
| 33 | +1 | ||
| 34 | +/BPC | ||
| 35 | +8 | ||
| 36 | +/F | ||
| 37 | +/Fl | ||
| 38 | +/DP | ||
| 39 | +<< /Columns 1 /Predictor 15 >> | ||
| 40 | +ID | ||
| 41 | +inline image: 789c63fc0f0001030101 | ||
| 42 | +EI | ||
| 43 | +Q | ||
| 44 | +q | ||
| 45 | +0 | ||
| 46 | +35.997 | ||
| 47 | +-128.389 | ||
| 48 | +0 | ||
| 49 | +431.964 | ||
| 50 | +7269.02 | ||
| 51 | +cm | ||
| 52 | +BI | ||
| 53 | +/CS | ||
| 54 | +/G | ||
| 55 | +/W | ||
| 56 | +30 | ||
| 57 | +/H | ||
| 58 | +107 | ||
| 59 | +/BPC | ||
| 60 | +8 | ||
| 61 | +/F | ||
| 62 | +/Fl | ||
| 63 | +/DP | ||
| 64 | +<< /Columns 30 /Predictor 15 >> | ||
| 65 | +ID | ||
| 66 | +inline image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a | ||
| 67 | +EI | ||
| 68 | +Q | ||
| 69 | +q | ||
| 70 | +0 | ||
| 71 | +38.3968 | ||
| 72 | +-93.5922 | ||
| 73 | +0 | ||
| 74 | +431.964 | ||
| 75 | +7567.79 | ||
| 76 | +cm | ||
| 77 | +BI | ||
| 78 | +/CS | ||
| 79 | +/G | ||
| 80 | +/W | ||
| 81 | +32 | ||
| 82 | +/H | ||
| 83 | +78 | ||
| 84 | +/BPC | ||
| 85 | +8 | ||
| 86 | +/F | ||
| 87 | +/Fl | ||
| 88 | +/DP | ||
| 89 | +<< /Columns 32 /Predictor 15 >> | ||
| 90 | +ID | ||
| 91 | +inline image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c13 | ||
| 92 | +EI | ||
| 93 | +Q | ||
| 94 | +-EOF- | ||
| 95 | +test 37 done |
qpdf/qtest/qpdf/tokenize-content-streams.pdf
0 → 100644
No preview for this file type
qpdf/test_driver.cc
| @@ -58,6 +58,45 @@ class Provider: public QPDFObjectHandle::StreamDataProvider | @@ -58,6 +58,45 @@ class Provider: public QPDFObjectHandle::StreamDataProvider | ||
| 58 | bool bad_length; | 58 | bool bad_length; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks | ||
| 62 | +{ | ||
| 63 | + public: | ||
| 64 | + virtual ~ParserCallbacks() | ||
| 65 | + { | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + virtual void handleObject(QPDFObjectHandle); | ||
| 69 | + virtual void handleEOF(); | ||
| 70 | +}; | ||
| 71 | + | ||
| 72 | +void | ||
| 73 | +ParserCallbacks::handleObject(QPDFObjectHandle obj) | ||
| 74 | +{ | ||
| 75 | + if (obj.isInlineImage()) | ||
| 76 | + { | ||
| 77 | + std::string val = obj.getInlineImageValue(); | ||
| 78 | + std::cout << "inline image: "; | ||
| 79 | + char buf[3]; | ||
| 80 | + buf[2] = '\0'; | ||
| 81 | + for (size_t i = 0; i < val.length(); ++i) | ||
| 82 | + { | ||
| 83 | + sprintf(buf, "%02x", (unsigned char)(val[i])); | ||
| 84 | + std::cout << buf; | ||
| 85 | + } | ||
| 86 | + std::cout << std::endl; | ||
| 87 | + } | ||
| 88 | + else | ||
| 89 | + { | ||
| 90 | + std::cout << obj.unparse() << std::endl; | ||
| 91 | + } | ||
| 92 | +} | ||
| 93 | + | ||
| 94 | +void | ||
| 95 | +ParserCallbacks::handleEOF() | ||
| 96 | +{ | ||
| 97 | + std::cout << "-EOF-" << std::endl; | ||
| 98 | +} | ||
| 99 | + | ||
| 61 | static std::string getPageContents(QPDFObjectHandle page) | 100 | static std::string getPageContents(QPDFObjectHandle page) |
| 62 | { | 101 | { |
| 63 | PointerHolder<Buffer> b1 = | 102 | PointerHolder<Buffer> b1 = |
| @@ -1245,6 +1284,19 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -1245,6 +1284,19 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 1245 | } | 1284 | } |
| 1246 | } | 1285 | } |
| 1247 | } | 1286 | } |
| 1287 | + else if (n == 37) | ||
| 1288 | + { | ||
| 1289 | + // Parse content streams of all pages | ||
| 1290 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | ||
| 1291 | + for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin(); | ||
| 1292 | + iter != pages.end(); ++iter) | ||
| 1293 | + { | ||
| 1294 | + QPDFObjectHandle page = *iter; | ||
| 1295 | + QPDFObjectHandle contents = page.getKey("/Contents"); | ||
| 1296 | + ParserCallbacks cb; | ||
| 1297 | + QPDFObjectHandle::parseContentStream(contents, &cb); | ||
| 1298 | + } | ||
| 1299 | + } | ||
| 1248 | else | 1300 | else |
| 1249 | { | 1301 | { |
| 1250 | throw std::runtime_error(std::string("invalid test ") + | 1302 | throw std::runtime_error(std::string("invalid test ") + |