Commit f81152311e5737e5e0de9dd9462311f306c6921b
1 parent
1d88955f
Add QPDFObjectHandle::parseContentStream method
This method allows parsing of the PDF objects in a content stream or array of content streams.
Showing
17 changed files
with
494 additions
and
12 deletions
ChangeLog
| 1 | 1 | 2013-01-20 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Added QPDFObjectHandle::parseContentStream, which parses the | |
| 4 | + objects in a content stream and calls handlers in a callback | |
| 5 | + class. The example pdf-parse-content illustrates its use. | |
| 6 | + | |
| 3 | 7 | * Added QPDF_Keyword and QPDF_InlineImage types along with |
| 4 | 8 | appropriate wrapper methods in QPDFObjectHandle. These new object |
| 5 | 9 | types are to facilitate content stream parsing. | ... | ... |
examples/build.mk
| ... | ... | @@ -4,7 +4,8 @@ BINS_examples = \ |
| 4 | 4 | pdf-npages \ |
| 5 | 5 | pdf-double-page-size \ |
| 6 | 6 | pdf-invert-images \ |
| 7 | - pdf-create | |
| 7 | + pdf-create \ | |
| 8 | + pdf-parse-content | |
| 8 | 9 | CBINS_examples = pdf-linearize |
| 9 | 10 | |
| 10 | 11 | TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) | ... | ... |
examples/pdf-parse-content.cc
0 → 100644
| 1 | +#include <iostream> | |
| 2 | +#include <string.h> | |
| 3 | +#include <stdlib.h> | |
| 4 | + | |
| 5 | +#include <qpdf/QPDF.hh> | |
| 6 | +#include <qpdf/QUtil.hh> | |
| 7 | + | |
| 8 | +static char const* whoami = 0; | |
| 9 | + | |
| 10 | +void usage() | |
| 11 | +{ | |
| 12 | + std::cerr << "Usage: " << whoami << " filename page-number" << std::endl | |
| 13 | + << "Prints a dump of the objects in the content streams" | |
| 14 | + << " of the given page." << std::endl | |
| 15 | + << "Pages are numbered from 1." << std::endl; | |
| 16 | + exit(2); | |
| 17 | +} | |
| 18 | + | |
| 19 | +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks | |
| 20 | +{ | |
| 21 | + public: | |
| 22 | + virtual ~ParserCallbacks() | |
| 23 | + { | |
| 24 | + } | |
| 25 | + | |
| 26 | + virtual void handleObject(QPDFObjectHandle); | |
| 27 | + virtual void handleEOF(); | |
| 28 | +}; | |
| 29 | + | |
| 30 | +void | |
| 31 | +ParserCallbacks::handleObject(QPDFObjectHandle obj) | |
| 32 | +{ | |
| 33 | + if (obj.isInlineImage()) | |
| 34 | + { | |
| 35 | + std::string val = obj.getInlineImageValue(); | |
| 36 | + std::cout << "inline image: "; | |
| 37 | + char buf[3]; | |
| 38 | + buf[2] = '\0'; | |
| 39 | + for (size_t i = 0; i < val.length(); ++i) | |
| 40 | + { | |
| 41 | + sprintf(buf, "%02x", (unsigned char)(val[i])); | |
| 42 | + std::cout << buf; | |
| 43 | + } | |
| 44 | + std::cout << std::endl; | |
| 45 | + } | |
| 46 | + else | |
| 47 | + { | |
| 48 | + std::cout << obj.unparse() << std::endl; | |
| 49 | + } | |
| 50 | +} | |
| 51 | + | |
| 52 | +void | |
| 53 | +ParserCallbacks::handleEOF() | |
| 54 | +{ | |
| 55 | + std::cout << "-EOF-" << std::endl; | |
| 56 | +} | |
| 57 | + | |
| 58 | +int main(int argc, char* argv[]) | |
| 59 | +{ | |
| 60 | + whoami = QUtil::getWhoami(argv[0]); | |
| 61 | + | |
| 62 | + // For libtool's sake.... | |
| 63 | + if (strncmp(whoami, "lt-", 3) == 0) | |
| 64 | + { | |
| 65 | + whoami += 3; | |
| 66 | + } | |
| 67 | + | |
| 68 | + if (argc != 3) | |
| 69 | + { | |
| 70 | + usage(); | |
| 71 | + } | |
| 72 | + char const* filename = argv[1]; | |
| 73 | + int pageno = atoi(argv[2]); | |
| 74 | + | |
| 75 | + try | |
| 76 | + { | |
| 77 | + QPDF pdf; | |
| 78 | + pdf.processFile(filename); | |
| 79 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | |
| 80 | + if ((pageno < 1) || (pageno > (int)pages.size())) | |
| 81 | + { | |
| 82 | + usage(); | |
| 83 | + } | |
| 84 | + | |
| 85 | + QPDFObjectHandle page = pages[pageno-1]; | |
| 86 | + QPDFObjectHandle contents = page.getKey("/Contents"); | |
| 87 | + ParserCallbacks cb; | |
| 88 | + QPDFObjectHandle::parseContentStream(contents, &cb); | |
| 89 | + } | |
| 90 | + catch (std::exception& e) | |
| 91 | + { | |
| 92 | + std::cerr << whoami << ": " << e.what() << std::endl; | |
| 93 | + exit(2); | |
| 94 | + } | |
| 95 | + | |
| 96 | + return 0; | |
| 97 | +} | ... | ... |
examples/qtest/parse-content.test
0 → 100644
| 1 | +#!/usr/bin/env perl | |
| 2 | +require 5.008; | |
| 3 | +BEGIN { $^W = 1; } | |
| 4 | +use strict; | |
| 5 | + | |
| 6 | +chdir("parse-content"); | |
| 7 | + | |
| 8 | +require TestDriver; | |
| 9 | + | |
| 10 | +my $td = new TestDriver('pdf-parse-content'); | |
| 11 | + | |
| 12 | +$td->runtest("parse content", | |
| 13 | + {$td->COMMAND => "pdf-parse-content input.pdf 1"}, | |
| 14 | + {$td->FILE => "content.out", $td->EXIT_STATUS => 0}, | |
| 15 | + $td->NORMALIZE_NEWLINES); | |
| 16 | + | |
| 17 | +$td->report(1); | ... | ... |
examples/qtest/parse-content/content.out
0 → 100644
examples/qtest/parse-content/input.pdf
0 → 100644
No preview for this file type
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -71,6 +71,21 @@ class QPDFObjectHandle |
| 71 | 71 | virtual void decryptString(std::string& val) = 0; |
| 72 | 72 | }; |
| 73 | 73 | |
| 74 | + // This class is used by parseContentStream. Callers must | |
| 75 | + // instantiate a subclass of this with handlers defined to accept | |
| 76 | + // QPDFObjectHandles that are parsed from the stream. | |
| 77 | + class ParserCallbacks | |
| 78 | + { | |
| 79 | + public: | |
| 80 | + QPDF_DLL | |
| 81 | + virtual ~ParserCallbacks() | |
| 82 | + { | |
| 83 | + } | |
| 84 | + virtual void handleObject(QPDFObjectHandle) = 0; | |
| 85 | + virtual void handleEOF() = 0; | |
| 86 | + }; | |
| 87 | + | |
| 88 | + | |
| 74 | 89 | QPDF_DLL |
| 75 | 90 | QPDFObjectHandle(); |
| 76 | 91 | QPDF_DLL |
| ... | ... | @@ -138,6 +153,11 @@ class QPDFObjectHandle |
| 138 | 153 | StringDecrypter* decrypter, |
| 139 | 154 | QPDF* context); |
| 140 | 155 | |
| 156 | + // Helpers for parsing content streams | |
| 157 | + QPDF_DLL | |
| 158 | + static void parseContentStream(QPDFObjectHandle stream_or_array, | |
| 159 | + ParserCallbacks* callbacks); | |
| 160 | + | |
| 141 | 161 | // Type-specific factories |
| 142 | 162 | QPDF_DLL |
| 143 | 163 | static QPDFObjectHandle newNull(); |
| ... | ... | @@ -571,7 +591,10 @@ class QPDFObjectHandle |
| 571 | 591 | std::string const& object_description, |
| 572 | 592 | QPDFTokenizer& tokenizer, bool& empty, |
| 573 | 593 | StringDecrypter* decrypter, QPDF* context, |
| 574 | - bool in_array, bool in_dictionary); | |
| 594 | + bool in_array, bool in_dictionary, | |
| 595 | + bool content_stream); | |
| 596 | + static void parseContentStream_internal( | |
| 597 | + QPDFObjectHandle stream, ParserCallbacks* callbacks); | |
| 575 | 598 | |
| 576 | 599 | bool initialized; |
| 577 | 600 | ... | ... |
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -18,6 +18,8 @@ |
| 18 | 18 | class QPDFTokenizer |
| 19 | 19 | { |
| 20 | 20 | public: |
| 21 | + // Token type tt_eof is only returned if allowEOF() is called on | |
| 22 | + // the tokenizer. tt_eof was introduced in QPDF version 4.1. | |
| 21 | 23 | enum token_type_e |
| 22 | 24 | { |
| 23 | 25 | tt_bad, |
| ... | ... | @@ -34,6 +36,7 @@ class QPDFTokenizer |
| 34 | 36 | tt_null, |
| 35 | 37 | tt_bool, |
| 36 | 38 | tt_word, |
| 39 | + tt_eof, | |
| 37 | 40 | }; |
| 38 | 41 | |
| 39 | 42 | class Token |
| ... | ... | @@ -97,6 +100,12 @@ class QPDFTokenizer |
| 97 | 100 | QPDF_DLL |
| 98 | 101 | void allowPoundAnywhereInName(); |
| 99 | 102 | |
| 103 | + // If called, treat EOF as a separate token type instead of an | |
| 104 | + // error. This was introduced in QPDF 4.1 to facilitate | |
| 105 | + // tokenizing content streams. | |
| 106 | + QPDF_DLL | |
| 107 | + void allowEOF(); | |
| 108 | + | |
| 100 | 109 | // Mode of operation: |
| 101 | 110 | |
| 102 | 111 | // Keep presenting characters and calling getToken() until |
| ... | ... | @@ -140,6 +149,7 @@ class QPDFTokenizer |
| 140 | 149 | st_literal, st_in_hexstring, st_token_ready } state; |
| 141 | 150 | |
| 142 | 151 | bool pound_special_in_name; |
| 152 | + bool allow_eof; | |
| 143 | 153 | |
| 144 | 154 | // Current token accumulation |
| 145 | 155 | token_type_e type; | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -680,6 +680,106 @@ QPDFObjectHandle::parse(std::string const& object_str, |
| 680 | 680 | return result; |
| 681 | 681 | } |
| 682 | 682 | |
| 683 | +void | |
| 684 | +QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, | |
| 685 | + ParserCallbacks* callbacks) | |
| 686 | +{ | |
| 687 | + std::vector<QPDFObjectHandle> streams; | |
| 688 | + if (stream_or_array.isArray()) | |
| 689 | + { | |
| 690 | + streams = stream_or_array.getArrayAsVector(); | |
| 691 | + } | |
| 692 | + else | |
| 693 | + { | |
| 694 | + streams.push_back(stream_or_array); | |
| 695 | + } | |
| 696 | + for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); | |
| 697 | + iter != streams.end(); ++iter) | |
| 698 | + { | |
| 699 | + QPDFObjectHandle stream = *iter; | |
| 700 | + if (! stream.isStream()) | |
| 701 | + { | |
| 702 | + throw std::logic_error( | |
| 703 | + "QPDFObjectHandle: parseContentStream called on non-stream"); | |
| 704 | + } | |
| 705 | + parseContentStream_internal(stream, callbacks); | |
| 706 | + } | |
| 707 | + callbacks->handleEOF(); | |
| 708 | +} | |
| 709 | + | |
| 710 | +void | |
| 711 | +QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream, | |
| 712 | + ParserCallbacks* callbacks) | |
| 713 | +{ | |
| 714 | + stream.assertStream(); | |
| 715 | + PointerHolder<Buffer> stream_data = stream.getStreamData(); | |
| 716 | + size_t length = stream_data->getSize(); | |
| 717 | + std::string description = "content stream object " + | |
| 718 | + QUtil::int_to_string(stream.getObjectID()) + " " + | |
| 719 | + QUtil::int_to_string(stream.getGeneration()); | |
| 720 | + PointerHolder<InputSource> input = | |
| 721 | + new BufferInputSource(description, stream_data.getPointer()); | |
| 722 | + QPDFTokenizer tokenizer; | |
| 723 | + tokenizer.allowEOF(); | |
| 724 | + bool empty = false; | |
| 725 | + while ((size_t) input->tell() < length) | |
| 726 | + { | |
| 727 | + QPDFObjectHandle obj = | |
| 728 | + parseInternal(input, "content", tokenizer, empty, | |
| 729 | + 0, 0, false, false, true); | |
| 730 | + if (! obj.isInitialized()) | |
| 731 | + { | |
| 732 | + // EOF | |
| 733 | + break; | |
| 734 | + } | |
| 735 | + | |
| 736 | + callbacks->handleObject(obj); | |
| 737 | + if (obj.isKeyword() && (obj.getKeywordValue() == "ID")) | |
| 738 | + { | |
| 739 | + // Discard next character; it is the space after ID that | |
| 740 | + // terminated the token. Read until end of inline image. | |
| 741 | + char ch; | |
| 742 | + input->read(&ch, 1); | |
| 743 | + char buf[4]; | |
| 744 | + memset(buf, '\0', sizeof(buf)); | |
| 745 | + bool done = false; | |
| 746 | + std::string inline_image; | |
| 747 | + while (! done) | |
| 748 | + { | |
| 749 | + if (input->read(&ch, 1) == 0) | |
| 750 | + { | |
| 751 | + QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); | |
| 752 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 753 | + "stream data", input->tell(), | |
| 754 | + "EOF found while reading inline image"); | |
| 755 | + } | |
| 756 | + inline_image += ch; | |
| 757 | + memmove(buf, buf + 1, sizeof(buf) - 1); | |
| 758 | + buf[sizeof(buf) - 1] = ch; | |
| 759 | + if (strchr(" \t\n\v\f\r", buf[0]) && | |
| 760 | + (buf[1] == 'E') && | |
| 761 | + (buf[2] == 'I') && | |
| 762 | + strchr(" \t\n\v\f\r", buf[3])) | |
| 763 | + { | |
| 764 | + // We've found an EI operator. | |
| 765 | + done = true; | |
| 766 | + input->seek(-3, SEEK_CUR); | |
| 767 | + for (int i = 0; i < 4; ++i) | |
| 768 | + { | |
| 769 | + if (inline_image.length() > 0) | |
| 770 | + { | |
| 771 | + inline_image.erase(inline_image.length() - 1); | |
| 772 | + } | |
| 773 | + } | |
| 774 | + } | |
| 775 | + } | |
| 776 | + QTC::TC("qpdf", "QPDFObjectHandle inline image token"); | |
| 777 | + callbacks->handleObject( | |
| 778 | + QPDFObjectHandle::newInlineImage(inline_image)); | |
| 779 | + } | |
| 780 | + } | |
| 781 | +} | |
| 782 | + | |
| 683 | 783 | QPDFObjectHandle |
| 684 | 784 | QPDFObjectHandle::parse(PointerHolder<InputSource> input, |
| 685 | 785 | std::string const& object_description, |
| ... | ... | @@ -687,7 +787,7 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input, |
| 687 | 787 | StringDecrypter* decrypter, QPDF* context) |
| 688 | 788 | { |
| 689 | 789 | return parseInternal(input, object_description, tokenizer, empty, |
| 690 | - decrypter, context, false, false); | |
| 790 | + decrypter, context, false, false, false); | |
| 691 | 791 | } |
| 692 | 792 | |
| 693 | 793 | QPDFObjectHandle |
| ... | ... | @@ -695,7 +795,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, |
| 695 | 795 | std::string const& object_description, |
| 696 | 796 | QPDFTokenizer& tokenizer, bool& empty, |
| 697 | 797 | StringDecrypter* decrypter, QPDF* context, |
| 698 | - bool in_array, bool in_dictionary) | |
| 798 | + bool in_array, bool in_dictionary, | |
| 799 | + bool content_stream) | |
| 699 | 800 | { |
| 700 | 801 | empty = false; |
| 701 | 802 | if (in_dictionary && in_array) |
| ... | ... | @@ -721,6 +822,21 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, |
| 721 | 822 | |
| 722 | 823 | switch (token.getType()) |
| 723 | 824 | { |
| 825 | + case QPDFTokenizer::tt_eof: | |
| 826 | + if (content_stream) | |
| 827 | + { | |
| 828 | + // Return uninitialized object to indicate EOF | |
| 829 | + return object; | |
| 830 | + } | |
| 831 | + else | |
| 832 | + { | |
| 833 | + // When not in content stream mode, EOF is tt_bad and | |
| 834 | + // throws an exception before we get here. | |
| 835 | + throw std::logic_error( | |
| 836 | + "EOF received while not in content stream mode"); | |
| 837 | + } | |
| 838 | + break; | |
| 839 | + | |
| 724 | 840 | case QPDFTokenizer::tt_brace_open: |
| 725 | 841 | case QPDFTokenizer::tt_brace_close: |
| 726 | 842 | // Don't know what to do with these for now |
| ... | ... | @@ -764,13 +880,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, |
| 764 | 880 | case QPDFTokenizer::tt_array_open: |
| 765 | 881 | object = parseInternal( |
| 766 | 882 | input, object_description, tokenizer, empty, |
| 767 | - decrypter, context, true, false); | |
| 883 | + decrypter, context, true, false, content_stream); | |
| 768 | 884 | break; |
| 769 | 885 | |
| 770 | 886 | case QPDFTokenizer::tt_dict_open: |
| 771 | 887 | object = parseInternal( |
| 772 | 888 | input, object_description, tokenizer, empty, |
| 773 | - decrypter, context, false, true); | |
| 889 | + decrypter, context, false, true, content_stream); | |
| 774 | 890 | break; |
| 775 | 891 | |
| 776 | 892 | case QPDFTokenizer::tt_bool: |
| ... | ... | @@ -826,6 +942,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, |
| 826 | 942 | input->seek(input->getLastOffset(), SEEK_SET); |
| 827 | 943 | empty = true; |
| 828 | 944 | } |
| 945 | + else if (content_stream) | |
| 946 | + { | |
| 947 | + object = QPDFObjectHandle::newKeyword(token.getValue()); | |
| 948 | + } | |
| 829 | 949 | else |
| 830 | 950 | { |
| 831 | 951 | throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -22,7 +22,8 @@ static bool is_space(char ch) |
| 22 | 22 | } |
| 23 | 23 | |
| 24 | 24 | QPDFTokenizer::QPDFTokenizer() : |
| 25 | - pound_special_in_name(true) | |
| 25 | + pound_special_in_name(true), | |
| 26 | + allow_eof(false) | |
| 26 | 27 | { |
| 27 | 28 | reset(); |
| 28 | 29 | } |
| ... | ... | @@ -35,6 +36,12 @@ QPDFTokenizer::allowPoundAnywhereInName() |
| 35 | 36 | } |
| 36 | 37 | |
| 37 | 38 | void |
| 39 | +QPDFTokenizer::allowEOF() | |
| 40 | +{ | |
| 41 | + this->allow_eof = true; | |
| 42 | +} | |
| 43 | + | |
| 44 | +void | |
| 38 | 45 | QPDFTokenizer::reset() |
| 39 | 46 | { |
| 40 | 47 | state = st_top; |
| ... | ... | @@ -441,9 +448,17 @@ QPDFTokenizer::presentEOF() |
| 441 | 448 | } |
| 442 | 449 | else if (state != st_token_ready) |
| 443 | 450 | { |
| 444 | - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); | |
| 445 | - type = tt_bad; | |
| 446 | - error_message = "EOF while reading token"; | |
| 451 | + QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token", | |
| 452 | + this->allow_eof ? 1 : 0); | |
| 453 | + if (this->allow_eof) | |
| 454 | + { | |
| 455 | + type = tt_eof; | |
| 456 | + } | |
| 457 | + else | |
| 458 | + { | |
| 459 | + type = tt_bad; | |
| 460 | + error_message = "EOF while reading token"; | |
| 461 | + } | |
| 447 | 462 | } |
| 448 | 463 | |
| 449 | 464 | state = st_token_ready; | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -236,7 +236,7 @@ QPDFWriter copy use_aes 1 |
| 236 | 236 | QPDFObjectHandle indirect without context 0 |
| 237 | 237 | QPDFObjectHandle trailing data in parse 0 |
| 238 | 238 | qpdf pages encryption password 0 |
| 239 | -QPDF_Tokenizer EOF reading token 0 | |
| 239 | +QPDF_Tokenizer EOF reading token 1 | |
| 240 | 240 | QPDF_Tokenizer EOF reading appendable token 0 |
| 241 | 241 | QPDFWriter extra header text no newline 0 |
| 242 | 242 | QPDFWriter extra header text add newline 0 |
| ... | ... | @@ -259,3 +259,5 @@ QPDFWriter remove Crypt 0 |
| 259 | 259 | qpdf-c called qpdf_get_pdf_extension_level 0 |
| 260 | 260 | qpdf-c called qpdf_set_r5_encryption_parameters 0 |
| 261 | 261 | qpdf-c called qpdf_set_r6_encryption_parameters 0 |
| 262 | +QPDFObjectHandle EOF in inline image 0 | |
| 263 | +QPDFObjectHandle inline image token 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", |
| 199 | 199 | show_ntests(); |
| 200 | 200 | # ---------- |
| 201 | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 57; | |
| 202 | +$n_tests += 59; | |
| 203 | 203 | |
| 204 | 204 | $td->runtest("qpdf version", |
| 205 | 205 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -468,6 +468,16 @@ $td->runtest("check file with leading junk", |
| 468 | 468 | {$td->COMMAND => "qpdf --check leading-junk.pdf"}, |
| 469 | 469 | {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, |
| 470 | 470 | $td->NORMALIZE_NEWLINES); |
| 471 | +$td->runtest("EOF inside inline image", | |
| 472 | + {$td->COMMAND => "test_driver 37 eof-in-inline-image.pdf"}, | |
| 473 | + {$td->FILE => "eof-in-inline-image.out", | |
| 474 | + $td->EXIT_STATUS => 2}, | |
| 475 | + $td->NORMALIZE_NEWLINES); | |
| 476 | +$td->runtest("tokenize content streams", | |
| 477 | + {$td->COMMAND => "test_driver 37 tokenize-content-streams.pdf"}, | |
| 478 | + {$td->FILE => "tokenize-content-streams.out", | |
| 479 | + $td->EXIT_STATUS => 0}, | |
| 480 | + $td->NORMALIZE_NEWLINES); | |
| 471 | 481 | |
| 472 | 482 | show_ntests(); |
| 473 | 483 | # ---------- | ... | ... |
qpdf/qtest/qpdf/eof-in-inline-image.out
0 → 100644
| 1 | +BT | |
| 2 | +/F1 | |
| 3 | +24 | |
| 4 | +Tf | |
| 5 | +72 | |
| 6 | +720 | |
| 7 | +Td | |
| 8 | +(Potato) | |
| 9 | +Tj | |
| 10 | +ET | |
| 11 | +BI | |
| 12 | +/CS | |
| 13 | +/G | |
| 14 | +/W | |
| 15 | +1 | |
| 16 | +/H | |
| 17 | +1 | |
| 18 | +/BPC | |
| 19 | +8 | |
| 20 | +/F | |
| 21 | +/Fl | |
| 22 | +/DP | |
| 23 | +<< /Columns 1 /Predictor 15 >> | |
| 24 | +ID | |
| 25 | +content stream object 4 0 (stream data, file position 139): EOF found while reading inline image | ... | ... |
qpdf/qtest/qpdf/eof-in-inline-image.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/tokenize-content-streams.out
0 → 100644
| 1 | +BT | |
| 2 | +/F1 | |
| 3 | +24 | |
| 4 | +Tf | |
| 5 | +72 | |
| 6 | +720 | |
| 7 | +Td | |
| 8 | +(Potato) | |
| 9 | +Tj | |
| 10 | +ET | |
| 11 | +-EOF- | |
| 12 | +0.1 | |
| 13 | +0 | |
| 14 | +0 | |
| 15 | +0.1 | |
| 16 | +0 | |
| 17 | +0 | |
| 18 | +cm | |
| 19 | +q | |
| 20 | +0 | |
| 21 | +1.1999 | |
| 22 | +-1.1999 | |
| 23 | +0 | |
| 24 | +121.19 | |
| 25 | +150.009 | |
| 26 | +cm | |
| 27 | +BI | |
| 28 | +/CS | |
| 29 | +/G | |
| 30 | +/W | |
| 31 | +1 | |
| 32 | +/H | |
| 33 | +1 | |
| 34 | +/BPC | |
| 35 | +8 | |
| 36 | +/F | |
| 37 | +/Fl | |
| 38 | +/DP | |
| 39 | +<< /Columns 1 /Predictor 15 >> | |
| 40 | +ID | |
| 41 | +inline image: 789c63fc0f0001030101 | |
| 42 | +EI | |
| 43 | +Q | |
| 44 | +q | |
| 45 | +0 | |
| 46 | +35.997 | |
| 47 | +-128.389 | |
| 48 | +0 | |
| 49 | +431.964 | |
| 50 | +7269.02 | |
| 51 | +cm | |
| 52 | +BI | |
| 53 | +/CS | |
| 54 | +/G | |
| 55 | +/W | |
| 56 | +30 | |
| 57 | +/H | |
| 58 | +107 | |
| 59 | +/BPC | |
| 60 | +8 | |
| 61 | +/F | |
| 62 | +/Fl | |
| 63 | +/DP | |
| 64 | +<< /Columns 30 /Predictor 15 >> | |
| 65 | +ID | |
| 66 | +inline image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a | |
| 67 | +EI | |
| 68 | +Q | |
| 69 | +q | |
| 70 | +0 | |
| 71 | +38.3968 | |
| 72 | +-93.5922 | |
| 73 | +0 | |
| 74 | +431.964 | |
| 75 | +7567.79 | |
| 76 | +cm | |
| 77 | +BI | |
| 78 | +/CS | |
| 79 | +/G | |
| 80 | +/W | |
| 81 | +32 | |
| 82 | +/H | |
| 83 | +78 | |
| 84 | +/BPC | |
| 85 | +8 | |
| 86 | +/F | |
| 87 | +/Fl | |
| 88 | +/DP | |
| 89 | +<< /Columns 32 /Predictor 15 >> | |
| 90 | +ID | |
| 91 | +inline image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c13 | |
| 92 | +EI | |
| 93 | +Q | |
| 94 | +-EOF- | |
| 95 | +test 37 done | ... | ... |
qpdf/qtest/qpdf/tokenize-content-streams.pdf
0 → 100644
No preview for this file type
qpdf/test_driver.cc
| ... | ... | @@ -58,6 +58,45 @@ class Provider: public QPDFObjectHandle::StreamDataProvider |
| 58 | 58 | bool bad_length; |
| 59 | 59 | }; |
| 60 | 60 | |
| 61 | +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks | |
| 62 | +{ | |
| 63 | + public: | |
| 64 | + virtual ~ParserCallbacks() | |
| 65 | + { | |
| 66 | + } | |
| 67 | + | |
| 68 | + virtual void handleObject(QPDFObjectHandle); | |
| 69 | + virtual void handleEOF(); | |
| 70 | +}; | |
| 71 | + | |
| 72 | +void | |
| 73 | +ParserCallbacks::handleObject(QPDFObjectHandle obj) | |
| 74 | +{ | |
| 75 | + if (obj.isInlineImage()) | |
| 76 | + { | |
| 77 | + std::string val = obj.getInlineImageValue(); | |
| 78 | + std::cout << "inline image: "; | |
| 79 | + char buf[3]; | |
| 80 | + buf[2] = '\0'; | |
| 81 | + for (size_t i = 0; i < val.length(); ++i) | |
| 82 | + { | |
| 83 | + sprintf(buf, "%02x", (unsigned char)(val[i])); | |
| 84 | + std::cout << buf; | |
| 85 | + } | |
| 86 | + std::cout << std::endl; | |
| 87 | + } | |
| 88 | + else | |
| 89 | + { | |
| 90 | + std::cout << obj.unparse() << std::endl; | |
| 91 | + } | |
| 92 | +} | |
| 93 | + | |
| 94 | +void | |
| 95 | +ParserCallbacks::handleEOF() | |
| 96 | +{ | |
| 97 | + std::cout << "-EOF-" << std::endl; | |
| 98 | +} | |
| 99 | + | |
| 61 | 100 | static std::string getPageContents(QPDFObjectHandle page) |
| 62 | 101 | { |
| 63 | 102 | PointerHolder<Buffer> b1 = |
| ... | ... | @@ -1245,6 +1284,19 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 1245 | 1284 | } |
| 1246 | 1285 | } |
| 1247 | 1286 | } |
| 1287 | + else if (n == 37) | |
| 1288 | + { | |
| 1289 | + // Parse content streams of all pages | |
| 1290 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | |
| 1291 | + for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin(); | |
| 1292 | + iter != pages.end(); ++iter) | |
| 1293 | + { | |
| 1294 | + QPDFObjectHandle page = *iter; | |
| 1295 | + QPDFObjectHandle contents = page.getKey("/Contents"); | |
| 1296 | + ParserCallbacks cb; | |
| 1297 | + QPDFObjectHandle::parseContentStream(contents, &cb); | |
| 1298 | + } | |
| 1299 | + } | |
| 1248 | 1300 | else |
| 1249 | 1301 | { |
| 1250 | 1302 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |