Commit f81152311e5737e5e0de9dd9462311f306c6921b

Authored by Jay Berkenbilt
1 parent 1d88955f

Add QPDFObjectHandle::parseContentStream method

This method allows parsing of the PDF objects in a content stream or
array of content streams.
ChangeLog
1 2013-01-20 Jay Berkenbilt <ejb@ql.org> 1 2013-01-20 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Added QPDFObjectHandle::parseContentStream, which parses the
  4 + objects in a content stream and calls handlers in a callback
  5 + class. The example pdf-parse-content illustrates its use.
  6 +
3 * Added QPDF_Keyword and QPDF_InlineImage types along with 7 * Added QPDF_Keyword and QPDF_InlineImage types along with
4 appropriate wrapper methods in QPDFObjectHandle. These new object 8 appropriate wrapper methods in QPDFObjectHandle. These new object
5 types are to facilitate content stream parsing. 9 types are to facilitate content stream parsing.
examples/build.mk
@@ -4,7 +4,8 @@ BINS_examples = \ @@ -4,7 +4,8 @@ BINS_examples = \
4 pdf-npages \ 4 pdf-npages \
5 pdf-double-page-size \ 5 pdf-double-page-size \
6 pdf-invert-images \ 6 pdf-invert-images \
7 - pdf-create 7 + pdf-create \
  8 + pdf-parse-content
8 CBINS_examples = pdf-linearize 9 CBINS_examples = pdf-linearize
9 10
10 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) 11 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
examples/pdf-parse-content.cc 0 → 100644
  1 +#include <iostream>
  2 +#include <string.h>
  3 +#include <stdlib.h>
  4 +
  5 +#include <qpdf/QPDF.hh>
  6 +#include <qpdf/QUtil.hh>
  7 +
  8 +static char const* whoami = 0;
  9 +
  10 +void usage()
  11 +{
  12 + std::cerr << "Usage: " << whoami << " filename page-number" << std::endl
  13 + << "Prints a dump of the objects in the content streams"
  14 + << " of the given page." << std::endl
  15 + << "Pages are numbered from 1." << std::endl;
  16 + exit(2);
  17 +}
  18 +
  19 +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
  20 +{
  21 + public:
  22 + virtual ~ParserCallbacks()
  23 + {
  24 + }
  25 +
  26 + virtual void handleObject(QPDFObjectHandle);
  27 + virtual void handleEOF();
  28 +};
  29 +
  30 +void
  31 +ParserCallbacks::handleObject(QPDFObjectHandle obj)
  32 +{
  33 + if (obj.isInlineImage())
  34 + {
  35 + std::string val = obj.getInlineImageValue();
  36 + std::cout << "inline image: ";
  37 + char buf[3];
  38 + buf[2] = '\0';
  39 + for (size_t i = 0; i < val.length(); ++i)
  40 + {
  41 + sprintf(buf, "%02x", (unsigned char)(val[i]));
  42 + std::cout << buf;
  43 + }
  44 + std::cout << std::endl;
  45 + }
  46 + else
  47 + {
  48 + std::cout << obj.unparse() << std::endl;
  49 + }
  50 +}
  51 +
  52 +void
  53 +ParserCallbacks::handleEOF()
  54 +{
  55 + std::cout << "-EOF-" << std::endl;
  56 +}
  57 +
  58 +int main(int argc, char* argv[])
  59 +{
  60 + whoami = QUtil::getWhoami(argv[0]);
  61 +
  62 + // For libtool's sake....
  63 + if (strncmp(whoami, "lt-", 3) == 0)
  64 + {
  65 + whoami += 3;
  66 + }
  67 +
  68 + if (argc != 3)
  69 + {
  70 + usage();
  71 + }
  72 + char const* filename = argv[1];
  73 + int pageno = atoi(argv[2]);
  74 +
  75 + try
  76 + {
  77 + QPDF pdf;
  78 + pdf.processFile(filename);
  79 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  80 + if ((pageno < 1) || (pageno > (int)pages.size()))
  81 + {
  82 + usage();
  83 + }
  84 +
  85 + QPDFObjectHandle page = pages[pageno-1];
  86 + QPDFObjectHandle contents = page.getKey("/Contents");
  87 + ParserCallbacks cb;
  88 + QPDFObjectHandle::parseContentStream(contents, &cb);
  89 + }
  90 + catch (std::exception& e)
  91 + {
  92 + std::cerr << whoami << ": " << e.what() << std::endl;
  93 + exit(2);
  94 + }
  95 +
  96 + return 0;
  97 +}
examples/qtest/parse-content.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +BEGIN { $^W = 1; }
  4 +use strict;
  5 +
  6 +chdir("parse-content");
  7 +
  8 +require TestDriver;
  9 +
  10 +my $td = new TestDriver('pdf-parse-content');
  11 +
  12 +$td->runtest("parse content",
  13 + {$td->COMMAND => "pdf-parse-content input.pdf 1"},
  14 + {$td->FILE => "content.out", $td->EXIT_STATUS => 0},
  15 + $td->NORMALIZE_NEWLINES);
  16 +
  17 +$td->report(1);
examples/qtest/parse-content/content.out 0 → 100644
  1 +BT
  2 +/F1
  3 +24
  4 +Tf
  5 +72
  6 +720
  7 +Td
  8 +(Potato)
  9 +Tj
  10 +ET
  11 +-EOF-
examples/qtest/parse-content/input.pdf 0 → 100644
No preview for this file type
include/qpdf/QPDFObjectHandle.hh
@@ -71,6 +71,21 @@ class QPDFObjectHandle @@ -71,6 +71,21 @@ class QPDFObjectHandle
71 virtual void decryptString(std::string& val) = 0; 71 virtual void decryptString(std::string& val) = 0;
72 }; 72 };
73 73
  74 + // This class is used by parseContentStream. Callers must
  75 + // instantiate a subclass of this with handlers defined to accept
  76 + // QPDFObjectHandles that are parsed from the stream.
  77 + class ParserCallbacks
  78 + {
  79 + public:
  80 + QPDF_DLL
  81 + virtual ~ParserCallbacks()
  82 + {
  83 + }
  84 + virtual void handleObject(QPDFObjectHandle) = 0;
  85 + virtual void handleEOF() = 0;
  86 + };
  87 +
  88 +
74 QPDF_DLL 89 QPDF_DLL
75 QPDFObjectHandle(); 90 QPDFObjectHandle();
76 QPDF_DLL 91 QPDF_DLL
@@ -138,6 +153,11 @@ class QPDFObjectHandle @@ -138,6 +153,11 @@ class QPDFObjectHandle
138 StringDecrypter* decrypter, 153 StringDecrypter* decrypter,
139 QPDF* context); 154 QPDF* context);
140 155
  156 + // Helpers for parsing content streams
  157 + QPDF_DLL
  158 + static void parseContentStream(QPDFObjectHandle stream_or_array,
  159 + ParserCallbacks* callbacks);
  160 +
141 // Type-specific factories 161 // Type-specific factories
142 QPDF_DLL 162 QPDF_DLL
143 static QPDFObjectHandle newNull(); 163 static QPDFObjectHandle newNull();
@@ -571,7 +591,10 @@ class QPDFObjectHandle @@ -571,7 +591,10 @@ class QPDFObjectHandle
571 std::string const& object_description, 591 std::string const& object_description,
572 QPDFTokenizer& tokenizer, bool& empty, 592 QPDFTokenizer& tokenizer, bool& empty,
573 StringDecrypter* decrypter, QPDF* context, 593 StringDecrypter* decrypter, QPDF* context,
574 - bool in_array, bool in_dictionary); 594 + bool in_array, bool in_dictionary,
  595 + bool content_stream);
  596 + static void parseContentStream_internal(
  597 + QPDFObjectHandle stream, ParserCallbacks* callbacks);
575 598
576 bool initialized; 599 bool initialized;
577 600
include/qpdf/QPDFTokenizer.hh
@@ -18,6 +18,8 @@ @@ -18,6 +18,8 @@
18 class QPDFTokenizer 18 class QPDFTokenizer
19 { 19 {
20 public: 20 public:
  21 + // Token type tt_eof is only returned if allowEOF() is called on
  22 + // the tokenizer. tt_eof was introduced in QPDF version 4.1.
21 enum token_type_e 23 enum token_type_e
22 { 24 {
23 tt_bad, 25 tt_bad,
@@ -34,6 +36,7 @@ class QPDFTokenizer @@ -34,6 +36,7 @@ class QPDFTokenizer
34 tt_null, 36 tt_null,
35 tt_bool, 37 tt_bool,
36 tt_word, 38 tt_word,
  39 + tt_eof,
37 }; 40 };
38 41
39 class Token 42 class Token
@@ -97,6 +100,12 @@ class QPDFTokenizer @@ -97,6 +100,12 @@ class QPDFTokenizer
97 QPDF_DLL 100 QPDF_DLL
98 void allowPoundAnywhereInName(); 101 void allowPoundAnywhereInName();
99 102
  103 + // If called, treat EOF as a separate token type instead of an
  104 + // error. This was introduced in QPDF 4.1 to facilitate
  105 + // tokenizing content streams.
  106 + QPDF_DLL
  107 + void allowEOF();
  108 +
100 // Mode of operation: 109 // Mode of operation:
101 110
102 // Keep presenting characters and calling getToken() until 111 // Keep presenting characters and calling getToken() until
@@ -140,6 +149,7 @@ class QPDFTokenizer @@ -140,6 +149,7 @@ class QPDFTokenizer
140 st_literal, st_in_hexstring, st_token_ready } state; 149 st_literal, st_in_hexstring, st_token_ready } state;
141 150
142 bool pound_special_in_name; 151 bool pound_special_in_name;
  152 + bool allow_eof;
143 153
144 // Current token accumulation 154 // Current token accumulation
145 token_type_e type; 155 token_type_e type;
libqpdf/QPDFObjectHandle.cc
@@ -680,6 +680,106 @@ QPDFObjectHandle::parse(std::string const&amp; object_str, @@ -680,6 +680,106 @@ QPDFObjectHandle::parse(std::string const&amp; object_str,
680 return result; 680 return result;
681 } 681 }
682 682
  683 +void
  684 +QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
  685 + ParserCallbacks* callbacks)
  686 +{
  687 + std::vector<QPDFObjectHandle> streams;
  688 + if (stream_or_array.isArray())
  689 + {
  690 + streams = stream_or_array.getArrayAsVector();
  691 + }
  692 + else
  693 + {
  694 + streams.push_back(stream_or_array);
  695 + }
  696 + for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
  697 + iter != streams.end(); ++iter)
  698 + {
  699 + QPDFObjectHandle stream = *iter;
  700 + if (! stream.isStream())
  701 + {
  702 + throw std::logic_error(
  703 + "QPDFObjectHandle: parseContentStream called on non-stream");
  704 + }
  705 + parseContentStream_internal(stream, callbacks);
  706 + }
  707 + callbacks->handleEOF();
  708 +}
  709 +
  710 +void
  711 +QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream,
  712 + ParserCallbacks* callbacks)
  713 +{
  714 + stream.assertStream();
  715 + PointerHolder<Buffer> stream_data = stream.getStreamData();
  716 + size_t length = stream_data->getSize();
  717 + std::string description = "content stream object " +
  718 + QUtil::int_to_string(stream.getObjectID()) + " " +
  719 + QUtil::int_to_string(stream.getGeneration());
  720 + PointerHolder<InputSource> input =
  721 + new BufferInputSource(description, stream_data.getPointer());
  722 + QPDFTokenizer tokenizer;
  723 + tokenizer.allowEOF();
  724 + bool empty = false;
  725 + while ((size_t) input->tell() < length)
  726 + {
  727 + QPDFObjectHandle obj =
  728 + parseInternal(input, "content", tokenizer, empty,
  729 + 0, 0, false, false, true);
  730 + if (! obj.isInitialized())
  731 + {
  732 + // EOF
  733 + break;
  734 + }
  735 +
  736 + callbacks->handleObject(obj);
  737 + if (obj.isKeyword() && (obj.getKeywordValue() == "ID"))
  738 + {
  739 + // Discard next character; it is the space after ID that
  740 + // terminated the token. Read until end of inline image.
  741 + char ch;
  742 + input->read(&ch, 1);
  743 + char buf[4];
  744 + memset(buf, '\0', sizeof(buf));
  745 + bool done = false;
  746 + std::string inline_image;
  747 + while (! done)
  748 + {
  749 + if (input->read(&ch, 1) == 0)
  750 + {
  751 + QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
  752 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  753 + "stream data", input->tell(),
  754 + "EOF found while reading inline image");
  755 + }
  756 + inline_image += ch;
  757 + memmove(buf, buf + 1, sizeof(buf) - 1);
  758 + buf[sizeof(buf) - 1] = ch;
  759 + if (strchr(" \t\n\v\f\r", buf[0]) &&
  760 + (buf[1] == 'E') &&
  761 + (buf[2] == 'I') &&
  762 + strchr(" \t\n\v\f\r", buf[3]))
  763 + {
  764 + // We've found an EI operator.
  765 + done = true;
  766 + input->seek(-3, SEEK_CUR);
  767 + for (int i = 0; i < 4; ++i)
  768 + {
  769 + if (inline_image.length() > 0)
  770 + {
  771 + inline_image.erase(inline_image.length() - 1);
  772 + }
  773 + }
  774 + }
  775 + }
  776 + QTC::TC("qpdf", "QPDFObjectHandle inline image token");
  777 + callbacks->handleObject(
  778 + QPDFObjectHandle::newInlineImage(inline_image));
  779 + }
  780 + }
  781 +}
  782 +
683 QPDFObjectHandle 783 QPDFObjectHandle
684 QPDFObjectHandle::parse(PointerHolder<InputSource> input, 784 QPDFObjectHandle::parse(PointerHolder<InputSource> input,
685 std::string const& object_description, 785 std::string const& object_description,
@@ -687,7 +787,7 @@ QPDFObjectHandle::parse(PointerHolder&lt;InputSource&gt; input, @@ -687,7 +787,7 @@ QPDFObjectHandle::parse(PointerHolder&lt;InputSource&gt; input,
687 StringDecrypter* decrypter, QPDF* context) 787 StringDecrypter* decrypter, QPDF* context)
688 { 788 {
689 return parseInternal(input, object_description, tokenizer, empty, 789 return parseInternal(input, object_description, tokenizer, empty,
690 - decrypter, context, false, false); 790 + decrypter, context, false, false, false);
691 } 791 }
692 792
693 QPDFObjectHandle 793 QPDFObjectHandle
@@ -695,7 +795,8 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input, @@ -695,7 +795,8 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input,
695 std::string const& object_description, 795 std::string const& object_description,
696 QPDFTokenizer& tokenizer, bool& empty, 796 QPDFTokenizer& tokenizer, bool& empty,
697 StringDecrypter* decrypter, QPDF* context, 797 StringDecrypter* decrypter, QPDF* context,
698 - bool in_array, bool in_dictionary) 798 + bool in_array, bool in_dictionary,
  799 + bool content_stream)
699 { 800 {
700 empty = false; 801 empty = false;
701 if (in_dictionary && in_array) 802 if (in_dictionary && in_array)
@@ -721,6 +822,21 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input, @@ -721,6 +822,21 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input,
721 822
722 switch (token.getType()) 823 switch (token.getType())
723 { 824 {
  825 + case QPDFTokenizer::tt_eof:
  826 + if (content_stream)
  827 + {
  828 + // Return uninitialized object to indicate EOF
  829 + return object;
  830 + }
  831 + else
  832 + {
  833 + // When not in content stream mode, EOF is tt_bad and
  834 + // throws an exception before we get here.
  835 + throw std::logic_error(
  836 + "EOF received while not in content stream mode");
  837 + }
  838 + break;
  839 +
724 case QPDFTokenizer::tt_brace_open: 840 case QPDFTokenizer::tt_brace_open:
725 case QPDFTokenizer::tt_brace_close: 841 case QPDFTokenizer::tt_brace_close:
726 // Don't know what to do with these for now 842 // Don't know what to do with these for now
@@ -764,13 +880,13 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input, @@ -764,13 +880,13 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input,
764 case QPDFTokenizer::tt_array_open: 880 case QPDFTokenizer::tt_array_open:
765 object = parseInternal( 881 object = parseInternal(
766 input, object_description, tokenizer, empty, 882 input, object_description, tokenizer, empty,
767 - decrypter, context, true, false); 883 + decrypter, context, true, false, content_stream);
768 break; 884 break;
769 885
770 case QPDFTokenizer::tt_dict_open: 886 case QPDFTokenizer::tt_dict_open:
771 object = parseInternal( 887 object = parseInternal(
772 input, object_description, tokenizer, empty, 888 input, object_description, tokenizer, empty,
773 - decrypter, context, false, true); 889 + decrypter, context, false, true, content_stream);
774 break; 890 break;
775 891
776 case QPDFTokenizer::tt_bool: 892 case QPDFTokenizer::tt_bool:
@@ -826,6 +942,10 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input, @@ -826,6 +942,10 @@ QPDFObjectHandle::parseInternal(PointerHolder&lt;InputSource&gt; input,
826 input->seek(input->getLastOffset(), SEEK_SET); 942 input->seek(input->getLastOffset(), SEEK_SET);
827 empty = true; 943 empty = true;
828 } 944 }
  945 + else if (content_stream)
  946 + {
  947 + object = QPDFObjectHandle::newKeyword(token.getValue());
  948 + }
829 else 949 else
830 { 950 {
831 throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), 951 throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
libqpdf/QPDFTokenizer.cc
@@ -22,7 +22,8 @@ static bool is_space(char ch) @@ -22,7 +22,8 @@ static bool is_space(char ch)
22 } 22 }
23 23
24 QPDFTokenizer::QPDFTokenizer() : 24 QPDFTokenizer::QPDFTokenizer() :
25 - pound_special_in_name(true) 25 + pound_special_in_name(true),
  26 + allow_eof(false)
26 { 27 {
27 reset(); 28 reset();
28 } 29 }
@@ -35,6 +36,12 @@ QPDFTokenizer::allowPoundAnywhereInName() @@ -35,6 +36,12 @@ QPDFTokenizer::allowPoundAnywhereInName()
35 } 36 }
36 37
37 void 38 void
  39 +QPDFTokenizer::allowEOF()
  40 +{
  41 + this->allow_eof = true;
  42 +}
  43 +
  44 +void
38 QPDFTokenizer::reset() 45 QPDFTokenizer::reset()
39 { 46 {
40 state = st_top; 47 state = st_top;
@@ -441,9 +448,17 @@ QPDFTokenizer::presentEOF() @@ -441,9 +448,17 @@ QPDFTokenizer::presentEOF()
441 } 448 }
442 else if (state != st_token_ready) 449 else if (state != st_token_ready)
443 { 450 {
444 - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token");  
445 - type = tt_bad;  
446 - error_message = "EOF while reading token"; 451 + QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token",
  452 + this->allow_eof ? 1 : 0);
  453 + if (this->allow_eof)
  454 + {
  455 + type = tt_eof;
  456 + }
  457 + else
  458 + {
  459 + type = tt_bad;
  460 + error_message = "EOF while reading token";
  461 + }
447 } 462 }
448 463
449 state = st_token_ready; 464 state = st_token_ready;
qpdf/qpdf.testcov
@@ -236,7 +236,7 @@ QPDFWriter copy use_aes 1 @@ -236,7 +236,7 @@ QPDFWriter copy use_aes 1
236 QPDFObjectHandle indirect without context 0 236 QPDFObjectHandle indirect without context 0
237 QPDFObjectHandle trailing data in parse 0 237 QPDFObjectHandle trailing data in parse 0
238 qpdf pages encryption password 0 238 qpdf pages encryption password 0
239 -QPDF_Tokenizer EOF reading token 0 239 +QPDF_Tokenizer EOF reading token 1
240 QPDF_Tokenizer EOF reading appendable token 0 240 QPDF_Tokenizer EOF reading appendable token 0
241 QPDFWriter extra header text no newline 0 241 QPDFWriter extra header text no newline 0
242 QPDFWriter extra header text add newline 0 242 QPDFWriter extra header text add newline 0
@@ -259,3 +259,5 @@ QPDFWriter remove Crypt 0 @@ -259,3 +259,5 @@ QPDFWriter remove Crypt 0
259 qpdf-c called qpdf_get_pdf_extension_level 0 259 qpdf-c called qpdf_get_pdf_extension_level 0
260 qpdf-c called qpdf_set_r5_encryption_parameters 0 260 qpdf-c called qpdf_set_r5_encryption_parameters 0
261 qpdf-c called qpdf_set_r6_encryption_parameters 0 261 qpdf-c called qpdf_set_r6_encryption_parameters 0
  262 +QPDFObjectHandle EOF in inline image 0
  263 +QPDFObjectHandle inline image token 0
qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td-&gt;runtest(&quot;remove page we don&#39;t have&quot;, @@ -199,7 +199,7 @@ $td-&gt;runtest(&quot;remove page we don&#39;t have&quot;,
199 show_ntests(); 199 show_ntests();
200 # ---------- 200 # ----------
201 $td->notify("--- Miscellaneous Tests ---"); 201 $td->notify("--- Miscellaneous Tests ---");
202 -$n_tests += 57; 202 +$n_tests += 59;
203 203
204 $td->runtest("qpdf version", 204 $td->runtest("qpdf version",
205 {$td->COMMAND => "qpdf --version"}, 205 {$td->COMMAND => "qpdf --version"},
@@ -468,6 +468,16 @@ $td-&gt;runtest(&quot;check file with leading junk&quot;, @@ -468,6 +468,16 @@ $td-&gt;runtest(&quot;check file with leading junk&quot;,
468 {$td->COMMAND => "qpdf --check leading-junk.pdf"}, 468 {$td->COMMAND => "qpdf --check leading-junk.pdf"},
469 {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, 469 {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
470 $td->NORMALIZE_NEWLINES); 470 $td->NORMALIZE_NEWLINES);
  471 +$td->runtest("EOF inside inline image",
  472 + {$td->COMMAND => "test_driver 37 eof-in-inline-image.pdf"},
  473 + {$td->FILE => "eof-in-inline-image.out",
  474 + $td->EXIT_STATUS => 2},
  475 + $td->NORMALIZE_NEWLINES);
  476 +$td->runtest("tokenize content streams",
  477 + {$td->COMMAND => "test_driver 37 tokenize-content-streams.pdf"},
  478 + {$td->FILE => "tokenize-content-streams.out",
  479 + $td->EXIT_STATUS => 0},
  480 + $td->NORMALIZE_NEWLINES);
471 481
472 show_ntests(); 482 show_ntests();
473 # ---------- 483 # ----------
qpdf/qtest/qpdf/eof-in-inline-image.out 0 → 100644
  1 +BT
  2 +/F1
  3 +24
  4 +Tf
  5 +72
  6 +720
  7 +Td
  8 +(Potato)
  9 +Tj
  10 +ET
  11 +BI
  12 +/CS
  13 +/G
  14 +/W
  15 +1
  16 +/H
  17 +1
  18 +/BPC
  19 +8
  20 +/F
  21 +/Fl
  22 +/DP
  23 +<< /Columns 1 /Predictor 15 >>
  24 +ID
  25 +content stream object 4 0 (stream data, file position 139): EOF found while reading inline image
qpdf/qtest/qpdf/eof-in-inline-image.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/tokenize-content-streams.out 0 → 100644
  1 +BT
  2 +/F1
  3 +24
  4 +Tf
  5 +72
  6 +720
  7 +Td
  8 +(Potato)
  9 +Tj
  10 +ET
  11 +-EOF-
  12 +0.1
  13 +0
  14 +0
  15 +0.1
  16 +0
  17 +0
  18 +cm
  19 +q
  20 +0
  21 +1.1999
  22 +-1.1999
  23 +0
  24 +121.19
  25 +150.009
  26 +cm
  27 +BI
  28 +/CS
  29 +/G
  30 +/W
  31 +1
  32 +/H
  33 +1
  34 +/BPC
  35 +8
  36 +/F
  37 +/Fl
  38 +/DP
  39 +<< /Columns 1 /Predictor 15 >>
  40 +ID
  41 +inline image: 789c63fc0f0001030101
  42 +EI
  43 +Q
  44 +q
  45 +0
  46 +35.997
  47 +-128.389
  48 +0
  49 +431.964
  50 +7269.02
  51 +cm
  52 +BI
  53 +/CS
  54 +/G
  55 +/W
  56 +30
  57 +/H
  58 +107
  59 +/BPC
  60 +8
  61 +/F
  62 +/Fl
  63 +/DP
  64 +<< /Columns 30 /Predictor 15 >>
  65 +ID
  66 +inline image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a
  67 +EI
  68 +Q
  69 +q
  70 +0
  71 +38.3968
  72 +-93.5922
  73 +0
  74 +431.964
  75 +7567.79
  76 +cm
  77 +BI
  78 +/CS
  79 +/G
  80 +/W
  81 +32
  82 +/H
  83 +78
  84 +/BPC
  85 +8
  86 +/F
  87 +/Fl
  88 +/DP
  89 +<< /Columns 32 /Predictor 15 >>
  90 +ID
  91 +inline image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c13
  92 +EI
  93 +Q
  94 +-EOF-
  95 +test 37 done
qpdf/qtest/qpdf/tokenize-content-streams.pdf 0 → 100644
No preview for this file type
qpdf/test_driver.cc
@@ -58,6 +58,45 @@ class Provider: public QPDFObjectHandle::StreamDataProvider @@ -58,6 +58,45 @@ class Provider: public QPDFObjectHandle::StreamDataProvider
58 bool bad_length; 58 bool bad_length;
59 }; 59 };
60 60
  61 +class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
  62 +{
  63 + public:
  64 + virtual ~ParserCallbacks()
  65 + {
  66 + }
  67 +
  68 + virtual void handleObject(QPDFObjectHandle);
  69 + virtual void handleEOF();
  70 +};
  71 +
  72 +void
  73 +ParserCallbacks::handleObject(QPDFObjectHandle obj)
  74 +{
  75 + if (obj.isInlineImage())
  76 + {
  77 + std::string val = obj.getInlineImageValue();
  78 + std::cout << "inline image: ";
  79 + char buf[3];
  80 + buf[2] = '\0';
  81 + for (size_t i = 0; i < val.length(); ++i)
  82 + {
  83 + sprintf(buf, "%02x", (unsigned char)(val[i]));
  84 + std::cout << buf;
  85 + }
  86 + std::cout << std::endl;
  87 + }
  88 + else
  89 + {
  90 + std::cout << obj.unparse() << std::endl;
  91 + }
  92 +}
  93 +
  94 +void
  95 +ParserCallbacks::handleEOF()
  96 +{
  97 + std::cout << "-EOF-" << std::endl;
  98 +}
  99 +
61 static std::string getPageContents(QPDFObjectHandle page) 100 static std::string getPageContents(QPDFObjectHandle page)
62 { 101 {
63 PointerHolder<Buffer> b1 = 102 PointerHolder<Buffer> b1 =
@@ -1245,6 +1284,19 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -1245,6 +1284,19 @@ void runtest(int n, char const* filename1, char const* arg2)
1245 } 1284 }
1246 } 1285 }
1247 } 1286 }
  1287 + else if (n == 37)
  1288 + {
  1289 + // Parse content streams of all pages
  1290 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1291 + for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
  1292 + iter != pages.end(); ++iter)
  1293 + {
  1294 + QPDFObjectHandle page = *iter;
  1295 + QPDFObjectHandle contents = page.getKey("/Contents");
  1296 + ParserCallbacks cb;
  1297 + QPDFObjectHandle::parseContentStream(contents, &cb);
  1298 + }
  1299 + }
1248 else 1300 else
1249 { 1301 {
1250 throw std::runtime_error(std::string("invalid test ") + 1302 throw std::runtime_error(std::string("invalid test ") +