Commit 3be58f49e57da67cf79b61061d8b1a0f7ccf7cff

Authored by Jay Berkenbilt
1 parent 98da4fd8

Make more QPDFPageObjectHelper methods work with form XObject

ChangeLog
1 2021-01-02 Jay Berkenbilt <ejb@ql.org> 1 2021-01-02 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Make QPDFPageObjectHelper methods pipeContents, parseContents,
  4 + and addContentTokenFilter work with form XObjects.
  5 +
  6 + * Rename some QPDFPageObjectHelper methods and make them support
  7 + form XObjects as well as pages. The old names will be preserved
  8 + for compatibility.
  9 + - pipePageContents -> pipeContents
  10 + - parsePageContents -> parseContents
  11 +
  12 + * Add QPDFObjectHandle::parseAsContents to apply ParserCallbacks
  13 + to a form XObject.
  14 +
3 * QPDFPageObjectHelper::externalizeInlineImages can be called with 15 * QPDFPageObjectHelper::externalizeInlineImages can be called with
4 form XObjects as well as pages. 16 form XObjects as well as pages.
5 17
examples/pdf-parse-content.cc
@@ -89,7 +89,7 @@ int main(int argc, char* argv[]) @@ -89,7 +89,7 @@ int main(int argc, char* argv[])
89 89
90 QPDFPageObjectHelper& page = pages.at(QIntC::to_size(pageno-1)); 90 QPDFPageObjectHelper& page = pages.at(QIntC::to_size(pageno-1));
91 ParserCallbacks cb; 91 ParserCallbacks cb;
92 - page.parsePageContents(&cb); 92 + page.parseContents(&cb);
93 } 93 }
94 catch (std::exception& e) 94 catch (std::exception& e)
95 { 95 {
fuzz/qpdf_fuzzer.cc
@@ -142,7 +142,7 @@ FuzzHelper::testPages() @@ -142,7 +142,7 @@ FuzzHelper::testPages()
142 try 142 try
143 { 143 {
144 page.coalesceContentStreams(); 144 page.coalesceContentStreams();
145 - page.parsePageContents(&discard_contents); 145 + page.parseContents(&discard_contents);
146 page.getImages(); 146 page.getImages();
147 pldh.getLabelForPage(pageno); 147 pldh.getLabelForPage(pageno);
148 QPDFObjectHandle page_obj(page.getObjectHandle()); 148 QPDFObjectHandle page_obj(page.getObjectHandle());
include/qpdf/QPDFObjectHandle.hh
@@ -448,7 +448,7 @@ class QPDFObjectHandle @@ -448,7 +448,7 @@ class QPDFObjectHandle
448 void parsePageContents(ParserCallbacks* callbacks); 448 void parsePageContents(ParserCallbacks* callbacks);
449 QPDF_DLL 449 QPDF_DLL
450 void filterPageContents(TokenFilter* filter, Pipeline* next = 0); 450 void filterPageContents(TokenFilter* filter, Pipeline* next = 0);
451 - // See comments for QPDFPageObjectHelper::pipePageContents. 451 + // See comments for QPDFPageObjectHelper::pipeContents.
452 QPDF_DLL 452 QPDF_DLL
453 void pipePageContents(Pipeline* p); 453 void pipePageContents(Pipeline* p);
454 QPDF_DLL 454 QPDF_DLL
@@ -460,6 +460,10 @@ class QPDFObjectHandle @@ -460,6 +460,10 @@ class QPDFObjectHandle
460 // XObject, whose data is in the same format as a content stream. 460 // XObject, whose data is in the same format as a content stream.
461 QPDF_DLL 461 QPDF_DLL
462 void filterAsContents(TokenFilter* filter, Pipeline* next = 0); 462 void filterAsContents(TokenFilter* filter, Pipeline* next = 0);
  463 + // Called on a stream to parse the stream as page contents. This
  464 + // can be used to parse a form XObject.
  465 + QPDF_DLL
  466 + void parseAsContents(ParserCallbacks* callbacks);
463 467
464 // Type-specific factories 468 // Type-specific factories
465 QPDF_DLL 469 QPDF_DLL
include/qpdf/QPDFPageObjectHelper.hh
@@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
182 182
183 // Parse a page's contents through ParserCallbacks, described 183 // Parse a page's contents through ParserCallbacks, described
184 // above. This method works whether the contents are a single 184 // above. This method works whether the contents are a single
185 - // stream or an array of streams. Call on a page object. 185 + // stream or an array of streams. Call on a page object. Also
  186 + // works for form XObjects.
  187 + QPDF_DLL
  188 + void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks);
  189 + // Old name
186 QPDF_DLL 190 QPDF_DLL
187 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); 191 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
188 192
@@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
206 210
207 // Pipe a page's contents through the given pipeline. This method 211 // Pipe a page's contents through the given pipeline. This method
208 // works whether the contents are a single stream or an array of 212 // works whether the contents are a single stream or an array of
209 - // streams. 213 + // streams. Also works on form XObjects.
  214 + QPDF_DLL
  215 + void pipeContents(Pipeline* p);
  216 + // Old name
210 QPDF_DLL 217 QPDF_DLL
211 void pipePageContents(Pipeline* p); 218 void pipePageContents(Pipeline* p);
212 219
213 // Attach a token filter to a page's contents. If the page's 220 // Attach a token filter to a page's contents. If the page's
214 // contents is an array of streams, it is automatically coalesced. 221 // contents is an array of streams, it is automatically coalesced.
215 // The token filter is applied to the page's contents as a single 222 // The token filter is applied to the page's contents as a single
216 - // stream. 223 + // stream. Also works on form XObjects.
217 QPDF_DLL 224 QPDF_DLL
218 void addContentTokenFilter( 225 void addContentTokenFilter(
219 PointerHolder<QPDFObjectHandle::TokenFilter> token_filter); 226 PointerHolder<QPDFObjectHandle::TokenFilter> token_filter);
libqpdf/QPDFObjectHandle.cc
@@ -1669,6 +1669,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) @@ -1669,6 +1669,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
1669 } 1669 }
1670 1670
1671 void 1671 void
  1672 +QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks)
  1673 +{
  1674 + std::string description = "object " +
  1675 + QUtil::int_to_string(this->objid) + " " +
  1676 + QUtil::int_to_string(this->generation);
  1677 + this->parseContentStream_internal(description, callbacks);
  1678 +}
  1679 +
  1680 +void
1672 QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) 1681 QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1673 { 1682 {
1674 std::string description = "token filter for page object " + 1683 std::string description = "token filter for page object " +
libqpdf/QPDFPageObjectHelper.cc
@@ -584,7 +584,21 @@ void @@ -584,7 +584,21 @@ void
584 QPDFPageObjectHelper::parsePageContents( 584 QPDFPageObjectHelper::parsePageContents(
585 QPDFObjectHandle::ParserCallbacks* callbacks) 585 QPDFObjectHandle::ParserCallbacks* callbacks)
586 { 586 {
587 - this->oh.parsePageContents(callbacks); 587 + parseContents(callbacks);
  588 +}
  589 +
  590 +void
  591 +QPDFPageObjectHelper::parseContents(
  592 + QPDFObjectHandle::ParserCallbacks* callbacks)
  593 +{
  594 + if (this->oh.isFormXObject())
  595 + {
  596 + this->oh.parseAsContents(callbacks);
  597 + }
  598 + else
  599 + {
  600 + this->oh.parsePageContents(callbacks);
  601 + }
588 } 602 }
589 603
590 void 604 void
@@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents( @@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents(
613 void 627 void
614 QPDFPageObjectHelper::pipePageContents(Pipeline* p) 628 QPDFPageObjectHelper::pipePageContents(Pipeline* p)
615 { 629 {
616 - this->oh.pipePageContents(p); 630 + pipeContents(p);
  631 +}
  632 +
  633 +void
  634 +QPDFPageObjectHelper::pipeContents(Pipeline* p)
  635 +{
  636 + if (this->oh.isFormXObject())
  637 + {
  638 + this->oh.pipeStreamData(p, 0, qpdf_dl_specialized);
  639 + }
  640 + else
  641 + {
  642 + this->oh.pipePageContents(p);
  643 + }
617 } 644 }
618 645
619 void 646 void
620 QPDFPageObjectHelper::addContentTokenFilter( 647 QPDFPageObjectHelper::addContentTokenFilter(
621 PointerHolder<QPDFObjectHandle::TokenFilter> token_filter) 648 PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
622 { 649 {
623 - this->oh.addContentTokenFilter(token_filter); 650 + if (this->oh.isFormXObject())
  651 + {
  652 + this->oh.addTokenFilter(token_filter);
  653 + }
  654 + else
  655 + {
  656 + this->oh.addContentTokenFilter(token_filter);
  657 + }
624 } 658 }
625 659
626 class NameWatcher: public QPDFObjectHandle::TokenFilter 660 class NameWatcher: public QPDFObjectHandle::TokenFilter
manual/qpdf-manual.xml
@@ -4893,6 +4893,18 @@ print "\n"; @@ -4893,6 +4893,18 @@ print "\n";
4893 <function>filterContents</function> 4893 <function>filterContents</function>
4894 </para> 4894 </para>
4895 </listitem> 4895 </listitem>
  4896 + <listitem>
  4897 + <para>
  4898 + <function>pipePageContents</function> to
  4899 + <function>pipeContents</function>
  4900 + </para>
  4901 + </listitem>
  4902 + <listitem>
  4903 + <para>
  4904 + <function>parsePageContents</function> to
  4905 + <function>parseContents</function>
  4906 + </para>
  4907 + </listitem>
4896 </itemizedlist> 4908 </itemizedlist>
4897 </para> 4909 </para>
4898 </listitem> 4910 </listitem>
qpdf/qpdf.cc
@@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) @@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code)
3539 ++pageno; 3539 ++pageno;
3540 try 3540 try
3541 { 3541 {
3542 - page.parsePageContents(&discard_contents); 3542 + page.parseContents(&discard_contents);
3543 } 3543 }
3544 catch (QPDFExc& e) 3544 catch (QPDFExc& e)
3545 { 3545 {
qpdf/qtest/qpdf.test
@@ -423,7 +423,7 @@ foreach my $i (@choice_values) @@ -423,7 +423,7 @@ foreach my $i (@choice_values)
423 show_ntests(); 423 show_ntests();
424 # ---------- 424 # ----------
425 $td->notify("--- Form XObject, underlay, overlay ---"); 425 $td->notify("--- Form XObject, underlay, overlay ---");
426 -$n_tests += 19; 426 +$n_tests += 20;
427 427
428 $td->runtest("form xobject creation", 428 $td->runtest("form xobject creation",
429 {$td->COMMAND => "test_driver 55 fxo-red.pdf"}, 429 {$td->COMMAND => "test_driver 55 fxo-red.pdf"},
@@ -491,6 +491,11 @@ $td->runtest("foreach", @@ -491,6 +491,11 @@ $td->runtest("foreach",
491 {$td->FILE => "nested-form-xobjects.out", 491 {$td->FILE => "nested-form-xobjects.out",
492 $td->EXIT_STATUS => 0}, 492 $td->EXIT_STATUS => 0},
493 $td->NORMALIZE_NEWLINES); 493 $td->NORMALIZE_NEWLINES);
  494 +$td->runtest("page operations on form xobject",
  495 + {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"},
  496 + {$td->FILE => "page-ops-on-form-xobject.out",
  497 + $td->EXIT_STATUS => 0},
  498 + $td->NORMALIZE_NEWLINES);
494 499
495 show_ntests(); 500 show_ntests();
496 # ---------- 501 # ----------
qpdf/qtest/qpdf/page-ops-on-form-xobject.out 0 → 100644
  1 +--- parseContents ---
  2 +content size: 173
  3 +operator, offset=0, length=2: BT
  4 +name, offset=5, length=3: /F1
  5 +integer, offset=9, length=2: 24
  6 +operator, offset=12, length=2: Tf
  7 +integer, offset=17, length=1: 0
  8 +integer, offset=19, length=3: 320
  9 +operator, offset=23, length=2: Td
  10 +string, offset=28, length=5: (FX1)
  11 +operator, offset=34, length=2: Tj
  12 +operator, offset=37, length=2: ET
  13 +operator, offset=40, length=1: q
  14 +integer, offset=42, length=3: 100
  15 +integer, offset=46, length=1: 0
  16 +integer, offset=48, length=1: 0
  17 +integer, offset=50, length=3: 100
  18 +integer, offset=54, length=3: 0
  19 +integer, offset=58, length=3: 200
  20 +operator, offset=62, length=2: cm
  21 +name, offset=65, length=4: /Im1
  22 +operator, offset=70, length=2: Do
  23 +operator, offset=73, length=1: Q
  24 +operator, offset=75, length=1: q
  25 +integer, offset=77, length=3: 100
  26 +integer, offset=81, length=1: 0
  27 +integer, offset=83, length=1: 0
  28 +integer, offset=85, length=3: 100
  29 +integer, offset=89, length=3: 120
  30 +integer, offset=93, length=3: 200
  31 +operator, offset=97, length=2: cm
  32 +name, offset=100, length=4: /Im2
  33 +operator, offset=105, length=2: Do
  34 +operator, offset=108, length=1: Q
  35 +operator, offset=110, length=1: q
  36 +real, offset=112, length=7: 1.00000
  37 +real, offset=120, length=7: 0.00000
  38 +real, offset=128, length=7: 0.00000
  39 +real, offset=136, length=7: 1.00000
  40 +real, offset=144, length=7: 0.00000
  41 +real, offset=152, length=7: 0.00000
  42 +operator, offset=160, length=2: cm
  43 +name, offset=163, length=4: /Fx1
  44 +operator, offset=168, length=2: Do
  45 +operator, offset=171, length=1: Q
  46 +-EOF-
  47 +test 72 done
qpdf/test_driver.cc
@@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2)
1463 { 1463 {
1464 QPDFPageObjectHelper& page(*iter); 1464 QPDFPageObjectHelper& page(*iter);
1465 ParserCallbacks cb; 1465 ParserCallbacks cb;
1466 - page.parsePageContents(&cb); 1466 + page.parseContents(&cb);
1467 } 1467 }
1468 } 1468 }
1469 else if (n == 38) 1469 else if (n == 38)
@@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2)
2279 std::cout << i.first << " -> " << i.second.unparse() << std::endl; 2279 std::cout << i.first << " -> " << i.second.unparse() << std::endl;
2280 } 2280 }
2281 } 2281 }
  2282 + else if (n == 72)
  2283 + {
  2284 + // Call some QPDFPageObjectHelper methods on form XObjects.
  2285 + auto page = QPDFPageDocumentHelper(pdf).getAllPages().at(0);
  2286 + auto fx1 = QPDFPageObjectHelper(
  2287 + page.getObjectHandle()
  2288 + .getKey("/Resources")
  2289 + .getKey("/XObject")
  2290 + .getKey("/Fx1"));
  2291 + std::cout << "--- parseContents ---" << std::endl;
  2292 + ParserCallbacks cb;
  2293 + fx1.parseContents(&cb);
  2294 + Pl_Buffer b("buffer");
  2295 + fx1.addContentTokenFilter(new TokenFilter);
  2296 + fx1.pipeContents(&b);
  2297 + std::unique_ptr<Buffer> buf(b.getBuffer());
  2298 + std::string s(
  2299 + reinterpret_cast<char const*>(buf->getBuffer()),
  2300 + buf->getSize());
  2301 + assert(s.find("/bye") != std::string::npos);
  2302 + }
2282 else 2303 else
2283 { 2304 {
2284 throw std::runtime_error(std::string("invalid test ") + 2305 throw std::runtime_error(std::string("invalid test ") +
qpdf/test_tokenizer.cc
@@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable, @@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable,
219 { 219 {
220 ++pageno; 220 ++pageno;
221 Pl_Buffer plb("buffer"); 221 Pl_Buffer plb("buffer");
222 - (*iter).pipePageContents(&plb); 222 + (*iter).pipeContents(&plb);
223 PointerHolder<Buffer> content_data = plb.getBuffer(); 223 PointerHolder<Buffer> content_data = plb.getBuffer();
224 BufferInputSource* bis = new BufferInputSource( 224 BufferInputSource* bis = new BufferInputSource(
225 "content data", content_data.getPointer()); 225 "content data", content_data.getPointer());