Commit 3be58f49e57da67cf79b61061d8b1a0f7ccf7cff
1 parent
98da4fd8
Make more QPDFPageObjectHelper methods work with form XObject
Showing
13 changed files
with
164 additions
and
13 deletions
ChangeLog
| 1 | 2021-01-02 Jay Berkenbilt <ejb@ql.org> | 1 | 2021-01-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Make QPDFPageObjectHelper methods pipeContents, parseContents, | ||
| 4 | + and addContentTokenFilter work with form XObjects. | ||
| 5 | + | ||
| 6 | + * Rename some QPDFPageObjectHelper methods and make them support | ||
| 7 | + form XObjects as well as pages. The old names will be preserved | ||
| 8 | + for compatibility. | ||
| 9 | + - pipePageContents -> pipeContents | ||
| 10 | + - parsePageContents -> parseContents | ||
| 11 | + | ||
| 12 | + * Add QPDFObjectHandle::parseAsContents to apply ParserCallbacks | ||
| 13 | + to a form XObject. | ||
| 14 | + | ||
| 3 | * QPDFPageObjectHelper::externalizeInlineImages can be called with | 15 | * QPDFPageObjectHelper::externalizeInlineImages can be called with |
| 4 | form XObjects as well as pages. | 16 | form XObjects as well as pages. |
| 5 | 17 |
examples/pdf-parse-content.cc
| @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) | @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) | ||
| 89 | 89 | ||
| 90 | QPDFPageObjectHelper& page = pages.at(QIntC::to_size(pageno-1)); | 90 | QPDFPageObjectHelper& page = pages.at(QIntC::to_size(pageno-1)); |
| 91 | ParserCallbacks cb; | 91 | ParserCallbacks cb; |
| 92 | - page.parsePageContents(&cb); | 92 | + page.parseContents(&cb); |
| 93 | } | 93 | } |
| 94 | catch (std::exception& e) | 94 | catch (std::exception& e) |
| 95 | { | 95 | { |
fuzz/qpdf_fuzzer.cc
| @@ -142,7 +142,7 @@ FuzzHelper::testPages() | @@ -142,7 +142,7 @@ FuzzHelper::testPages() | ||
| 142 | try | 142 | try |
| 143 | { | 143 | { |
| 144 | page.coalesceContentStreams(); | 144 | page.coalesceContentStreams(); |
| 145 | - page.parsePageContents(&discard_contents); | 145 | + page.parseContents(&discard_contents); |
| 146 | page.getImages(); | 146 | page.getImages(); |
| 147 | pldh.getLabelForPage(pageno); | 147 | pldh.getLabelForPage(pageno); |
| 148 | QPDFObjectHandle page_obj(page.getObjectHandle()); | 148 | QPDFObjectHandle page_obj(page.getObjectHandle()); |
include/qpdf/QPDFObjectHandle.hh
| @@ -448,7 +448,7 @@ class QPDFObjectHandle | @@ -448,7 +448,7 @@ class QPDFObjectHandle | ||
| 448 | void parsePageContents(ParserCallbacks* callbacks); | 448 | void parsePageContents(ParserCallbacks* callbacks); |
| 449 | QPDF_DLL | 449 | QPDF_DLL |
| 450 | void filterPageContents(TokenFilter* filter, Pipeline* next = 0); | 450 | void filterPageContents(TokenFilter* filter, Pipeline* next = 0); |
| 451 | - // See comments for QPDFPageObjectHelper::pipePageContents. | 451 | + // See comments for QPDFPageObjectHelper::pipeContents. |
| 452 | QPDF_DLL | 452 | QPDF_DLL |
| 453 | void pipePageContents(Pipeline* p); | 453 | void pipePageContents(Pipeline* p); |
| 454 | QPDF_DLL | 454 | QPDF_DLL |
| @@ -460,6 +460,10 @@ class QPDFObjectHandle | @@ -460,6 +460,10 @@ class QPDFObjectHandle | ||
| 460 | // XObject, whose data is in the same format as a content stream. | 460 | // XObject, whose data is in the same format as a content stream. |
| 461 | QPDF_DLL | 461 | QPDF_DLL |
| 462 | void filterAsContents(TokenFilter* filter, Pipeline* next = 0); | 462 | void filterAsContents(TokenFilter* filter, Pipeline* next = 0); |
| 463 | + // Called on a stream to parse the stream as page contents. This | ||
| 464 | + // can be used to parse a form XObject. | ||
| 465 | + QPDF_DLL | ||
| 466 | + void parseAsContents(ParserCallbacks* callbacks); | ||
| 463 | 467 | ||
| 464 | // Type-specific factories | 468 | // Type-specific factories |
| 465 | QPDF_DLL | 469 | QPDF_DLL |
include/qpdf/QPDFPageObjectHelper.hh
| @@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 182 | 182 | ||
| 183 | // Parse a page's contents through ParserCallbacks, described | 183 | // Parse a page's contents through ParserCallbacks, described |
| 184 | // above. This method works whether the contents are a single | 184 | // above. This method works whether the contents are a single |
| 185 | - // stream or an array of streams. Call on a page object. | 185 | + // stream or an array of streams. Call on a page object. Also |
| 186 | + // works for form XObjects. | ||
| 187 | + QPDF_DLL | ||
| 188 | + void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); | ||
| 189 | + // Old name | ||
| 186 | QPDF_DLL | 190 | QPDF_DLL |
| 187 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); | 191 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 188 | 192 | ||
| @@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 206 | 210 | ||
| 207 | // Pipe a page's contents through the given pipeline. This method | 211 | // Pipe a page's contents through the given pipeline. This method |
| 208 | // works whether the contents are a single stream or an array of | 212 | // works whether the contents are a single stream or an array of |
| 209 | - // streams. | 213 | + // streams. Also works on form XObjects. |
| 214 | + QPDF_DLL | ||
| 215 | + void pipeContents(Pipeline* p); | ||
| 216 | + // Old name | ||
| 210 | QPDF_DLL | 217 | QPDF_DLL |
| 211 | void pipePageContents(Pipeline* p); | 218 | void pipePageContents(Pipeline* p); |
| 212 | 219 | ||
| 213 | // Attach a token filter to a page's contents. If the page's | 220 | // Attach a token filter to a page's contents. If the page's |
| 214 | // contents is an array of streams, it is automatically coalesced. | 221 | // contents is an array of streams, it is automatically coalesced. |
| 215 | // The token filter is applied to the page's contents as a single | 222 | // The token filter is applied to the page's contents as a single |
| 216 | - // stream. | 223 | + // stream. Also works on form XObjects. |
| 217 | QPDF_DLL | 224 | QPDF_DLL |
| 218 | void addContentTokenFilter( | 225 | void addContentTokenFilter( |
| 219 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter); | 226 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter); |
libqpdf/QPDFObjectHandle.cc
| @@ -1669,6 +1669,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) | @@ -1669,6 +1669,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) | ||
| 1669 | } | 1669 | } |
| 1670 | 1670 | ||
| 1671 | void | 1671 | void |
| 1672 | +QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks) | ||
| 1673 | +{ | ||
| 1674 | + std::string description = "object " + | ||
| 1675 | + QUtil::int_to_string(this->objid) + " " + | ||
| 1676 | + QUtil::int_to_string(this->generation); | ||
| 1677 | + this->parseContentStream_internal(description, callbacks); | ||
| 1678 | +} | ||
| 1679 | + | ||
| 1680 | +void | ||
| 1672 | QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) | 1681 | QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) |
| 1673 | { | 1682 | { |
| 1674 | std::string description = "token filter for page object " + | 1683 | std::string description = "token filter for page object " + |
libqpdf/QPDFPageObjectHelper.cc
| @@ -584,7 +584,21 @@ void | @@ -584,7 +584,21 @@ void | ||
| 584 | QPDFPageObjectHelper::parsePageContents( | 584 | QPDFPageObjectHelper::parsePageContents( |
| 585 | QPDFObjectHandle::ParserCallbacks* callbacks) | 585 | QPDFObjectHandle::ParserCallbacks* callbacks) |
| 586 | { | 586 | { |
| 587 | - this->oh.parsePageContents(callbacks); | 587 | + parseContents(callbacks); |
| 588 | +} | ||
| 589 | + | ||
| 590 | +void | ||
| 591 | +QPDFPageObjectHelper::parseContents( | ||
| 592 | + QPDFObjectHandle::ParserCallbacks* callbacks) | ||
| 593 | +{ | ||
| 594 | + if (this->oh.isFormXObject()) | ||
| 595 | + { | ||
| 596 | + this->oh.parseAsContents(callbacks); | ||
| 597 | + } | ||
| 598 | + else | ||
| 599 | + { | ||
| 600 | + this->oh.parsePageContents(callbacks); | ||
| 601 | + } | ||
| 588 | } | 602 | } |
| 589 | 603 | ||
| 590 | void | 604 | void |
| @@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents( | @@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents( | ||
| 613 | void | 627 | void |
| 614 | QPDFPageObjectHelper::pipePageContents(Pipeline* p) | 628 | QPDFPageObjectHelper::pipePageContents(Pipeline* p) |
| 615 | { | 629 | { |
| 616 | - this->oh.pipePageContents(p); | 630 | + pipeContents(p); |
| 631 | +} | ||
| 632 | + | ||
| 633 | +void | ||
| 634 | +QPDFPageObjectHelper::pipeContents(Pipeline* p) | ||
| 635 | +{ | ||
| 636 | + if (this->oh.isFormXObject()) | ||
| 637 | + { | ||
| 638 | + this->oh.pipeStreamData(p, 0, qpdf_dl_specialized); | ||
| 639 | + } | ||
| 640 | + else | ||
| 641 | + { | ||
| 642 | + this->oh.pipePageContents(p); | ||
| 643 | + } | ||
| 617 | } | 644 | } |
| 618 | 645 | ||
| 619 | void | 646 | void |
| 620 | QPDFPageObjectHelper::addContentTokenFilter( | 647 | QPDFPageObjectHelper::addContentTokenFilter( |
| 621 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter) | 648 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter) |
| 622 | { | 649 | { |
| 623 | - this->oh.addContentTokenFilter(token_filter); | 650 | + if (this->oh.isFormXObject()) |
| 651 | + { | ||
| 652 | + this->oh.addTokenFilter(token_filter); | ||
| 653 | + } | ||
| 654 | + else | ||
| 655 | + { | ||
| 656 | + this->oh.addContentTokenFilter(token_filter); | ||
| 657 | + } | ||
| 624 | } | 658 | } |
| 625 | 659 | ||
| 626 | class NameWatcher: public QPDFObjectHandle::TokenFilter | 660 | class NameWatcher: public QPDFObjectHandle::TokenFilter |
manual/qpdf-manual.xml
| @@ -4893,6 +4893,18 @@ print "\n"; | @@ -4893,6 +4893,18 @@ print "\n"; | ||
| 4893 | <function>filterContents</function> | 4893 | <function>filterContents</function> |
| 4894 | </para> | 4894 | </para> |
| 4895 | </listitem> | 4895 | </listitem> |
| 4896 | + <listitem> | ||
| 4897 | + <para> | ||
| 4898 | + <function>pipePageContents</function> to | ||
| 4899 | + <function>pipeContents</function> | ||
| 4900 | + </para> | ||
| 4901 | + </listitem> | ||
| 4902 | + <listitem> | ||
| 4903 | + <para> | ||
| 4904 | + <function>parsePageContents</function> to | ||
| 4905 | + <function>parseContents</function> | ||
| 4906 | + </para> | ||
| 4907 | + </listitem> | ||
| 4896 | </itemizedlist> | 4908 | </itemizedlist> |
| 4897 | </para> | 4909 | </para> |
| 4898 | </listitem> | 4910 | </listitem> |
qpdf/qpdf.cc
| @@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) | @@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) | ||
| 3539 | ++pageno; | 3539 | ++pageno; |
| 3540 | try | 3540 | try |
| 3541 | { | 3541 | { |
| 3542 | - page.parsePageContents(&discard_contents); | 3542 | + page.parseContents(&discard_contents); |
| 3543 | } | 3543 | } |
| 3544 | catch (QPDFExc& e) | 3544 | catch (QPDFExc& e) |
| 3545 | { | 3545 | { |
qpdf/qtest/qpdf.test
| @@ -423,7 +423,7 @@ foreach my $i (@choice_values) | @@ -423,7 +423,7 @@ foreach my $i (@choice_values) | ||
| 423 | show_ntests(); | 423 | show_ntests(); |
| 424 | # ---------- | 424 | # ---------- |
| 425 | $td->notify("--- Form XObject, underlay, overlay ---"); | 425 | $td->notify("--- Form XObject, underlay, overlay ---"); |
| 426 | -$n_tests += 19; | 426 | +$n_tests += 20; |
| 427 | 427 | ||
| 428 | $td->runtest("form xobject creation", | 428 | $td->runtest("form xobject creation", |
| 429 | {$td->COMMAND => "test_driver 55 fxo-red.pdf"}, | 429 | {$td->COMMAND => "test_driver 55 fxo-red.pdf"}, |
| @@ -491,6 +491,11 @@ $td->runtest("foreach", | @@ -491,6 +491,11 @@ $td->runtest("foreach", | ||
| 491 | {$td->FILE => "nested-form-xobjects.out", | 491 | {$td->FILE => "nested-form-xobjects.out", |
| 492 | $td->EXIT_STATUS => 0}, | 492 | $td->EXIT_STATUS => 0}, |
| 493 | $td->NORMALIZE_NEWLINES); | 493 | $td->NORMALIZE_NEWLINES); |
| 494 | +$td->runtest("page operations on form xobject", | ||
| 495 | + {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"}, | ||
| 496 | + {$td->FILE => "page-ops-on-form-xobject.out", | ||
| 497 | + $td->EXIT_STATUS => 0}, | ||
| 498 | + $td->NORMALIZE_NEWLINES); | ||
| 494 | 499 | ||
| 495 | show_ntests(); | 500 | show_ntests(); |
| 496 | # ---------- | 501 | # ---------- |
qpdf/qtest/qpdf/page-ops-on-form-xobject.out
0 → 100644
| 1 | +--- parseContents --- | ||
| 2 | +content size: 173 | ||
| 3 | +operator, offset=0, length=2: BT | ||
| 4 | +name, offset=5, length=3: /F1 | ||
| 5 | +integer, offset=9, length=2: 24 | ||
| 6 | +operator, offset=12, length=2: Tf | ||
| 7 | +integer, offset=17, length=1: 0 | ||
| 8 | +integer, offset=19, length=3: 320 | ||
| 9 | +operator, offset=23, length=2: Td | ||
| 10 | +string, offset=28, length=5: (FX1) | ||
| 11 | +operator, offset=34, length=2: Tj | ||
| 12 | +operator, offset=37, length=2: ET | ||
| 13 | +operator, offset=40, length=1: q | ||
| 14 | +integer, offset=42, length=3: 100 | ||
| 15 | +integer, offset=46, length=1: 0 | ||
| 16 | +integer, offset=48, length=1: 0 | ||
| 17 | +integer, offset=50, length=3: 100 | ||
| 18 | +integer, offset=54, length=3: 0 | ||
| 19 | +integer, offset=58, length=3: 200 | ||
| 20 | +operator, offset=62, length=2: cm | ||
| 21 | +name, offset=65, length=4: /Im1 | ||
| 22 | +operator, offset=70, length=2: Do | ||
| 23 | +operator, offset=73, length=1: Q | ||
| 24 | +operator, offset=75, length=1: q | ||
| 25 | +integer, offset=77, length=3: 100 | ||
| 26 | +integer, offset=81, length=1: 0 | ||
| 27 | +integer, offset=83, length=1: 0 | ||
| 28 | +integer, offset=85, length=3: 100 | ||
| 29 | +integer, offset=89, length=3: 120 | ||
| 30 | +integer, offset=93, length=3: 200 | ||
| 31 | +operator, offset=97, length=2: cm | ||
| 32 | +name, offset=100, length=4: /Im2 | ||
| 33 | +operator, offset=105, length=2: Do | ||
| 34 | +operator, offset=108, length=1: Q | ||
| 35 | +operator, offset=110, length=1: q | ||
| 36 | +real, offset=112, length=7: 1.00000 | ||
| 37 | +real, offset=120, length=7: 0.00000 | ||
| 38 | +real, offset=128, length=7: 0.00000 | ||
| 39 | +real, offset=136, length=7: 1.00000 | ||
| 40 | +real, offset=144, length=7: 0.00000 | ||
| 41 | +real, offset=152, length=7: 0.00000 | ||
| 42 | +operator, offset=160, length=2: cm | ||
| 43 | +name, offset=163, length=4: /Fx1 | ||
| 44 | +operator, offset=168, length=2: Do | ||
| 45 | +operator, offset=171, length=1: Q | ||
| 46 | +-EOF- | ||
| 47 | +test 72 done |
qpdf/test_driver.cc
| @@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 1463 | { | 1463 | { |
| 1464 | QPDFPageObjectHelper& page(*iter); | 1464 | QPDFPageObjectHelper& page(*iter); |
| 1465 | ParserCallbacks cb; | 1465 | ParserCallbacks cb; |
| 1466 | - page.parsePageContents(&cb); | 1466 | + page.parseContents(&cb); |
| 1467 | } | 1467 | } |
| 1468 | } | 1468 | } |
| 1469 | else if (n == 38) | 1469 | else if (n == 38) |
| @@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 2279 | std::cout << i.first << " -> " << i.second.unparse() << std::endl; | 2279 | std::cout << i.first << " -> " << i.second.unparse() << std::endl; |
| 2280 | } | 2280 | } |
| 2281 | } | 2281 | } |
| 2282 | + else if (n == 72) | ||
| 2283 | + { | ||
| 2284 | + // Call some QPDFPageObjectHelper methods on form XObjects. | ||
| 2285 | + auto page = QPDFPageDocumentHelper(pdf).getAllPages().at(0); | ||
| 2286 | + auto fx1 = QPDFPageObjectHelper( | ||
| 2287 | + page.getObjectHandle() | ||
| 2288 | + .getKey("/Resources") | ||
| 2289 | + .getKey("/XObject") | ||
| 2290 | + .getKey("/Fx1")); | ||
| 2291 | + std::cout << "--- parseContents ---" << std::endl; | ||
| 2292 | + ParserCallbacks cb; | ||
| 2293 | + fx1.parseContents(&cb); | ||
| 2294 | + Pl_Buffer b("buffer"); | ||
| 2295 | + fx1.addContentTokenFilter(new TokenFilter); | ||
| 2296 | + fx1.pipeContents(&b); | ||
| 2297 | + std::unique_ptr<Buffer> buf(b.getBuffer()); | ||
| 2298 | + std::string s( | ||
| 2299 | + reinterpret_cast<char const*>(buf->getBuffer()), | ||
| 2300 | + buf->getSize()); | ||
| 2301 | + assert(s.find("/bye") != std::string::npos); | ||
| 2302 | + } | ||
| 2282 | else | 2303 | else |
| 2283 | { | 2304 | { |
| 2284 | throw std::runtime_error(std::string("invalid test ") + | 2305 | throw std::runtime_error(std::string("invalid test ") + |
qpdf/test_tokenizer.cc
| @@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable, | @@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable, | ||
| 219 | { | 219 | { |
| 220 | ++pageno; | 220 | ++pageno; |
| 221 | Pl_Buffer plb("buffer"); | 221 | Pl_Buffer plb("buffer"); |
| 222 | - (*iter).pipePageContents(&plb); | 222 | + (*iter).pipeContents(&plb); |
| 223 | PointerHolder<Buffer> content_data = plb.getBuffer(); | 223 | PointerHolder<Buffer> content_data = plb.getBuffer(); |
| 224 | BufferInputSource* bis = new BufferInputSource( | 224 | BufferInputSource* bis = new BufferInputSource( |
| 225 | "content data", content_data.getPointer()); | 225 | "content data", content_data.getPointer()); |