Commit 3be58f49e57da67cf79b61061d8b1a0f7ccf7cff
1 parent
98da4fd8
Make more QPDFPageObjectHelper methods work with form XObjects
Showing
13 changed files
with
164 additions
and
13 deletions
ChangeLog
| 1 | 1 | 2021-01-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Make QPDFPageObjectHelper methods pipeContents, parseContents, | |
| 4 | + and addContentTokenFilter work with form XObjects. | |
| 5 | + | |
| 6 | + * Rename some QPDFPageObjectHelper methods and make them support | |
| 7 | + form XObjects as well as pages. The old names will be preserved | |
| 8 | + for compatibility. | |
| 9 | + - pipePageContents -> pipeContents | |
| 10 | + - parsePageContents -> parseContents | |
| 11 | + | |
| 12 | + * Add QPDFObjectHandle::parseAsContents to apply ParserCallbacks | |
| 13 | + to a form XObject. | |
| 14 | + | |
| 3 | 15 | * QPDFPageObjectHelper::externalizeInlineImages can be called with |
| 4 | 16 | form XObjects as well as pages. |
| 5 | 17 | ... | ... |
examples/pdf-parse-content.cc
fuzz/qpdf_fuzzer.cc
| ... | ... | @@ -142,7 +142,7 @@ FuzzHelper::testPages() |
| 142 | 142 | try |
| 143 | 143 | { |
| 144 | 144 | page.coalesceContentStreams(); |
| 145 | - page.parsePageContents(&discard_contents); | |
| 145 | + page.parseContents(&discard_contents); | |
| 146 | 146 | page.getImages(); |
| 147 | 147 | pldh.getLabelForPage(pageno); |
| 148 | 148 | QPDFObjectHandle page_obj(page.getObjectHandle()); | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -448,7 +448,7 @@ class QPDFObjectHandle |
| 448 | 448 | void parsePageContents(ParserCallbacks* callbacks); |
| 449 | 449 | QPDF_DLL |
| 450 | 450 | void filterPageContents(TokenFilter* filter, Pipeline* next = 0); |
| 451 | - // See comments for QPDFPageObjectHelper::pipePageContents. | |
| 451 | + // See comments for QPDFPageObjectHelper::pipeContents. | |
| 452 | 452 | QPDF_DLL |
| 453 | 453 | void pipePageContents(Pipeline* p); |
| 454 | 454 | QPDF_DLL |
| ... | ... | @@ -460,6 +460,10 @@ class QPDFObjectHandle |
| 460 | 460 | // XObject, whose data is in the same format as a content stream. |
| 461 | 461 | QPDF_DLL |
| 462 | 462 | void filterAsContents(TokenFilter* filter, Pipeline* next = 0); |
| 463 | + // Called on a stream to parse the stream as page contents. This | |
| 464 | + // can be used to parse a form XObject. | |
| 465 | + QPDF_DLL | |
| 466 | + void parseAsContents(ParserCallbacks* callbacks); | |
| 463 | 467 | |
| 464 | 468 | // Type-specific factories |
| 465 | 469 | QPDF_DLL | ... | ... |
include/qpdf/QPDFPageObjectHelper.hh
| ... | ... | @@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 182 | 182 | |
| 183 | 183 | // Parse a page's contents through ParserCallbacks, described |
| 184 | 184 | // above. This method works whether the contents are a single |
| 185 | - // stream or an array of streams. Call on a page object. | |
| 185 | + // stream or an array of streams. Call on a page object. Also | |
| 186 | + // works for form XObjects. | |
| 187 | + QPDF_DLL | |
| 188 | + void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); | |
| 189 | + // Old name | |
| 186 | 190 | QPDF_DLL |
| 187 | 191 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 188 | 192 | |
| ... | ... | @@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 206 | 210 | |
| 207 | 211 | // Pipe a page's contents through the given pipeline. This method |
| 208 | 212 | // works whether the contents are a single stream or an array of |
| 209 | - // streams. | |
| 213 | + // streams. Also works on form XObjects. | |
| 214 | + QPDF_DLL | |
| 215 | + void pipeContents(Pipeline* p); | |
| 216 | + // Old name | |
| 210 | 217 | QPDF_DLL |
| 211 | 218 | void pipePageContents(Pipeline* p); |
| 212 | 219 | |
| 213 | 220 | // Attach a token filter to a page's contents. If the page's |
| 214 | 221 | // contents is an array of streams, it is automatically coalesced. |
| 215 | 222 | // The token filter is applied to the page's contents as a single |
| 216 | - // stream. | |
| 223 | + // stream. Also works on form XObjects. | |
| 217 | 224 | QPDF_DLL |
| 218 | 225 | void addContentTokenFilter( |
| 219 | 226 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter); | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1669,6 +1669,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) |
| 1669 | 1669 | } |
| 1670 | 1670 | |
| 1671 | 1671 | void |
| 1672 | +QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks) | |
| 1673 | +{ | |
| 1674 | + std::string description = "object " + | |
| 1675 | + QUtil::int_to_string(this->objid) + " " + | |
| 1676 | + QUtil::int_to_string(this->generation); | |
| 1677 | + this->parseContentStream_internal(description, callbacks); | |
| 1678 | +} | |
| 1679 | + | |
| 1680 | +void | |
| 1672 | 1681 | QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) |
| 1673 | 1682 | { |
| 1674 | 1683 | std::string description = "token filter for page object " + | ... | ... |
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -584,7 +584,21 @@ void |
| 584 | 584 | QPDFPageObjectHelper::parsePageContents( |
| 585 | 585 | QPDFObjectHandle::ParserCallbacks* callbacks) |
| 586 | 586 | { |
| 587 | - this->oh.parsePageContents(callbacks); | |
| 587 | + parseContents(callbacks); | |
| 588 | +} | |
| 589 | + | |
| 590 | +void | |
| 591 | +QPDFPageObjectHelper::parseContents( | |
| 592 | + QPDFObjectHandle::ParserCallbacks* callbacks) | |
| 593 | +{ | |
| 594 | + if (this->oh.isFormXObject()) | |
| 595 | + { | |
| 596 | + this->oh.parseAsContents(callbacks); | |
| 597 | + } | |
| 598 | + else | |
| 599 | + { | |
| 600 | + this->oh.parsePageContents(callbacks); | |
| 601 | + } | |
| 588 | 602 | } |
| 589 | 603 | |
| 590 | 604 | void |
| ... | ... | @@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents( |
| 613 | 627 | void |
| 614 | 628 | QPDFPageObjectHelper::pipePageContents(Pipeline* p) |
| 615 | 629 | { |
| 616 | - this->oh.pipePageContents(p); | |
| 630 | + pipeContents(p); | |
| 631 | +} | |
| 632 | + | |
| 633 | +void | |
| 634 | +QPDFPageObjectHelper::pipeContents(Pipeline* p) | |
| 635 | +{ | |
| 636 | + if (this->oh.isFormXObject()) | |
| 637 | + { | |
| 638 | + this->oh.pipeStreamData(p, 0, qpdf_dl_specialized); | |
| 639 | + } | |
| 640 | + else | |
| 641 | + { | |
| 642 | + this->oh.pipePageContents(p); | |
| 643 | + } | |
| 617 | 644 | } |
| 618 | 645 | |
| 619 | 646 | void |
| 620 | 647 | QPDFPageObjectHelper::addContentTokenFilter( |
| 621 | 648 | PointerHolder<QPDFObjectHandle::TokenFilter> token_filter) |
| 622 | 649 | { |
| 623 | - this->oh.addContentTokenFilter(token_filter); | |
| 650 | + if (this->oh.isFormXObject()) | |
| 651 | + { | |
| 652 | + this->oh.addTokenFilter(token_filter); | |
| 653 | + } | |
| 654 | + else | |
| 655 | + { | |
| 656 | + this->oh.addContentTokenFilter(token_filter); | |
| 657 | + } | |
| 624 | 658 | } |
| 625 | 659 | |
| 626 | 660 | class NameWatcher: public QPDFObjectHandle::TokenFilter | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -4893,6 +4893,18 @@ print "\n"; |
| 4893 | 4893 | <function>filterContents</function> |
| 4894 | 4894 | </para> |
| 4895 | 4895 | </listitem> |
| 4896 | + <listitem> | |
| 4897 | + <para> | |
| 4898 | + <function>pipePageContents</function> to | |
| 4899 | + <function>pipeContents</function> | |
| 4900 | + </para> | |
| 4901 | + </listitem> | |
| 4902 | + <listitem> | |
| 4903 | + <para> | |
| 4904 | + <function>parsePageContents</function> to | |
| 4905 | + <function>parseContents</function> | |
| 4906 | + </para> | |
| 4907 | + </listitem> | |
| 4896 | 4908 | </itemizedlist> |
| 4897 | 4909 | </para> |
| 4898 | 4910 | </listitem> | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) |
| 3539 | 3539 | ++pageno; |
| 3540 | 3540 | try |
| 3541 | 3541 | { |
| 3542 | - page.parsePageContents(&discard_contents); | |
| 3542 | + page.parseContents(&discard_contents); | |
| 3543 | 3543 | } |
| 3544 | 3544 | catch (QPDFExc& e) |
| 3545 | 3545 | { | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -423,7 +423,7 @@ foreach my $i (@choice_values) |
| 423 | 423 | show_ntests(); |
| 424 | 424 | # ---------- |
| 425 | 425 | $td->notify("--- Form XObject, underlay, overlay ---"); |
| 426 | -$n_tests += 19; | |
| 426 | +$n_tests += 20; | |
| 427 | 427 | |
| 428 | 428 | $td->runtest("form xobject creation", |
| 429 | 429 | {$td->COMMAND => "test_driver 55 fxo-red.pdf"}, |
| ... | ... | @@ -491,6 +491,11 @@ $td->runtest("foreach", |
| 491 | 491 | {$td->FILE => "nested-form-xobjects.out", |
| 492 | 492 | $td->EXIT_STATUS => 0}, |
| 493 | 493 | $td->NORMALIZE_NEWLINES); |
| 494 | +$td->runtest("page operations on form xobject", | |
| 495 | + {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"}, | |
| 496 | + {$td->FILE => "page-ops-on-form-xobject.out", | |
| 497 | + $td->EXIT_STATUS => 0}, | |
| 498 | + $td->NORMALIZE_NEWLINES); | |
| 494 | 499 | |
| 495 | 500 | show_ntests(); |
| 496 | 501 | # ---------- | ... | ... |
qpdf/qtest/qpdf/page-ops-on-form-xobject.out
0 → 100644
| 1 | +--- parseContents --- | |
| 2 | +content size: 173 | |
| 3 | +operator, offset=0, length=2: BT | |
| 4 | +name, offset=5, length=3: /F1 | |
| 5 | +integer, offset=9, length=2: 24 | |
| 6 | +operator, offset=12, length=2: Tf | |
| 7 | +integer, offset=17, length=1: 0 | |
| 8 | +integer, offset=19, length=3: 320 | |
| 9 | +operator, offset=23, length=2: Td | |
| 10 | +string, offset=28, length=5: (FX1) | |
| 11 | +operator, offset=34, length=2: Tj | |
| 12 | +operator, offset=37, length=2: ET | |
| 13 | +operator, offset=40, length=1: q | |
| 14 | +integer, offset=42, length=3: 100 | |
| 15 | +integer, offset=46, length=1: 0 | |
| 16 | +integer, offset=48, length=1: 0 | |
| 17 | +integer, offset=50, length=3: 100 | |
| 18 | +integer, offset=54, length=3: 0 | |
| 19 | +integer, offset=58, length=3: 200 | |
| 20 | +operator, offset=62, length=2: cm | |
| 21 | +name, offset=65, length=4: /Im1 | |
| 22 | +operator, offset=70, length=2: Do | |
| 23 | +operator, offset=73, length=1: Q | |
| 24 | +operator, offset=75, length=1: q | |
| 25 | +integer, offset=77, length=3: 100 | |
| 26 | +integer, offset=81, length=1: 0 | |
| 27 | +integer, offset=83, length=1: 0 | |
| 28 | +integer, offset=85, length=3: 100 | |
| 29 | +integer, offset=89, length=3: 120 | |
| 30 | +integer, offset=93, length=3: 200 | |
| 31 | +operator, offset=97, length=2: cm | |
| 32 | +name, offset=100, length=4: /Im2 | |
| 33 | +operator, offset=105, length=2: Do | |
| 34 | +operator, offset=108, length=1: Q | |
| 35 | +operator, offset=110, length=1: q | |
| 36 | +real, offset=112, length=7: 1.00000 | |
| 37 | +real, offset=120, length=7: 0.00000 | |
| 38 | +real, offset=128, length=7: 0.00000 | |
| 39 | +real, offset=136, length=7: 1.00000 | |
| 40 | +real, offset=144, length=7: 0.00000 | |
| 41 | +real, offset=152, length=7: 0.00000 | |
| 42 | +operator, offset=160, length=2: cm | |
| 43 | +name, offset=163, length=4: /Fx1 | |
| 44 | +operator, offset=168, length=2: Do | |
| 45 | +operator, offset=171, length=1: Q | |
| 46 | +-EOF- | |
| 47 | +test 72 done | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 1463 | 1463 | { |
| 1464 | 1464 | QPDFPageObjectHelper& page(*iter); |
| 1465 | 1465 | ParserCallbacks cb; |
| 1466 | - page.parsePageContents(&cb); | |
| 1466 | + page.parseContents(&cb); | |
| 1467 | 1467 | } |
| 1468 | 1468 | } |
| 1469 | 1469 | else if (n == 38) |
| ... | ... | @@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 2279 | 2279 | std::cout << i.first << " -> " << i.second.unparse() << std::endl; |
| 2280 | 2280 | } |
| 2281 | 2281 | } |
| 2282 | + else if (n == 72) | |
| 2283 | + { | |
| 2284 | + // Call some QPDFPageObjectHelper methods on form XObjects. | |
| 2285 | + auto page = QPDFPageDocumentHelper(pdf).getAllPages().at(0); | |
| 2286 | + auto fx1 = QPDFPageObjectHelper( | |
| 2287 | + page.getObjectHandle() | |
| 2288 | + .getKey("/Resources") | |
| 2289 | + .getKey("/XObject") | |
| 2290 | + .getKey("/Fx1")); | |
| 2291 | + std::cout << "--- parseContents ---" << std::endl; | |
| 2292 | + ParserCallbacks cb; | |
| 2293 | + fx1.parseContents(&cb); | |
| 2294 | + Pl_Buffer b("buffer"); | |
| 2295 | + fx1.addContentTokenFilter(new TokenFilter); | |
| 2296 | + fx1.pipeContents(&b); | |
| 2297 | + std::unique_ptr<Buffer> buf(b.getBuffer()); | |
| 2298 | + std::string s( | |
| 2299 | + reinterpret_cast<char const*>(buf->getBuffer()), | |
| 2300 | + buf->getSize()); | |
| 2301 | + assert(s.find("/bye") != std::string::npos); | |
| 2302 | + } | |
| 2282 | 2303 | else |
| 2283 | 2304 | { |
| 2284 | 2305 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |
qpdf/test_tokenizer.cc
| ... | ... | @@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable, |
| 219 | 219 | { |
| 220 | 220 | ++pageno; |
| 221 | 221 | Pl_Buffer plb("buffer"); |
| 222 | - (*iter).pipePageContents(&plb); | |
| 222 | + (*iter).pipeContents(&plb); | |
| 223 | 223 | PointerHolder<Buffer> content_data = plb.getBuffer(); |
| 224 | 224 | BufferInputSource* bis = new BufferInputSource( |
| 225 | 225 | "content data", content_data.getPointer()); | ... | ... |