Commit 6154221edbb0e17e77da7defeeac5fe53121ef57
1 parent
63ea4619
QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject
Showing
5 changed files
with
51 additions
and
47 deletions
ChangeLog
| 1 | 1 | 2020-12-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | - * Rename QPDFPageObjectHelper::getPageImages to | |
| 4 | - QPDFPageObjectHelper::getImages and make it support form XObjects | |
| 5 | - as well as pages. The old name will be preserved for | |
| 6 | - compatibility. | |
| 3 | + * Rename some QPDFPageObjectHelper methods and make them support | |
| 4 | + form XObjects as well as pages. The old names will be preserved | |
| 5 | + for compatibility. | |
| 6 | + - getPageImages -> getImages | |
| 7 | + - filterPageContents -> filterContents | |
| 7 | 8 | |
| 8 | 9 | * Add QPDFObjectHandle::isFormXObject to test whether an object is |
| 9 | 10 | a form XObject. | ... | ... |
examples/pdf-count-strings.cc
| 1 | 1 | // |
| 2 | 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter |
| 3 | -// with filterPageContents. See also pdf-filter-tokens.cc for an | |
| 4 | -// example that uses QPDFObjectHandle::TokenFilter with | |
| 5 | -// addContentTokenFilter. | |
| 3 | +// with filterContents. See also pdf-filter-tokens.cc for an example | |
| 4 | +// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter. | |
| 6 | 5 | // |
| 7 | 6 | |
| 8 | 7 | #include <iostream> |
| ... | ... | @@ -108,14 +107,14 @@ int main(int argc, char* argv[]) |
| 108 | 107 | if (pageno % 2) |
| 109 | 108 | { |
| 110 | 109 | // Ignore output for odd pages. |
| 111 | - ph.filterPageContents(&counter); | |
| 110 | + ph.filterContents(&counter); | |
| 112 | 111 | } |
| 113 | 112 | else |
| 114 | 113 | { |
| 115 | 114 | // Write output to stdout for even pages. |
| 116 | 115 | Pl_StdioFile out("stdout", stdout); |
| 117 | 116 | std::cout << "% Contents of page " << pageno << std::endl; |
| 118 | - ph.filterPageContents(&counter, &out); | |
| 117 | + ph.filterContents(&counter, &out); | |
| 119 | 118 | std::cout << "\n% end " << pageno << std::endl; |
| 120 | 119 | } |
| 121 | 120 | std::cout << "Page " << pageno | ... | ... |
examples/pdf-filter-tokens.cc
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter |
| 3 | 3 | // with addContentTokenFilter. Please see comments inline for details. |
| 4 | 4 | // See also pdf-count-strings.cc for a use of |
| 5 | -// QPDFObjectHandle::TokenFilter with filterPageContents. | |
| 5 | +// QPDFObjectHandle::TokenFilter with filterContents. | |
| 6 | 6 | // |
| 7 | 7 | |
| 8 | 8 | #include <iostream> | ... | ... |
include/qpdf/QPDFPageObjectHelper.hh
| ... | ... | @@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 140 | 140 | QPDF_DLL |
| 141 | 141 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 142 | 142 | |
| 143 | - // Pass a page's contents through the given TokenFilter. If a | |
| 144 | - // pipeline is also provided, it will be the target of the write | |
| 145 | - // methods from the token filter. If a pipeline is not specified, | |
| 146 | - // any output generated by the token filter will be discarded. Use | |
| 147 | - // this interface if you need to pass a page's contents through | |
| 148 | - // filter for work purposes without having that filter | |
| 149 | - // automatically applied to the page's contents, as happens with | |
| 150 | - // addContentTokenFilter. See examples/pdf-count-strings.cc for an | |
| 151 | - // example. | |
| 143 | + // Pass a page's or form XObject's contents through the given | |
| 144 | + // TokenFilter. If a pipeline is also provided, it will be the | |
| 145 | + // target of the write methods from the token filter. If a | |
| 146 | + // pipeline is not specified, any output generated by the token | |
| 147 | + // filter will be discarded. Use this interface if you need to | |
| 148 | + // pass a page's contents through a filter for work purposes without | |
| 149 | + // having that filter automatically applied to the page's | |
| 150 | + // contents, as happens with addContentTokenFilter. See | |
| 151 | + // examples/pdf-count-strings.cc for an example. | |
| 152 | + QPDF_DLL | |
| 153 | + void filterContents(QPDFObjectHandle::TokenFilter* filter, | |
| 154 | + Pipeline* next = 0); | |
| 155 | + | |
| 156 | + // Old name -- calls filterContents() | |
| 152 | 157 | QPDF_DLL |
| 153 | 158 | void filterPageContents(QPDFObjectHandle::TokenFilter* filter, |
| 154 | 159 | Pipeline* next = 0); |
| ... | ... | @@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 265 | 270 | private: |
| 266 | 271 | static void |
| 267 | 272 | removeUnreferencedResourcesHelper( |
| 268 | - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | |
| 269 | - std::function<QPDFObjectHandle()> get_resource, | |
| 270 | - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); | |
| 273 | + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen); | |
| 271 | 274 | |
| 272 | 275 | class Members |
| 273 | 276 | { | ... | ... |
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size) |
| 435 | 435 | QPDFObjectHandle::parse("<< /XObject << >> >>")); |
| 436 | 436 | InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); |
| 437 | 437 | Pl_Buffer b("new page content"); |
| 438 | - filterPageContents(&iit, &b); | |
| 438 | + filterContents(&iit, &b); | |
| 439 | 439 | if (iit.any_images) |
| 440 | 440 | { |
| 441 | 441 | getObjectHandle().replaceKey( |
| ... | ... | @@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents( |
| 504 | 504 | QPDFObjectHandle::TokenFilter* filter, |
| 505 | 505 | Pipeline* next) |
| 506 | 506 | { |
| 507 | - this->oh.filterPageContents(filter, next); | |
| 507 | + return filterContents(filter, next); | |
| 508 | +} | |
| 509 | + | |
| 510 | +void | |
| 511 | +QPDFPageObjectHelper::filterContents( | |
| 512 | + QPDFObjectHandle::TokenFilter* filter, | |
| 513 | + Pipeline* next) | |
| 514 | +{ | |
| 515 | + if (this->oh.isFormXObject()) | |
| 516 | + { | |
| 517 | + this->oh.filterAsContents(filter, next); | |
| 518 | + } | |
| 519 | + else | |
| 520 | + { | |
| 521 | + this->oh.filterPageContents(filter, next); | |
| 522 | + } | |
| 508 | 523 | } |
| 509 | 524 | |
| 510 | 525 | void |
| ... | ... | @@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) |
| 554 | 569 | |
| 555 | 570 | void |
| 556 | 571 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 557 | - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | |
| 558 | - std::function<QPDFObjectHandle()> get_resource, | |
| 559 | - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) | |
| 572 | + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen) | |
| 560 | 573 | { |
| 561 | - if (seen.count(oh.getObjGen())) | |
| 574 | + if (seen.count(ph.oh.getObjGen())) | |
| 562 | 575 | { |
| 563 | 576 | return; |
| 564 | 577 | } |
| 565 | - seen.insert(oh.getObjGen()); | |
| 578 | + seen.insert(ph.oh.getObjGen()); | |
| 566 | 579 | NameWatcher nw; |
| 567 | 580 | try |
| 568 | 581 | { |
| 569 | - filter_content(&nw); | |
| 582 | + ph.filterContents(&nw); | |
| 570 | 583 | } |
| 571 | 584 | catch (std::exception& e) |
| 572 | 585 | { |
| 573 | - oh.warnIfPossible( | |
| 586 | + ph.oh.warnIfPossible( | |
| 574 | 587 | std::string("Unable to parse content stream: ") + e.what() + |
| 575 | 588 | "; not attempting to remove unreferenced objects from this page"); |
| 576 | 589 | return; |
| ... | ... | @@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 578 | 591 | if (nw.saw_bad) |
| 579 | 592 | { |
| 580 | 593 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); |
| 581 | - oh.warnIfPossible( | |
| 594 | + ph.oh.warnIfPossible( | |
| 582 | 595 | "Bad token found while scanning content stream; " |
| 583 | 596 | "not attempting to remove unreferenced objects from this page"); |
| 584 | 597 | return; |
| ... | ... | @@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 591 | 604 | std::vector<std::string> to_filter; |
| 592 | 605 | to_filter.push_back("/Font"); |
| 593 | 606 | to_filter.push_back("/XObject"); |
| 594 | - QPDFObjectHandle resources = get_resource(); | |
| 607 | + QPDFObjectHandle resources = ph.getAttribute("/Resources", true); | |
| 595 | 608 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); |
| 596 | 609 | d_iter != to_filter.end(); ++d_iter) |
| 597 | 610 | { |
| ... | ... | @@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 615 | 628 | { |
| 616 | 629 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); |
| 617 | 630 | removeUnreferencedResourcesHelper( |
| 618 | - resource.getDict(), seen, | |
| 619 | - [&resource]() { | |
| 620 | - return QPDFPageObjectHelper(resource) | |
| 621 | - .getAttribute("/Resources", true); | |
| 622 | - }, | |
| 623 | - [&resource](QPDFObjectHandle::TokenFilter* f) { | |
| 624 | - resource.filterAsContents(f); | |
| 625 | - }); | |
| 631 | + QPDFPageObjectHelper(resource), seen); | |
| 626 | 632 | } |
| 627 | 633 | } |
| 628 | 634 | } |
| ... | ... | @@ -632,12 +638,7 @@ void |
| 632 | 638 | QPDFPageObjectHelper::removeUnreferencedResources() |
| 633 | 639 | { |
| 634 | 640 | std::set<QPDFObjGen> seen; |
| 635 | - removeUnreferencedResourcesHelper( | |
| 636 | - this->oh, seen, | |
| 637 | - [this]() { return this->getAttribute("/Resources", true); }, | |
| 638 | - [this](QPDFObjectHandle::TokenFilter* f) { | |
| 639 | - this->filterPageContents(f); | |
| 640 | - }); | |
| 641 | + removeUnreferencedResourcesHelper(*this, seen); | |
| 641 | 642 | } |
| 642 | 643 | |
| 643 | 644 | QPDFPageObjectHelper | ... | ... |