Commit 6154221edbb0e17e77da7defeeac5fe53121ef57
1 parent
63ea4619
QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject
Showing
5 changed files
with
51 additions
and
47 deletions
ChangeLog
| 1 | 2020-12-31 Jay Berkenbilt <ejb@ql.org> | 1 | 2020-12-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | - * Rename QPDFPageObjectHelper::getPageImages to | ||
| 4 | - QPDFPageObjectHelper::getImages and make it support form XObjects | ||
| 5 | - as well as pages. The old name will be preserved for | ||
| 6 | - compatibility. | 3 | + * Rename some QPDFPageObjectHelper methods and make them support |
| 4 | + form XObjects as well as pages. The old names will be preserved | ||
| 5 | + for compatibility. | ||
| 6 | + - getPageImages -> getImages | ||
| 7 | + - filterPageContents -> filterContents | ||
| 7 | 8 | ||
| 8 | * Add QPDFObjectHandle::isFormXObject to test whether an object is | 9 | * Add QPDFObjectHandle::isFormXObject to test whether an object is |
| 9 | a form XObject. | 10 | a form XObject. |
examples/pdf-count-strings.cc
| 1 | // | 1 | // |
| 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter | 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter |
| 3 | -// with filterPageContents. See also pdf-filter-tokens.cc for an | ||
| 4 | -// example that uses QPDFObjectHandle::TokenFilter with | ||
| 5 | -// addContentTokenFilter. | 3 | +// with filterContents. See also pdf-filter-tokens.cc for an example |
| 4 | +// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter. | ||
| 6 | // | 5 | // |
| 7 | 6 | ||
| 8 | #include <iostream> | 7 | #include <iostream> |
| @@ -108,14 +107,14 @@ int main(int argc, char* argv[]) | @@ -108,14 +107,14 @@ int main(int argc, char* argv[]) | ||
| 108 | if (pageno % 2) | 107 | if (pageno % 2) |
| 109 | { | 108 | { |
| 110 | // Ignore output for odd pages. | 109 | // Ignore output for odd pages. |
| 111 | - ph.filterPageContents(&counter); | 110 | + ph.filterContents(&counter); |
| 112 | } | 111 | } |
| 113 | else | 112 | else |
| 114 | { | 113 | { |
| 115 | // Write output to stdout for even pages. | 114 | // Write output to stdout for even pages. |
| 116 | Pl_StdioFile out("stdout", stdout); | 115 | Pl_StdioFile out("stdout", stdout); |
| 117 | std::cout << "% Contents of page " << pageno << std::endl; | 116 | std::cout << "% Contents of page " << pageno << std::endl; |
| 118 | - ph.filterPageContents(&counter, &out); | 117 | + ph.filterContents(&counter, &out); |
| 119 | std::cout << "\n% end " << pageno << std::endl; | 118 | std::cout << "\n% end " << pageno << std::endl; |
| 120 | } | 119 | } |
| 121 | std::cout << "Page " << pageno | 120 | std::cout << "Page " << pageno |
examples/pdf-filter-tokens.cc
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter | 2 | // This example illustrates the use of QPDFObjectHandle::TokenFilter |
| 3 | // with addContentTokenFilter. Please see comments inline for details. | 3 | // with addContentTokenFilter. Please see comments inline for details. |
| 4 | // See also pdf-count-strings.cc for a use of | 4 | // See also pdf-count-strings.cc for a use of |
| 5 | -// QPDFObjectHandle::TokenFilter with filterPageContents. | 5 | +// QPDFObjectHandle::TokenFilter with filterContents. |
| 6 | // | 6 | // |
| 7 | 7 | ||
| 8 | #include <iostream> | 8 | #include <iostream> |
include/qpdf/QPDFPageObjectHelper.hh
| @@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 140 | QPDF_DLL | 140 | QPDF_DLL |
| 141 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); | 141 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 142 | 142 | ||
| 143 | - // Pass a page's contents through the given TokenFilter. If a | ||
| 144 | - // pipeline is also provided, it will be the target of the write | ||
| 145 | - // methods from the token filter. If a pipeline is not specified, | ||
| 146 | - // any output generated by the token filter will be discarded. Use | ||
| 147 | - // this interface if you need to pass a page's contents through | ||
| 148 | - // filter for work purposes without having that filter | ||
| 149 | - // automatically applied to the page's contents, as happens with | ||
| 150 | - // addContentTokenFilter. See examples/pdf-count-strings.cc for an | ||
| 151 | - // example. | 143 | + // Pass a page's or form XObject's contents through the given |
| 144 | + // TokenFilter. If a pipeline is also provided, it will be the | ||
| 145 | + // target of the write methods from the token filter. If a | ||
| 146 | + // pipeline is not specified, any output generated by the token | ||
| 147 | + // filter will be discarded. Use this interface if you need to | ||
| 148 | + // pass a page's contents through a filter for work purposes without | ||
| 149 | + // having that filter automatically applied to the page's | ||
| 150 | + // contents, as happens with addContentTokenFilter. See | ||
| 151 | + // examples/pdf-count-strings.cc for an example. | ||
| 152 | + QPDF_DLL | ||
| 153 | + void filterContents(QPDFObjectHandle::TokenFilter* filter, | ||
| 154 | + Pipeline* next = 0); | ||
| 155 | + | ||
| 156 | + // Old name -- calls filterContents() | ||
| 152 | QPDF_DLL | 157 | QPDF_DLL |
| 153 | void filterPageContents(QPDFObjectHandle::TokenFilter* filter, | 158 | void filterPageContents(QPDFObjectHandle::TokenFilter* filter, |
| 154 | Pipeline* next = 0); | 159 | Pipeline* next = 0); |
| @@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 265 | private: | 270 | private: |
| 266 | static void | 271 | static void |
| 267 | removeUnreferencedResourcesHelper( | 272 | removeUnreferencedResourcesHelper( |
| 268 | - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | ||
| 269 | - std::function<QPDFObjectHandle()> get_resource, | ||
| 270 | - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); | 273 | + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen); |
| 271 | 274 | ||
| 272 | class Members | 275 | class Members |
| 273 | { | 276 | { |
libqpdf/QPDFPageObjectHelper.cc
| @@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size) | @@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size) | ||
| 435 | QPDFObjectHandle::parse("<< /XObject << >> >>")); | 435 | QPDFObjectHandle::parse("<< /XObject << >> >>")); |
| 436 | InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); | 436 | InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); |
| 437 | Pl_Buffer b("new page content"); | 437 | Pl_Buffer b("new page content"); |
| 438 | - filterPageContents(&iit, &b); | 438 | + filterContents(&iit, &b); |
| 439 | if (iit.any_images) | 439 | if (iit.any_images) |
| 440 | { | 440 | { |
| 441 | getObjectHandle().replaceKey( | 441 | getObjectHandle().replaceKey( |
| @@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents( | @@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents( | ||
| 504 | QPDFObjectHandle::TokenFilter* filter, | 504 | QPDFObjectHandle::TokenFilter* filter, |
| 505 | Pipeline* next) | 505 | Pipeline* next) |
| 506 | { | 506 | { |
| 507 | - this->oh.filterPageContents(filter, next); | 507 | + return filterContents(filter, next); |
| 508 | +} | ||
| 509 | + | ||
| 510 | +void | ||
| 511 | +QPDFPageObjectHelper::filterContents( | ||
| 512 | + QPDFObjectHandle::TokenFilter* filter, | ||
| 513 | + Pipeline* next) | ||
| 514 | +{ | ||
| 515 | + if (this->oh.isFormXObject()) | ||
| 516 | + { | ||
| 517 | + this->oh.filterAsContents(filter, next); | ||
| 518 | + } | ||
| 519 | + else | ||
| 520 | + { | ||
| 521 | + this->oh.filterPageContents(filter, next); | ||
| 522 | + } | ||
| 508 | } | 523 | } |
| 509 | 524 | ||
| 510 | void | 525 | void |
| @@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) | @@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) | ||
| 554 | 569 | ||
| 555 | void | 570 | void |
| 556 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | 571 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 557 | - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | ||
| 558 | - std::function<QPDFObjectHandle()> get_resource, | ||
| 559 | - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) | 572 | + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen) |
| 560 | { | 573 | { |
| 561 | - if (seen.count(oh.getObjGen())) | 574 | + if (seen.count(ph.oh.getObjGen())) |
| 562 | { | 575 | { |
| 563 | return; | 576 | return; |
| 564 | } | 577 | } |
| 565 | - seen.insert(oh.getObjGen()); | 578 | + seen.insert(ph.oh.getObjGen()); |
| 566 | NameWatcher nw; | 579 | NameWatcher nw; |
| 567 | try | 580 | try |
| 568 | { | 581 | { |
| 569 | - filter_content(&nw); | 582 | + ph.filterContents(&nw); |
| 570 | } | 583 | } |
| 571 | catch (std::exception& e) | 584 | catch (std::exception& e) |
| 572 | { | 585 | { |
| 573 | - oh.warnIfPossible( | 586 | + ph.oh.warnIfPossible( |
| 574 | std::string("Unable to parse content stream: ") + e.what() + | 587 | std::string("Unable to parse content stream: ") + e.what() + |
| 575 | "; not attempting to remove unreferenced objects from this page"); | 588 | "; not attempting to remove unreferenced objects from this page"); |
| 576 | return; | 589 | return; |
| @@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 578 | if (nw.saw_bad) | 591 | if (nw.saw_bad) |
| 579 | { | 592 | { |
| 580 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); | 593 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); |
| 581 | - oh.warnIfPossible( | 594 | + ph.oh.warnIfPossible( |
| 582 | "Bad token found while scanning content stream; " | 595 | "Bad token found while scanning content stream; " |
| 583 | "not attempting to remove unreferenced objects from this page"); | 596 | "not attempting to remove unreferenced objects from this page"); |
| 584 | return; | 597 | return; |
| @@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 591 | std::vector<std::string> to_filter; | 604 | std::vector<std::string> to_filter; |
| 592 | to_filter.push_back("/Font"); | 605 | to_filter.push_back("/Font"); |
| 593 | to_filter.push_back("/XObject"); | 606 | to_filter.push_back("/XObject"); |
| 594 | - QPDFObjectHandle resources = get_resource(); | 607 | + QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
| 595 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); | 608 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); |
| 596 | d_iter != to_filter.end(); ++d_iter) | 609 | d_iter != to_filter.end(); ++d_iter) |
| 597 | { | 610 | { |
| @@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 615 | { | 628 | { |
| 616 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); | 629 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); |
| 617 | removeUnreferencedResourcesHelper( | 630 | removeUnreferencedResourcesHelper( |
| 618 | - resource.getDict(), seen, | ||
| 619 | - [&resource]() { | ||
| 620 | - return QPDFPageObjectHelper(resource) | ||
| 621 | - .getAttribute("/Resources", true); | ||
| 622 | - }, | ||
| 623 | - [&resource](QPDFObjectHandle::TokenFilter* f) { | ||
| 624 | - resource.filterAsContents(f); | ||
| 625 | - }); | 631 | + QPDFPageObjectHelper(resource), seen); |
| 626 | } | 632 | } |
| 627 | } | 633 | } |
| 628 | } | 634 | } |
| @@ -632,12 +638,7 @@ void | @@ -632,12 +638,7 @@ void | ||
| 632 | QPDFPageObjectHelper::removeUnreferencedResources() | 638 | QPDFPageObjectHelper::removeUnreferencedResources() |
| 633 | { | 639 | { |
| 634 | std::set<QPDFObjGen> seen; | 640 | std::set<QPDFObjGen> seen; |
| 635 | - removeUnreferencedResourcesHelper( | ||
| 636 | - this->oh, seen, | ||
| 637 | - [this]() { return this->getAttribute("/Resources", true); }, | ||
| 638 | - [this](QPDFObjectHandle::TokenFilter* f) { | ||
| 639 | - this->filterPageContents(f); | ||
| 640 | - }); | 641 | + removeUnreferencedResourcesHelper(*this, seen); |
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | QPDFPageObjectHelper | 644 | QPDFPageObjectHelper |