Commit 278710fbe855b82ce0db1b1f1d8f969798872b6c
1 parent
b03e6bd6
Refactor QPDFPageObjectHelper::removeUnreferencedResources()
Refactor removeUnreferencedResources to prepare for filtering form XObjects.
Showing 5 changed files with 52 additions and 5 deletions
ChangeLog
| 1 | 1 | 2020-03-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Add QPDFObjectHandle::filterAsContents, which filters a stream's | |
| 4 | + data as if it were page contents. This can be useful to filter | |
| 5 | + form XObjects the same way we would filter page contents. | |
| 6 | + | |
| 3 | 7 | * If QPDF_EXECUTABLE is set, use it as the path to qpdf for |
| 4 | 8 | purposes of completion. This variable is only read during the |
| 5 | 9 | execution of `qpdf --completion-zsh` and `qpdf | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -400,6 +400,12 @@ class QPDFObjectHandle |
| 400 | 400 | void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); |
| 401 | 401 | // End legacy content stream helpers |
| 402 | 402 | |
| 403 | + // Called on a stream to filter the stream as if it were page | |
| 404 | + // contents. This can be used to apply a TokenFilter to a form | |
| 405 | + // XObject, whose data is in the same format as a content stream. | |
| 406 | + QPDF_DLL | |
| 407 | + void filterAsContents(TokenFilter* filter, Pipeline* next = 0); | |
| 408 | + | |
| 403 | 409 | // Type-specific factories |
| 404 | 410 | QPDF_DLL |
| 405 | 411 | static QPDFObjectHandle newNull(); | ... | ... |
include/qpdf/QPDFPageObjectHelper.hh
| ... | ... | @@ -28,6 +28,7 @@ |
| 28 | 28 | #include <qpdf/DLL.h> |
| 29 | 29 | |
| 30 | 30 | #include <qpdf/QPDFObjectHandle.hh> |
| 31 | +#include <functional> | |
| 31 | 32 | |
| 32 | 33 | class QPDFPageObjectHelper: public QPDFObjectHelper |
| 33 | 34 | { |
| ... | ... | @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 231 | 232 | bool invert_transformations = true); |
| 232 | 233 | |
| 233 | 234 | private: |
| 235 | + static void | |
| 236 | + removeUnreferencedResourcesHelper( | |
| 237 | + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | |
| 238 | + std::function<QPDFObjectHandle()> get_resource, | |
| 239 | + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); | |
| 240 | + | |
| 234 | 241 | class Members |
| 235 | 242 | { |
| 236 | 243 | friend class QPDFPageObjectHelper; | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) |
| 1630 | 1630 | } |
| 1631 | 1631 | |
| 1632 | 1632 | void |
| 1633 | +QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next) | |
| 1634 | +{ | |
| 1635 | + std::string description = "token filter for object " + | |
| 1636 | + QUtil::int_to_string(this->m->objid) + " " + | |
| 1637 | + QUtil::int_to_string(this->m->generation); | |
| 1638 | + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); | |
| 1639 | + this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized); | |
| 1640 | +} | |
| 1641 | + | |
| 1642 | +void | |
| 1633 | 1643 | QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, |
| 1634 | 1644 | ParserCallbacks* callbacks) |
| 1635 | 1645 | { | ... | ... |
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) |
| 511 | 511 | } |
| 512 | 512 | |
| 513 | 513 | void |
| 514 | -QPDFPageObjectHelper::removeUnreferencedResources() | |
| 514 | +QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | |
| 515 | + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | |
| 516 | + std::function<QPDFObjectHandle()> get_resource, | |
| 517 | + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) | |
| 515 | 518 | { |
| 519 | + if (seen.count(oh.getObjGen())) | |
| 520 | + { | |
| 521 | + return; | |
| 522 | + } | |
| 523 | + seen.insert(oh.getObjGen()); | |
| 516 | 524 | NameWatcher nw; |
| 517 | 525 | try |
| 518 | 526 | { |
| 519 | - filterPageContents(&nw); | |
| 527 | + filter_content(&nw); | |
| 520 | 528 | } |
| 521 | 529 | catch (std::exception& e) |
| 522 | 530 | { |
| 523 | - this->oh.warnIfPossible( | |
| 531 | + oh.warnIfPossible( | |
| 524 | 532 | std::string("Unable to parse content stream: ") + e.what() + |
| 525 | 533 | "; not attempting to remove unreferenced objects from this page"); |
| 526 | 534 | return; |
| ... | ... | @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() |
| 528 | 536 | if (nw.saw_bad) |
| 529 | 537 | { |
| 530 | 538 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); |
| 531 | - this->oh.warnIfPossible( | |
| 539 | + oh.warnIfPossible( | |
| 532 | 540 | "Bad token found while scanning content stream; " |
| 533 | 541 | "not attempting to remove unreferenced objects from this page"); |
| 534 | 542 | return; |
| ... | ... | @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() |
| 541 | 549 | std::vector<std::string> to_filter; |
| 542 | 550 | to_filter.push_back("/Font"); |
| 543 | 551 | to_filter.push_back("/XObject"); |
| 544 | - QPDFObjectHandle resources = getAttribute("/Resources", true); | |
| 552 | + QPDFObjectHandle resources = get_resource(); | |
| 545 | 553 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); |
| 546 | 554 | d_iter != to_filter.end(); ++d_iter) |
| 547 | 555 | { |
| ... | ... | @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources() |
| 564 | 572 | } |
| 565 | 573 | } |
| 566 | 574 | |
| 575 | +void | |
| 576 | +QPDFPageObjectHelper::removeUnreferencedResources() | |
| 577 | +{ | |
| 578 | + std::set<QPDFObjGen> seen; | |
| 579 | + removeUnreferencedResourcesHelper( | |
| 580 | + this->oh, seen, | |
| 581 | + [this]() { return this->getAttribute("/Resources", true); }, | |
| 582 | + [this](QPDFObjectHandle::TokenFilter* f) { | |
| 583 | + this->filterPageContents(f); | |
| 584 | + }); | |
| 585 | +} | |
| 586 | + | |
| 567 | 587 | QPDFPageObjectHelper |
| 568 | 588 | QPDFPageObjectHelper::shallowCopyPage() |
| 569 | 589 | { | ... | ... |