Commit 278710fbe855b82ce0db1b1f1d8f969798872b6c
1 parent
b03e6bd6
Refactor QPDFPageObjectHelper::removeUnreferencedResources()
Refactor removeUnreferencedResources to prepare for filtering form XObjects.
Showing 5 changed files with 52 additions and 5 deletions
ChangeLog
| 1 | 2020-03-31 Jay Berkenbilt <ejb@ql.org> | 1 | 2020-03-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Add QPDFObjectHandle::filterAsContents, which filters a stream's | ||
| 4 | + data as if it were page contents. This can be useful to filter | ||
| 5 | + form XObjects the same way we would filter page contents. | ||
| 6 | + | ||
| 3 | * If QPDF_EXECUTABLE is set, use it as the path to qpdf for | 7 | * If QPDF_EXECUTABLE is set, use it as the path to qpdf for |
| 4 | purposes of completion. This variable is only read during the | 8 | purposes of completion. This variable is only read during the |
| 5 | execution of `qpdf --completion-zsh` and `qpdf | 9 | execution of `qpdf --completion-zsh` and `qpdf |
include/qpdf/QPDFObjectHandle.hh
| @@ -400,6 +400,12 @@ class QPDFObjectHandle | @@ -400,6 +400,12 @@ class QPDFObjectHandle | ||
| 400 | void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); | 400 | void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); |
| 401 | // End legacy content stream helpers | 401 | // End legacy content stream helpers |
| 402 | 402 | ||
| 403 | + // Called on a stream to filter the stream as if it were page | ||
| 404 | + // contents. This can be used to apply a TokenFilter to a form | ||
| 405 | + // XObject, whose data is in the same format as a content stream. | ||
| 406 | + QPDF_DLL | ||
| 407 | + void filterAsContents(TokenFilter* filter, Pipeline* next = 0); | ||
| 408 | + | ||
| 403 | // Type-specific factories | 409 | // Type-specific factories |
| 404 | QPDF_DLL | 410 | QPDF_DLL |
| 405 | static QPDFObjectHandle newNull(); | 411 | static QPDFObjectHandle newNull(); |
include/qpdf/QPDFPageObjectHelper.hh
| @@ -28,6 +28,7 @@ | @@ -28,6 +28,7 @@ | ||
| 28 | #include <qpdf/DLL.h> | 28 | #include <qpdf/DLL.h> |
| 29 | 29 | ||
| 30 | #include <qpdf/QPDFObjectHandle.hh> | 30 | #include <qpdf/QPDFObjectHandle.hh> |
| 31 | +#include <functional> | ||
| 31 | 32 | ||
| 32 | class QPDFPageObjectHelper: public QPDFObjectHelper | 33 | class QPDFPageObjectHelper: public QPDFObjectHelper |
| 33 | { | 34 | { |
| @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 231 | bool invert_transformations = true); | 232 | bool invert_transformations = true); |
| 232 | 233 | ||
| 233 | private: | 234 | private: |
| 235 | + static void | ||
| 236 | + removeUnreferencedResourcesHelper( | ||
| 237 | + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | ||
| 238 | + std::function<QPDFObjectHandle()> get_resource, | ||
| 239 | + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); | ||
| 240 | + | ||
| 234 | class Members | 241 | class Members |
| 235 | { | 242 | { |
| 236 | friend class QPDFPageObjectHelper; | 243 | friend class QPDFPageObjectHelper; |
libqpdf/QPDFObjectHandle.cc
| @@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) | @@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) | ||
| 1630 | } | 1630 | } |
| 1631 | 1631 | ||
| 1632 | void | 1632 | void |
| 1633 | +QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next) | ||
| 1634 | +{ | ||
| 1635 | + std::string description = "token filter for object " + | ||
| 1636 | + QUtil::int_to_string(this->m->objid) + " " + | ||
| 1637 | + QUtil::int_to_string(this->m->generation); | ||
| 1638 | + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); | ||
| 1639 | + this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized); | ||
| 1640 | +} | ||
| 1641 | + | ||
| 1642 | +void | ||
| 1633 | QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, | 1643 | QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, |
| 1634 | ParserCallbacks* callbacks) | 1644 | ParserCallbacks* callbacks) |
| 1635 | { | 1645 | { |
libqpdf/QPDFPageObjectHelper.cc
| @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) | @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) | ||
| 511 | } | 511 | } |
| 512 | 512 | ||
| 513 | void | 513 | void |
| 514 | -QPDFPageObjectHelper::removeUnreferencedResources() | 514 | +QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 515 | + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, | ||
| 516 | + std::function<QPDFObjectHandle()> get_resource, | ||
| 517 | + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) | ||
| 515 | { | 518 | { |
| 519 | + if (seen.count(oh.getObjGen())) | ||
| 520 | + { | ||
| 521 | + return; | ||
| 522 | + } | ||
| 523 | + seen.insert(oh.getObjGen()); | ||
| 516 | NameWatcher nw; | 524 | NameWatcher nw; |
| 517 | try | 525 | try |
| 518 | { | 526 | { |
| 519 | - filterPageContents(&nw); | 527 | + filter_content(&nw); |
| 520 | } | 528 | } |
| 521 | catch (std::exception& e) | 529 | catch (std::exception& e) |
| 522 | { | 530 | { |
| 523 | - this->oh.warnIfPossible( | 531 | + oh.warnIfPossible( |
| 524 | std::string("Unable to parse content stream: ") + e.what() + | 532 | std::string("Unable to parse content stream: ") + e.what() + |
| 525 | "; not attempting to remove unreferenced objects from this page"); | 533 | "; not attempting to remove unreferenced objects from this page"); |
| 526 | return; | 534 | return; |
| @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() | @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() | ||
| 528 | if (nw.saw_bad) | 536 | if (nw.saw_bad) |
| 529 | { | 537 | { |
| 530 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); | 538 | QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); |
| 531 | - this->oh.warnIfPossible( | 539 | + oh.warnIfPossible( |
| 532 | "Bad token found while scanning content stream; " | 540 | "Bad token found while scanning content stream; " |
| 533 | "not attempting to remove unreferenced objects from this page"); | 541 | "not attempting to remove unreferenced objects from this page"); |
| 534 | return; | 542 | return; |
| @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() | @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() | ||
| 541 | std::vector<std::string> to_filter; | 549 | std::vector<std::string> to_filter; |
| 542 | to_filter.push_back("/Font"); | 550 | to_filter.push_back("/Font"); |
| 543 | to_filter.push_back("/XObject"); | 551 | to_filter.push_back("/XObject"); |
| 544 | - QPDFObjectHandle resources = getAttribute("/Resources", true); | 552 | + QPDFObjectHandle resources = get_resource(); |
| 545 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); | 553 | for (std::vector<std::string>::iterator d_iter = to_filter.begin(); |
| 546 | d_iter != to_filter.end(); ++d_iter) | 554 | d_iter != to_filter.end(); ++d_iter) |
| 547 | { | 555 | { |
| @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources() | @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources() | ||
| 564 | } | 572 | } |
| 565 | } | 573 | } |
| 566 | 574 | ||
| 575 | +void | ||
| 576 | +QPDFPageObjectHelper::removeUnreferencedResources() | ||
| 577 | +{ | ||
| 578 | + std::set<QPDFObjGen> seen; | ||
| 579 | + removeUnreferencedResourcesHelper( | ||
| 580 | + this->oh, seen, | ||
| 581 | + [this]() { return this->getAttribute("/Resources", true); }, | ||
| 582 | + [this](QPDFObjectHandle::TokenFilter* f) { | ||
| 583 | + this->filterPageContents(f); | ||
| 584 | + }); | ||
| 585 | +} | ||
| 586 | + | ||
| 567 | QPDFPageObjectHelper | 587 | QPDFPageObjectHelper |
| 568 | QPDFPageObjectHelper::shallowCopyPage() | 588 | QPDFPageObjectHelper::shallowCopyPage() |
| 569 | { | 589 | { |