Commit 278710fbe855b82ce0db1b1f1d8f969798872b6c

Authored by Jay Berkenbilt
1 parent b03e6bd6

Refactor QPDFPageObjectHelper::removeUnreferencedResources()

Refactor removeUnreferencedResources to prepare for filtering form
XObjects.
ChangeLog
1 1 2020-03-31 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add QPDFObjectHandle::filterAsContents, which filters a stream's
  4 + data as if it were page contents. This can be useful to filter
  5 + form XObjects the same way we would filter page contents.
  6 +
3 7 * If QPDF_EXECUTABLE is set, use it as the path to qpdf for
4 8 purposes of completion. This variable is only read during the
5 9 execution of `qpdf --completion-zsh` and `qpdf
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -400,6 +400,12 @@ class QPDFObjectHandle
400 400 void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
401 401 // End legacy content stream helpers
402 402  
  403 + // Called on a stream to filter the stream as if it were page
  404 + // contents. This can be used to apply a TokenFilter to a form
  405 + // XObject, whose data is in the same format as a content stream.
  406 + QPDF_DLL
  407 + void filterAsContents(TokenFilter* filter, Pipeline* next = 0);
  408 +
403 409 // Type-specific factories
404 410 QPDF_DLL
405 411 static QPDFObjectHandle newNull();
... ...
include/qpdf/QPDFPageObjectHelper.hh
... ... @@ -28,6 +28,7 @@
28 28 #include <qpdf/DLL.h>
29 29  
30 30 #include <qpdf/QPDFObjectHandle.hh>
  31 +#include <functional>
31 32  
32 33 class QPDFPageObjectHelper: public QPDFObjectHelper
33 34 {
... ... @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
231 232 bool invert_transformations = true);
232 233  
233 234 private:
  235 + static void
  236 + removeUnreferencedResourcesHelper(
  237 + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
  238 + std::function<QPDFObjectHandle()> get_resource,
  239 + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
  240 +
234 241 class Members
235 242 {
236 243 friend class QPDFPageObjectHelper;
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1630 1630 }
1631 1631  
1632 1632 void
  1633 +QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next) // runs this stream's data through a token filter (see header: treats the stream like page contents)
  1634 +{
  1635 + std::string description = "token filter for object " +
  1636 + QUtil::int_to_string(this->m->objid) + " " +
  1637 + QUtil::int_to_string(this->m->generation); // label built from this object's id and generation; handed to the tokenizer pipeline below
  1638 + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); // tokenizer pipeline constructed from the label, the caller's filter, and the optional downstream pipeline
  1639 + this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized); // sends the stream data into token_pipeline; NOTE(review): presumably requires this handle to be a stream -- confirm against pipeStreamData
  1640 +}
  1641 +
  1642 +void
1633 1643 QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
1634 1644 ParserCallbacks* callbacks)
1635 1645 {
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
511 511 }
512 512  
513 513 void
514   -QPDFPageObjectHelper::removeUnreferencedResources()
  514 +QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
  515 + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
  516 + std::function<QPDFObjectHandle()> get_resource,
  517 + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
515 518 {
  519 + if (seen.count(oh.getObjGen()))
  520 + {
  521 + return;
  522 + }
  523 + seen.insert(oh.getObjGen());
516 524 NameWatcher nw;
517 525 try
518 526 {
519   - filterPageContents(&nw);
  527 + filter_content(&nw);
520 528 }
521 529 catch (std::exception& e)
522 530 {
523   - this->oh.warnIfPossible(
  531 + oh.warnIfPossible(
524 532 std::string("Unable to parse content stream: ") + e.what() +
525 533 "; not attempting to remove unreferenced objects from this page");
526 534 return;
... ... @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
528 536 if (nw.saw_bad)
529 537 {
530 538 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
531   - this->oh.warnIfPossible(
  539 + oh.warnIfPossible(
532 540 "Bad token found while scanning content stream; "
533 541 "not attempting to remove unreferenced objects from this page");
534 542 return;
... ... @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
541 549 std::vector<std::string> to_filter;
542 550 to_filter.push_back("/Font");
543 551 to_filter.push_back("/XObject");
544   - QPDFObjectHandle resources = getAttribute("/Resources", true);
  552 + QPDFObjectHandle resources = get_resource();
545 553 for (std::vector<std::string>::iterator d_iter = to_filter.begin();
546 554 d_iter != to_filter.end(); ++d_iter)
547 555 {
... ... @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources()
564 572 }
565 573 }
566 574  
  575 +void
  576 +QPDFPageObjectHelper::removeUnreferencedResources() // page-level entry point; delegates all work to removeUnreferencedResourcesHelper
  577 +{
  578 + std::set<QPDFObjGen> seen; // starts empty; the helper records each object's QPDFObjGen here and returns early if it is already present
  579 + removeUnreferencedResourcesHelper(
  580 + this->oh, seen,
  581 + [this]() { return this->getAttribute("/Resources", true); }, // get_resource callback: yields the page's /Resources attribute
  582 + [this](QPDFObjectHandle::TokenFilter* f) { // filter_content callback: applies the supplied token filter to the page contents
  583 + this->filterPageContents(f);
  584 + });
  585 +}
  586 +
567 587 QPDFPageObjectHelper
568 588 QPDFPageObjectHelper::shallowCopyPage()
569 589 {
... ...