Commit 278710fbe855b82ce0db1b1f1d8f969798872b6c

Authored by Jay Berkenbilt
1 parent b03e6bd6

Refactor QPDFPageObjectHelper::removeUnreferencedResources()

Refactor removeUnreferencedResources to prepare for filtering form
XObjects.
ChangeLog
1 2020-03-31 Jay Berkenbilt <ejb@ql.org> 1 2020-03-31 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Add QPDFObjectHandle::filterAsContents, which filters a stream's
  4 + data as if it were page contents. This can be useful to filter
  5 + form XObjects the same way we would filter page contents.
  6 +
3 * If QPDF_EXECUTABLE is set, use it as the path to qpdf for 7 * If QPDF_EXECUTABLE is set, use it as the path to qpdf for
4 purposes of completion. This variable is only read during the 8 purposes of completion. This variable is only read during the
5 executation of `qpdf --completion-zsh` and `qpdf 9 executation of `qpdf --completion-zsh` and `qpdf
include/qpdf/QPDFObjectHandle.hh
@@ -400,6 +400,12 @@ class QPDFObjectHandle @@ -400,6 +400,12 @@ class QPDFObjectHandle
400 void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); 400 void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
401 // End legacy content stream helpers 401 // End legacy content stream helpers
402 402
  403 + // Called on a stream to filter the stream as if it were page
  404 + // contents. This can be used to apply a TokenFilter to a form
  405 + // XObject, whose data is in the same format as a content stream.
  406 + QPDF_DLL
  407 + void filterAsContents(TokenFilter* filter, Pipeline* next = 0);
  408 +
403 // Type-specific factories 409 // Type-specific factories
404 QPDF_DLL 410 QPDF_DLL
405 static QPDFObjectHandle newNull(); 411 static QPDFObjectHandle newNull();
include/qpdf/QPDFPageObjectHelper.hh
@@ -28,6 +28,7 @@ @@ -28,6 +28,7 @@
28 #include <qpdf/DLL.h> 28 #include <qpdf/DLL.h>
29 29
30 #include <qpdf/QPDFObjectHandle.hh> 30 #include <qpdf/QPDFObjectHandle.hh>
  31 +#include <functional>
31 32
32 class QPDFPageObjectHelper: public QPDFObjectHelper 33 class QPDFPageObjectHelper: public QPDFObjectHelper
33 { 34 {
@@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
231 bool invert_transformations = true); 232 bool invert_transformations = true);
232 233
233 private: 234 private:
  235 + static void
  236 + removeUnreferencedResourcesHelper(
  237 + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
  238 + std::function<QPDFObjectHandle()> get_resource,
  239 + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
  240 +
234 class Members 241 class Members
235 { 242 {
236 friend class QPDFPageObjectHelper; 243 friend class QPDFPageObjectHelper;
libqpdf/QPDFObjectHandle.cc
@@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) @@ -1630,6 +1630,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1630 } 1630 }
1631 1631
1632 void 1632 void
  1633 +QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
  1634 +{
  1635 + std::string description = "token filter for object " +
  1636 + QUtil::int_to_string(this->m->objid) + " " +
  1637 + QUtil::int_to_string(this->m->generation);
  1638 + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
  1639 + this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized);
  1640 +}
  1641 +
  1642 +void
1633 QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, 1643 QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
1634 ParserCallbacks* callbacks) 1644 ParserCallbacks* callbacks)
1635 { 1645 {
libqpdf/QPDFPageObjectHelper.cc
@@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
511 } 511 }
512 512
513 void 513 void
514 -QPDFPageObjectHelper::removeUnreferencedResources() 514 +QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
  515 + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
  516 + std::function<QPDFObjectHandle()> get_resource,
  517 + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
515 { 518 {
  519 + if (seen.count(oh.getObjGen()))
  520 + {
  521 + return;
  522 + }
  523 + seen.insert(oh.getObjGen());
516 NameWatcher nw; 524 NameWatcher nw;
517 try 525 try
518 { 526 {
519 - filterPageContents(&nw); 527 + filter_content(&nw);
520 } 528 }
521 catch (std::exception& e) 529 catch (std::exception& e)
522 { 530 {
523 - this->oh.warnIfPossible( 531 + oh.warnIfPossible(
524 std::string("Unable to parse content stream: ") + e.what() + 532 std::string("Unable to parse content stream: ") + e.what() +
525 "; not attempting to remove unreferenced objects from this page"); 533 "; not attempting to remove unreferenced objects from this page");
526 return; 534 return;
@@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
528 if (nw.saw_bad) 536 if (nw.saw_bad)
529 { 537 {
530 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); 538 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
531 - this->oh.warnIfPossible( 539 + oh.warnIfPossible(
532 "Bad token found while scanning content stream; " 540 "Bad token found while scanning content stream; "
533 "not attempting to remove unreferenced objects from this page"); 541 "not attempting to remove unreferenced objects from this page");
534 return; 542 return;
@@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
541 std::vector<std::string> to_filter; 549 std::vector<std::string> to_filter;
542 to_filter.push_back("/Font"); 550 to_filter.push_back("/Font");
543 to_filter.push_back("/XObject"); 551 to_filter.push_back("/XObject");
544 - QPDFObjectHandle resources = getAttribute("/Resources", true); 552 + QPDFObjectHandle resources = get_resource();
545 for (std::vector<std::string>::iterator d_iter = to_filter.begin(); 553 for (std::vector<std::string>::iterator d_iter = to_filter.begin();
546 d_iter != to_filter.end(); ++d_iter) 554 d_iter != to_filter.end(); ++d_iter)
547 { 555 {
@@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources() @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources()
564 } 572 }
565 } 573 }
566 574
  575 +void
  576 +QPDFPageObjectHelper::removeUnreferencedResources()
  577 +{
  578 + std::set<QPDFObjGen> seen;
  579 + removeUnreferencedResourcesHelper(
  580 + this->oh, seen,
  581 + [this]() { return this->getAttribute("/Resources", true); },
  582 + [this](QPDFObjectHandle::TokenFilter* f) {
  583 + this->filterPageContents(f);
  584 + });
  585 +}
  586 +
567 QPDFPageObjectHelper 587 QPDFPageObjectHelper
568 QPDFPageObjectHelper::shallowCopyPage() 588 QPDFPageObjectHelper::shallowCopyPage()
569 { 589 {