Commit 6154221edbb0e17e77da7defeeac5fe53121ef57

Authored by Jay Berkenbilt
1 parent 63ea4619

QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject

ChangeLog
1 1 2020-12-31 Jay Berkenbilt <ejb@ql.org>
2 2  
3   - * Rename QPDFPageObjectHelper::getPageImages to
4   - QPDFPageObjectHelper::getImages and make it support form XObjects
5   - as well as pages. The old name will be preserved for
6   - compatibility.
  3 + * Rename some QPDFPageObjectHelper methods and make them support
  4 + form XObjects as well as pages. The old names will be preserved
  5 + for compatibility.
  6 + - getPageImages -> getImages
  7 + - filterPageContents -> filterContents
7 8  
8 9 * Add QPDFObjectHandle::isFormXObject to test whether an object is
9 10 a form XObject.
... ...
examples/pdf-count-strings.cc
1 1 //
2 2 // This example illustrates the use of QPDFObjectHandle::TokenFilter
3   -// with filterPageContents. See also pdf-filter-tokens.cc for an
4   -// example that uses QPDFObjectHandle::TokenFilter with
5   -// addContentTokenFilter.
  3 +// with filterContents. See also pdf-filter-tokens.cc for an example
  4 +// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter.
6 5 //
7 6  
8 7 #include <iostream>
... ... @@ -108,14 +107,14 @@ int main(int argc, char* argv[])
108 107 if (pageno % 2)
109 108 {
110 109 // Ignore output for odd pages.
111   - ph.filterPageContents(&counter);
  110 + ph.filterContents(&counter);
112 111 }
113 112 else
114 113 {
115 114 // Write output to stdout for even pages.
116 115 Pl_StdioFile out("stdout", stdout);
117 116 std::cout << "% Contents of page " << pageno << std::endl;
118   - ph.filterPageContents(&counter, &out);
  117 + ph.filterContents(&counter, &out);
119 118 std::cout << "\n% end " << pageno << std::endl;
120 119 }
121 120 std::cout << "Page " << pageno
... ...
examples/pdf-filter-tokens.cc
... ... @@ -2,7 +2,7 @@
2 2 // This example illustrates the use of QPDFObjectHandle::TokenFilter
3 3 // with addContentTokenFilter. Please see comments inline for details.
4 4 // See also pdf-count-strings.cc for a use of
5   -// QPDFObjectHandle::TokenFilter with filterPageContents.
  5 +// QPDFObjectHandle::TokenFilter with filterContents.
6 6 //
7 7  
8 8 #include <iostream>
... ...
include/qpdf/QPDFPageObjectHelper.hh
... ... @@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
140 140 QPDF_DLL
141 141 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
142 142  
143   - // Pass a page's contents through the given TokenFilter. If a
144   - // pipeline is also provided, it will be the target of the write
145   - // methods from the token filter. If a pipeline is not specified,
146   - // any output generated by the token filter will be discarded. Use
147   - // this interface if you need to pass a page's contents through
148   - // filter for work purposes without having that filter
149   - // automatically applied to the page's contents, as happens with
150   - // addContentTokenFilter. See examples/pdf-count-strings.cc for an
151   - // example.
  143 + // Pass a page's or form XObject's contents through the given
  144 + // TokenFilter. If a pipeline is also provided, it will be the
  145 + // target of the write methods from the token filter. If a
  146 + // pipeline is not specified, any output generated by the token
  147 + // filter will be discarded. Use this interface if you need to
  148 + // pass a page's contents through a filter for work purposes without
  149 + // having that filter automatically applied to the page's
  150 + // contents, as happens with addContentTokenFilter. See
  151 + // examples/pdf-count-strings.cc for an example.
  152 + QPDF_DLL
  153 + void filterContents(QPDFObjectHandle::TokenFilter* filter,
  154 + Pipeline* next = 0);
  155 +
  156 + // Old name -- calls filterContents()
152 157 QPDF_DLL
153 158 void filterPageContents(QPDFObjectHandle::TokenFilter* filter,
154 159 Pipeline* next = 0);
... ... @@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
265 270 private:
266 271 static void
267 272 removeUnreferencedResourcesHelper(
268   - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
269   - std::function<QPDFObjectHandle()> get_resource,
270   - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
  273 + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen);
271 274  
272 275 class Members
273 276 {
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
435 435 QPDFObjectHandle::parse("<< /XObject << >> >>"));
436 436 InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
437 437 Pl_Buffer b("new page content");
438   - filterPageContents(&iit, &b);
  438 + filterContents(&iit, &b);
439 439 if (iit.any_images)
440 440 {
441 441 getObjectHandle().replaceKey(
... ... @@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents(
504 504 QPDFObjectHandle::TokenFilter* filter,
505 505 Pipeline* next)
506 506 {
507   - this->oh.filterPageContents(filter, next);
  507 + return filterContents(filter, next);
  508 +}
  509 +
  510 +void
  511 +QPDFPageObjectHelper::filterContents(
  512 + QPDFObjectHandle::TokenFilter* filter,
  513 + Pipeline* next)
  514 +{
  515 + if (this->oh.isFormXObject())
  516 + {
  517 + this->oh.filterAsContents(filter, next);
  518 + }
  519 + else
  520 + {
  521 + this->oh.filterPageContents(filter, next);
  522 + }
508 523 }
509 524  
510 525 void
... ... @@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
554 569  
555 570 void
556 571 QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
557   - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
558   - std::function<QPDFObjectHandle()> get_resource,
559   - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
  572 + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen)
560 573 {
561   - if (seen.count(oh.getObjGen()))
  574 + if (seen.count(ph.oh.getObjGen()))
562 575 {
563 576 return;
564 577 }
565   - seen.insert(oh.getObjGen());
  578 + seen.insert(ph.oh.getObjGen());
566 579 NameWatcher nw;
567 580 try
568 581 {
569   - filter_content(&nw);
  582 + ph.filterContents(&nw);
570 583 }
571 584 catch (std::exception& e)
572 585 {
573   - oh.warnIfPossible(
  586 + ph.oh.warnIfPossible(
574 587 std::string("Unable to parse content stream: ") + e.what() +
575 588 "; not attempting to remove unreferenced objects from this page");
576 589 return;
... ... @@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
578 591 if (nw.saw_bad)
579 592 {
580 593 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
581   - oh.warnIfPossible(
  594 + ph.oh.warnIfPossible(
582 595 "Bad token found while scanning content stream; "
583 596 "not attempting to remove unreferenced objects from this page");
584 597 return;
... ... @@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
591 604 std::vector<std::string> to_filter;
592 605 to_filter.push_back("/Font");
593 606 to_filter.push_back("/XObject");
594   - QPDFObjectHandle resources = get_resource();
  607 + QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
595 608 for (std::vector<std::string>::iterator d_iter = to_filter.begin();
596 609 d_iter != to_filter.end(); ++d_iter)
597 610 {
... ... @@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
615 628 {
616 629 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
617 630 removeUnreferencedResourcesHelper(
618   - resource.getDict(), seen,
619   - [&resource]() {
620   - return QPDFPageObjectHelper(resource)
621   - .getAttribute("/Resources", true);
622   - },
623   - [&resource](QPDFObjectHandle::TokenFilter* f) {
624   - resource.filterAsContents(f);
625   - });
  631 + QPDFPageObjectHelper(resource), seen);
626 632 }
627 633 }
628 634 }
... ... @@ -632,12 +638,7 @@ void
632 638 QPDFPageObjectHelper::removeUnreferencedResources()
633 639 {
634 640 std::set<QPDFObjGen> seen;
635   - removeUnreferencedResourcesHelper(
636   - this->oh, seen,
637   - [this]() { return this->getAttribute("/Resources", true); },
638   - [this](QPDFObjectHandle::TokenFilter* f) {
639   - this->filterPageContents(f);
640   - });
  641 + removeUnreferencedResourcesHelper(*this, seen);
641 642 }
642 643  
643 644 QPDFPageObjectHelper
... ...