Commit 6154221edbb0e17e77da7defeeac5fe53121ef57

Authored by Jay Berkenbilt
1 parent 63ea4619

QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject

ChangeLog
1 2020-12-31 Jay Berkenbilt <ejb@ql.org> 1 2020-12-31 Jay Berkenbilt <ejb@ql.org>
2 2
3 - * Rename QPDFPageObjectHelper::getPageImages to  
4 - QPDFPageObjectHelper::getImages and make it support form XObjects  
5 - as well as pages. The old name will be preserved for  
6 - compatibility. 3 + * Rename some QPDFPageObjectHelper methods and make them support
  4 + form XObjects as well as pages. The old names will be preserved
  5 + for compatibility.
  6 + - getPageImages -> getImages
  7 + - filterPageContents -> filterContents
7 8
8 * Add QPDFObjectHandle::isFormXObject to test whether an object is 9 * Add QPDFObjectHandle::isFormXObject to test whether an object is
9 a form XObject. 10 a form XObject.
examples/pdf-count-strings.cc
1 // 1 //
2 // This example illustrates the use of QPDFObjectHandle::TokenFilter 2 // This example illustrates the use of QPDFObjectHandle::TokenFilter
3 -// with filterPageContents. See also pdf-filter-tokens.cc for an  
4 -// example that uses QPDFObjectHandle::TokenFilter with  
5 -// addContentTokenFilter. 3 +// with filterContents. See also pdf-filter-tokens.cc for an example
  4 +// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter.
6 // 5 //
7 6
8 #include <iostream> 7 #include <iostream>
@@ -108,14 +107,14 @@ int main(int argc, char* argv[]) @@ -108,14 +107,14 @@ int main(int argc, char* argv[])
108 if (pageno % 2) 107 if (pageno % 2)
109 { 108 {
110 // Ignore output for odd pages. 109 // Ignore output for odd pages.
111 - ph.filterPageContents(&counter); 110 + ph.filterContents(&counter);
112 } 111 }
113 else 112 else
114 { 113 {
115 // Write output to stdout for even pages. 114 // Write output to stdout for even pages.
116 Pl_StdioFile out("stdout", stdout); 115 Pl_StdioFile out("stdout", stdout);
117 std::cout << "% Contents of page " << pageno << std::endl; 116 std::cout << "% Contents of page " << pageno << std::endl;
118 - ph.filterPageContents(&counter, &out); 117 + ph.filterContents(&counter, &out);
119 std::cout << "\n% end " << pageno << std::endl; 118 std::cout << "\n% end " << pageno << std::endl;
120 } 119 }
121 std::cout << "Page " << pageno 120 std::cout << "Page " << pageno
examples/pdf-filter-tokens.cc
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 // This example illustrates the use of QPDFObjectHandle::TokenFilter 2 // This example illustrates the use of QPDFObjectHandle::TokenFilter
3 // with addContentTokenFilter. Please see comments inline for details. 3 // with addContentTokenFilter. Please see comments inline for details.
4 // See also pdf-count-strings.cc for a use of 4 // See also pdf-count-strings.cc for a use of
5 -// QPDFObjectHandle::TokenFilter with filterPageContents. 5 +// QPDFObjectHandle::TokenFilter with filterContents.
6 // 6 //
7 7
8 #include <iostream> 8 #include <iostream>
include/qpdf/QPDFPageObjectHelper.hh
@@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
140 QPDF_DLL 140 QPDF_DLL
141 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); 141 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
142 142
143 - // Pass a page's contents through the given TokenFilter. If a  
144 - // pipeline is also provided, it will be the target of the write  
145 - // methods from the token filter. If a pipeline is not specified,  
146 - // any output generated by the token filter will be discarded. Use  
147 - // this interface if you need to pass a page's contents through  
148 - // filter for work purposes without having that filter  
149 - // automatically applied to the page's contents, as happens with  
150 - // addContentTokenFilter. See examples/pdf-count-strings.cc for an  
151 - // example. 143 + // Pass a page's or form XObject's contents through the given
  144 + // TokenFilter. If a pipeline is also provided, it will be the
  145 + // target of the write methods from the token filter. If a
  146 + // pipeline is not specified, any output generated by the token
  147 + // filter will be discarded. Use this interface if you need to
  148 + // pass a page's contents through a filter for work purposes without
  149 + // having that filter automatically applied to the page's
  150 + // contents, as happens with addContentTokenFilter. See
  151 + // examples/pdf-count-strings.cc for an example.
  152 + QPDF_DLL
  153 + void filterContents(QPDFObjectHandle::TokenFilter* filter,
  154 + Pipeline* next = 0);
  155 +
  156 + // Old name -- calls filterContents()
152 QPDF_DLL 157 QPDF_DLL
153 void filterPageContents(QPDFObjectHandle::TokenFilter* filter, 158 void filterPageContents(QPDFObjectHandle::TokenFilter* filter,
154 Pipeline* next = 0); 159 Pipeline* next = 0);
@@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
265 private: 270 private:
266 static void 271 static void
267 removeUnreferencedResourcesHelper( 272 removeUnreferencedResourcesHelper(
268 - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,  
269 - std::function<QPDFObjectHandle()> get_resource,  
270 - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); 273 + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen);
271 274
272 class Members 275 class Members
273 { 276 {
libqpdf/QPDFPageObjectHelper.cc
@@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size) @@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
435 QPDFObjectHandle::parse("<< /XObject << >> >>")); 435 QPDFObjectHandle::parse("<< /XObject << >> >>"));
436 InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); 436 InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
437 Pl_Buffer b("new page content"); 437 Pl_Buffer b("new page content");
438 - filterPageContents(&iit, &b); 438 + filterContents(&iit, &b);
439 if (iit.any_images) 439 if (iit.any_images)
440 { 440 {
441 getObjectHandle().replaceKey( 441 getObjectHandle().replaceKey(
@@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents( @@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents(
504 QPDFObjectHandle::TokenFilter* filter, 504 QPDFObjectHandle::TokenFilter* filter,
505 Pipeline* next) 505 Pipeline* next)
506 { 506 {
507 - this->oh.filterPageContents(filter, next); 507 + return filterContents(filter, next);
  508 +}
  509 +
  510 +void
  511 +QPDFPageObjectHelper::filterContents(
  512 + QPDFObjectHandle::TokenFilter* filter,
  513 + Pipeline* next)
  514 +{
  515 + if (this->oh.isFormXObject())
  516 + {
  517 + this->oh.filterAsContents(filter, next);
  518 + }
  519 + else
  520 + {
  521 + this->oh.filterPageContents(filter, next);
  522 + }
508 } 523 }
509 524
510 void 525 void
@@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) @@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
554 569
555 void 570 void
556 QPDFPageObjectHelper::removeUnreferencedResourcesHelper( 571 QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
557 - QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,  
558 - std::function<QPDFObjectHandle()> get_resource,  
559 - std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) 572 + QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen)
560 { 573 {
561 - if (seen.count(oh.getObjGen())) 574 + if (seen.count(ph.oh.getObjGen()))
562 { 575 {
563 return; 576 return;
564 } 577 }
565 - seen.insert(oh.getObjGen()); 578 + seen.insert(ph.oh.getObjGen());
566 NameWatcher nw; 579 NameWatcher nw;
567 try 580 try
568 { 581 {
569 - filter_content(&nw); 582 + ph.filterContents(&nw);
570 } 583 }
571 catch (std::exception& e) 584 catch (std::exception& e)
572 { 585 {
573 - oh.warnIfPossible( 586 + ph.oh.warnIfPossible(
574 std::string("Unable to parse content stream: ") + e.what() + 587 std::string("Unable to parse content stream: ") + e.what() +
575 "; not attempting to remove unreferenced objects from this page"); 588 "; not attempting to remove unreferenced objects from this page");
576 return; 589 return;
@@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
578 if (nw.saw_bad) 591 if (nw.saw_bad)
579 { 592 {
580 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); 593 QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
581 - oh.warnIfPossible( 594 + ph.oh.warnIfPossible(
582 "Bad token found while scanning content stream; " 595 "Bad token found while scanning content stream; "
583 "not attempting to remove unreferenced objects from this page"); 596 "not attempting to remove unreferenced objects from this page");
584 return; 597 return;
@@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
591 std::vector<std::string> to_filter; 604 std::vector<std::string> to_filter;
592 to_filter.push_back("/Font"); 605 to_filter.push_back("/Font");
593 to_filter.push_back("/XObject"); 606 to_filter.push_back("/XObject");
594 - QPDFObjectHandle resources = get_resource(); 607 + QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
595 for (std::vector<std::string>::iterator d_iter = to_filter.begin(); 608 for (std::vector<std::string>::iterator d_iter = to_filter.begin();
596 d_iter != to_filter.end(); ++d_iter) 609 d_iter != to_filter.end(); ++d_iter)
597 { 610 {
@@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
615 { 628 {
616 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); 629 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
617 removeUnreferencedResourcesHelper( 630 removeUnreferencedResourcesHelper(
618 - resource.getDict(), seen,  
619 - [&resource]() {  
620 - return QPDFPageObjectHelper(resource)  
621 - .getAttribute("/Resources", true);  
622 - },  
623 - [&resource](QPDFObjectHandle::TokenFilter* f) {  
624 - resource.filterAsContents(f);  
625 - }); 631 + QPDFPageObjectHelper(resource), seen);
626 } 632 }
627 } 633 }
628 } 634 }
@@ -632,12 +638,7 @@ void @@ -632,12 +638,7 @@ void
632 QPDFPageObjectHelper::removeUnreferencedResources() 638 QPDFPageObjectHelper::removeUnreferencedResources()
633 { 639 {
634 std::set<QPDFObjGen> seen; 640 std::set<QPDFObjGen> seen;
635 - removeUnreferencedResourcesHelper(  
636 - this->oh, seen,  
637 - [this]() { return this->getAttribute("/Resources", true); },  
638 - [this](QPDFObjectHandle::TokenFilter* f) {  
639 - this->filterPageContents(f);  
640 - }); 641 + removeUnreferencedResourcesHelper(*this, seen);
641 } 642 }
642 643
643 QPDFPageObjectHelper 644 QPDFPageObjectHelper