Commit be3a8c0e7a5edd30cb8a0f2e7cbc56d0e5bed982

Authored by Jay Berkenbilt
1 parent 50037fb3

Keep only referenced form fields in --pages

include/qpdf/QPDFAcroFormDocumentHelper.hh
@@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
140 std::vector<QPDFAnnotationObjectHelper> 140 std::vector<QPDFAnnotationObjectHelper>
141 getWidgetAnnotationsForPage(QPDFPageObjectHelper); 141 getWidgetAnnotationsForPage(QPDFPageObjectHelper);
142 142
143 - // Return form fields for a page. 143 + // Return top-level form fields for a page.
144 QPDF_DLL 144 QPDF_DLL
145 std::vector<QPDFFormFieldObjectHelper> 145 std::vector<QPDFFormFieldObjectHelper>
146 getFormFieldsForPage(QPDFPageObjectHelper); 146 getFormFieldsForPage(QPDFPageObjectHelper);
@@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
210 QPDFAcroFormDocumentHelper* from_afdh = nullptr); 210 QPDFAcroFormDocumentHelper* from_afdh = nullptr);
211 211
212 // Copy form fields from a page in a different QPDF object to this 212 // Copy form fields from a page in a different QPDF object to this
213 - // QPDF. 213 + // QPDF. If copied_fields is not null, it will be initialized with
  214 + // the fields that were copied. Items in the vector are objects in
  215 + // the receiving QPDF (the one associated with this
  216 + // QPDFAcroFormDocumentHelper).
214 QPDF_DLL 217 QPDF_DLL
215 void copyFieldsFromForeignPage( 218 void copyFieldsFromForeignPage(
216 QPDFPageObjectHelper foreign_page, 219 QPDFPageObjectHelper foreign_page,
217 - QPDFAcroFormDocumentHelper& foreign_afdh); 220 + QPDFAcroFormDocumentHelper& foreign_afdh,
  221 + std::vector<QPDFObjectHandle>* copied_fields = nullptr);
218 222
219 private: 223 private:
220 void analyze(); 224 void analyze();
libqpdf/QPDFAcroFormDocumentHelper.cc
@@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h)
135 std::vector<QPDFFormFieldObjectHelper> 135 std::vector<QPDFFormFieldObjectHelper>
136 QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) 136 QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph)
137 { 137 {
  138 + std::set<QPDFObjGen> added;
138 std::vector<QPDFFormFieldObjectHelper> result; 139 std::vector<QPDFFormFieldObjectHelper> result;
139 auto widget_annotations = getWidgetAnnotationsForPage(ph); 140 auto widget_annotations = getWidgetAnnotationsForPage(ph);
140 for (auto annot: widget_annotations) 141 for (auto annot: widget_annotations)
141 { 142 {
142 auto field = getFieldForAnnotation(annot); 143 auto field = getFieldForAnnotation(annot);
143 field = field.getTopLevelField(); 144 field = field.getTopLevelField();
144 - if (field.getObjectHandle().isDictionary()) 145 + auto og = field.getObjectHandle().getObjGen();
  146 + if (! added.count(og))
145 { 147 {
146 - result.push_back(field); 148 + added.insert(og);
  149 + if (field.getObjectHandle().isDictionary())
  150 + {
  151 + result.push_back(field);
  152 + }
147 } 153 }
148 } 154 }
149 return result; 155 return result;
@@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
674 void 680 void
675 QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage( 681 QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage(
676 QPDFPageObjectHelper foreign_page, 682 QPDFPageObjectHelper foreign_page,
677 - QPDFAcroFormDocumentHelper& foreign_afdh) 683 + QPDFAcroFormDocumentHelper& foreign_afdh,
  684 + std::vector<QPDFObjectHandle>* copied_fields)
678 { 685 {
679 std::set<QPDFObjGen> added; 686 std::set<QPDFObjGen> added;
680 for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page)) 687 for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page))
681 { 688 {
682 auto new_field = this->qpdf.copyForeignObject( 689 auto new_field = this->qpdf.copyForeignObject(
683 field.getObjectHandle()); 690 field.getObjectHandle());
  691 + if (! new_field.isIndirect())
  692 + {
  693 + new_field = this->qpdf.makeIndirectObject(new_field);
  694 + }
684 auto og = new_field.getObjGen(); 695 auto og = new_field.getObjGen();
685 if (! added.count(og)) 696 if (! added.count(og))
686 { 697 {
687 addFormField(new_field); 698 addFormField(new_field);
688 added.insert(og); 699 added.insert(og);
  700 + if (copied_fields)
  701 + {
  702 + copied_fields->push_back(new_field);
  703 + }
689 } 704 }
690 } 705 }
691 } 706 }
qpdf/qpdf.cc
@@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5846 std::map<unsigned long long, 5846 std::map<unsigned long long,
5847 PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map; 5847 PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
5848 auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); 5848 auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
  5849 + std::set<QPDFObjGen> referenced_fields;
5849 for (std::vector<QPDFPageData>::iterator iter = 5850 for (std::vector<QPDFPageData>::iterator iter =
5850 parsed_specs.begin(); 5851 parsed_specs.begin();
5851 iter != parsed_specs.end(); ++iter) 5852 iter != parsed_specs.end(); ++iter)
@@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5906 else if (other_afdh->hasAcroForm()) 5907 else if (other_afdh->hasAcroForm())
5907 { 5908 {
5908 QTC::TC("qpdf", "qpdf copy form fields in pages"); 5909 QTC::TC("qpdf", "qpdf copy form fields in pages");
5909 - this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh); 5910 + std::vector<QPDFObjectHandle> copied_fields;
  5911 + this_afdh->copyFieldsFromForeignPage(
  5912 + to_copy, *other_afdh, &copied_fields);
  5913 + for (auto const& cf: copied_fields)
  5914 + {
  5915 + referenced_fields.insert(cf.getObjGen());
  5916 + }
5910 } 5917 }
5911 } 5918 }
5912 if (page_data.qpdf->anyWarnings()) 5919 if (page_data.qpdf->anyWarnings())
@@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5929 5936
5930 // Delete page objects for unused page in primary. This prevents 5937 // Delete page objects for unused page in primary. This prevents
5931 // those objects from being preserved by being referred to from 5938 // those objects from being preserved by being referred to from
5932 - // other places, such as the outlines dictionary. 5939 + // other places, such as the outlines dictionary. Also make sure
  5940 + // we keep form fields from pages we preserved.
5933 for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) 5941 for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
5934 { 5942 {
5935 - if (selected_from_orig.count(QIntC::to_int(pageno)) == 0) 5943 + auto page = orig_pages.at(pageno);
  5944 + if (selected_from_orig.count(QIntC::to_int(pageno)))
  5945 + {
  5946 + for (auto field: this_afdh->getFormFieldsForPage(page))
  5947 + {
  5948 + QTC::TC("qpdf", "qpdf pages keeping field from original");
  5949 + referenced_fields.insert(field.getObjectHandle().getObjGen());
  5950 + }
  5951 + }
  5952 + else
5936 { 5953 {
5937 pdf.replaceObject( 5954 pdf.replaceObject(
5938 - orig_pages.at(pageno).getObjectHandle().getObjGen(), 5955 + page.getObjectHandle().getObjGen(),
5939 QPDFObjectHandle::newNull()); 5956 QPDFObjectHandle::newNull());
5940 } 5957 }
5941 } 5958 }
  5959 + // Remove unreferenced form fields
  5960 + if (this_afdh->hasAcroForm())
  5961 + {
  5962 + auto acroform = pdf.getRoot().getKey("/AcroForm");
  5963 + auto fields = acroform.getKey("/Fields");
  5964 + if (fields.isArray())
  5965 + {
  5966 + auto new_fields = QPDFObjectHandle::newArray();
  5967 + if (fields.isIndirect())
  5968 + {
  5969 + new_fields = pdf.makeIndirectObject(new_fields);
  5970 + }
  5971 + for (auto const& field: fields.aitems())
  5972 + {
  5973 + if (referenced_fields.count(field.getObjGen()))
  5974 + {
  5975 + new_fields.appendItem(field);
  5976 + }
  5977 + }
  5978 + if (new_fields.getArrayNItems() > 0)
  5979 + {
  5980 + QTC::TC("qpdf", "qpdf keep some fields in pages");
  5981 + acroform.replaceKey("/Fields", new_fields);
  5982 + }
  5983 + else
  5984 + {
  5985 + QTC::TC("qpdf", "qpdf no more fields in pages");
  5986 + pdf.getRoot().removeKey("/AcroForm");
  5987 + }
  5988 + }
  5989 + }
5942 } 5990 }
5943 5991
5944 static void handle_rotations(QPDF& pdf, Options& o) 5992 static void handle_rotations(QPDF& pdf, Options& o)
qpdf/qpdf.testcov
@@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3 @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3
577 QPDFAcroFormDocumentHelper modify ap matrix 0 577 QPDFAcroFormDocumentHelper modify ap matrix 0
578 qpdf copy form fields in split_pages 0 578 qpdf copy form fields in split_pages 0
579 qpdf copy form fields in pages 0 579 qpdf copy form fields in pages 0
  580 +qpdf keep some fields in pages 0
  581 +qpdf pages keeping field from original 0
  582 +qpdf no more fields in pages 0
qpdf/qtest/qpdf.test
@@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print)) @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print))
2414 show_ntests(); 2414 show_ntests();
2415 # ---------- 2415 # ----------
2416 $td->notify("--- Copy Annotations ---"); 2416 $td->notify("--- Copy Annotations ---");
2417 -$n_tests += 21; 2417 +$n_tests += 25;
2418 2418
2419 $td->runtest("complex copy annotations", 2419 $td->runtest("complex copy annotations",
2420 {$td->COMMAND => 2420 {$td->COMMAND =>
@@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i) @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i)
2479 {$td->FILE => "split-out-$i.pdf"}, 2479 {$td->FILE => "split-out-$i.pdf"},
2480 {$td->FILE => "fields-split-$i.pdf"}); 2480 {$td->FILE => "fields-split-$i.pdf"});
2481 } 2481 }
  2482 +$td->runtest("keeping some fields",
  2483 + {$td->COMMAND =>
  2484 + "qpdf --static-id fields-two-pages.pdf" .
  2485 + " --pages . 1 minimal.pdf -- a.pdf"},
  2486 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  2487 + $td->NORMALIZE_NEWLINES);
  2488 +$td->runtest("check output",
  2489 + {$td->FILE => "a.pdf"},
  2490 + {$td->FILE => "kept-some-fields.pdf"});
  2491 +$td->runtest("not keeping any fields",
  2492 + {$td->COMMAND =>
  2493 + "qpdf --static-id kept-some-fields.pdf" .
  2494 + " --pages . 2 -- a.pdf"},
  2495 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  2496 + $td->NORMALIZE_NEWLINES);
  2497 +$td->runtest("check output",
  2498 + {$td->FILE => "a.pdf"},
  2499 + {$td->FILE => "kept-no-fields.pdf"});
2482 2500
2483 show_ntests(); 2501 show_ntests();
2484 # ---------- 2502 # ----------
qpdf/qtest/qpdf/kept-no-fields.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/kept-some-fields.pdf 0 → 100644
No preview for this file type