Commit be3a8c0e7a5edd30cb8a0f2e7cbc56d0e5bed982

Authored by Jay Berkenbilt
1 parent 50037fb3

Keep only referenced form fields in --pages

include/qpdf/QPDFAcroFormDocumentHelper.hh
... ... @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
140 140 std::vector<QPDFAnnotationObjectHelper>
141 141 getWidgetAnnotationsForPage(QPDFPageObjectHelper);
142 142  
143   - // Return form fields for a page.
  143 + // Return top-level form fields for a page.
144 144 QPDF_DLL
145 145 std::vector<QPDFFormFieldObjectHelper>
146 146 getFormFieldsForPage(QPDFPageObjectHelper);
... ... @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
210 210 QPDFAcroFormDocumentHelper* from_afdh = nullptr);
211 211  
212 212 // Copy form fields from a page in a different QPDF object to this
213   - // QPDF.
  213 + // QPDF. If copied_fields is not null, it will be initialized with
  214 + // the fields that were copied. Items in the vector are objects in
  215 + // the receiving QPDF (the one associated with this
  216 + // QPDFAcroFormDocumentHelper).
214 217 QPDF_DLL
215 218 void copyFieldsFromForeignPage(
216 219 QPDFPageObjectHelper foreign_page,
217   - QPDFAcroFormDocumentHelper& foreign_afdh);
  220 + QPDFAcroFormDocumentHelper& foreign_afdh,
  221 + std::vector<QPDFObjectHandle>* copied_fields = nullptr);
218 222  
219 223 private:
220 224 void analyze();
... ...
libqpdf/QPDFAcroFormDocumentHelper.cc
... ... @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h)
135 135 std::vector<QPDFFormFieldObjectHelper>
136 136 QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph)
137 137 {
  138 + std::set<QPDFObjGen> added;
138 139 std::vector<QPDFFormFieldObjectHelper> result;
139 140 auto widget_annotations = getWidgetAnnotationsForPage(ph);
140 141 for (auto annot: widget_annotations)
141 142 {
142 143 auto field = getFieldForAnnotation(annot);
143 144 field = field.getTopLevelField();
144   - if (field.getObjectHandle().isDictionary())
  145 + auto og = field.getObjectHandle().getObjGen();
  146 + if (! added.count(og))
145 147 {
146   - result.push_back(field);
  148 + added.insert(og);
  149 + if (field.getObjectHandle().isDictionary())
  150 + {
  151 + result.push_back(field);
  152 + }
147 153 }
148 154 }
149 155 return result;
... ... @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
674 680 void
675 681 QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage(
676 682 QPDFPageObjectHelper foreign_page,
677   - QPDFAcroFormDocumentHelper& foreign_afdh)
  683 + QPDFAcroFormDocumentHelper& foreign_afdh,
  684 + std::vector<QPDFObjectHandle>* copied_fields)
678 685 {
679 686 std::set<QPDFObjGen> added;
680 687 for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page))
681 688 {
682 689 auto new_field = this->qpdf.copyForeignObject(
683 690 field.getObjectHandle());
  691 + if (! new_field.isIndirect())
  692 + {
  693 + new_field = this->qpdf.makeIndirectObject(new_field);
  694 + }
684 695 auto og = new_field.getObjGen();
685 696 if (! added.count(og))
686 697 {
687 698 addFormField(new_field);
688 699 added.insert(og);
  700 + if (copied_fields)
  701 + {
  702 + copied_fields->push_back(new_field);
  703 + }
689 704 }
690 705 }
691 706 }
... ...
qpdf/qpdf.cc
... ... @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5846 5846 std::map<unsigned long long,
5847 5847 PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
5848 5848 auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
  5849 + std::set<QPDFObjGen> referenced_fields;
5849 5850 for (std::vector<QPDFPageData>::iterator iter =
5850 5851 parsed_specs.begin();
5851 5852 iter != parsed_specs.end(); ++iter)
... ... @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5906 5907 else if (other_afdh->hasAcroForm())
5907 5908 {
5908 5909 QTC::TC("qpdf", "qpdf copy form fields in pages");
5909   - this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh);
  5910 + std::vector<QPDFObjectHandle> copied_fields;
  5911 + this_afdh->copyFieldsFromForeignPage(
  5912 + to_copy, *other_afdh, &copied_fields);
  5913 + for (auto const& cf: copied_fields)
  5914 + {
  5915 + referenced_fields.insert(cf.getObjGen());
  5916 + }
5910 5917 }
5911 5918 }
5912 5919 if (page_data.qpdf->anyWarnings())
... ... @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
5929 5936  
5930 5937 // Delete page objects for unused page in primary. This prevents
5931 5938 // those objects from being preserved by being referred to from
5932   - // other places, such as the outlines dictionary.
  5939 + // other places, such as the outlines dictionary. Also make sure
  5940 + // we keep form fields from pages we preserved.
5933 5941 for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
5934 5942 {
5935   - if (selected_from_orig.count(QIntC::to_int(pageno)) == 0)
  5943 + auto page = orig_pages.at(pageno);
  5944 + if (selected_from_orig.count(QIntC::to_int(pageno)))
  5945 + {
  5946 + for (auto field: this_afdh->getFormFieldsForPage(page))
  5947 + {
  5948 + QTC::TC("qpdf", "qpdf pages keeping field from original");
  5949 + referenced_fields.insert(field.getObjectHandle().getObjGen());
  5950 + }
  5951 + }
  5952 + else
5936 5953 {
5937 5954 pdf.replaceObject(
5938   - orig_pages.at(pageno).getObjectHandle().getObjGen(),
  5955 + page.getObjectHandle().getObjGen(),
5939 5956 QPDFObjectHandle::newNull());
5940 5957 }
5941 5958 }
  5959 + // Remove unreferenced form fields
  5960 + if (this_afdh->hasAcroForm())
  5961 + {
  5962 + auto acroform = pdf.getRoot().getKey("/AcroForm");
  5963 + auto fields = acroform.getKey("/Fields");
  5964 + if (fields.isArray())
  5965 + {
  5966 + auto new_fields = QPDFObjectHandle::newArray();
  5967 + if (fields.isIndirect())
  5968 + {
  5969 + new_fields = pdf.makeIndirectObject(new_fields);
  5970 + }
  5971 + for (auto const& field: fields.aitems())
  5972 + {
  5973 + if (referenced_fields.count(field.getObjGen()))
  5974 + {
  5975 + new_fields.appendItem(field);
  5976 + }
  5977 + }
  5978 + if (new_fields.getArrayNItems() > 0)
  5979 + {
  5980 + QTC::TC("qpdf", "qpdf keep some fields in pages");
  5981 + acroform.replaceKey("/Fields", new_fields);
  5982 + }
  5983 + else
  5984 + {
  5985 + QTC::TC("qpdf", "qpdf no more fields in pages");
  5986 + pdf.getRoot().removeKey("/AcroForm");
  5987 + }
  5988 + }
  5989 + }
5942 5990 }
5943 5991  
5944 5992 static void handle_rotations(QPDF& pdf, Options& o)
... ...
qpdf/qpdf.testcov
... ... @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3
577 577 QPDFAcroFormDocumentHelper modify ap matrix 0
578 578 qpdf copy form fields in split_pages 0
579 579 qpdf copy form fields in pages 0
  580 +qpdf keep some fields in pages 0
  581 +qpdf pages keeping field from original 0
  582 +qpdf no more fields in pages 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print))
2414 2414 show_ntests();
2415 2415 # ----------
2416 2416 $td->notify("--- Copy Annotations ---");
2417   -$n_tests += 21;
  2417 +$n_tests += 25;
2418 2418  
2419 2419 $td->runtest("complex copy annotations",
2420 2420 {$td->COMMAND =>
... ... @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i)
2479 2479 {$td->FILE => "split-out-$i.pdf"},
2480 2480 {$td->FILE => "fields-split-$i.pdf"});
2481 2481 }
  2482 +$td->runtest("keeping some fields",
  2483 + {$td->COMMAND =>
  2484 + "qpdf --static-id fields-two-pages.pdf" .
  2485 + " --pages . 1 minimal.pdf -- a.pdf"},
  2486 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  2487 + $td->NORMALIZE_NEWLINES);
  2488 +$td->runtest("check output",
  2489 + {$td->FILE => "a.pdf"},
  2490 + {$td->FILE => "kept-some-fields.pdf"});
  2491 +$td->runtest("not keeping any fields",
  2492 + {$td->COMMAND =>
  2493 + "qpdf --static-id kept-some-fields.pdf" .
  2494 + " --pages . 2 -- a.pdf"},
  2495 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  2496 + $td->NORMALIZE_NEWLINES);
  2497 +$td->runtest("check output",
  2498 + {$td->FILE => "a.pdf"},
  2499 + {$td->FILE => "kept-no-fields.pdf"});
2482 2500  
2483 2501 show_ntests();
2484 2502 # ----------
... ...
qpdf/qtest/qpdf/kept-no-fields.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/kept-some-fields.pdf 0 → 100644
No preview for this file type