Commit be3a8c0e7a5edd30cb8a0f2e7cbc56d0e5bed982
1 parent: 50037fb3
Keep only referenced form fields in --pages
Showing 7 changed files with 99 additions and 11 deletions
include/qpdf/QPDFAcroFormDocumentHelper.hh
| ... | ... | @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper |
| 140 | 140 | std::vector<QPDFAnnotationObjectHelper> |
| 141 | 141 | getWidgetAnnotationsForPage(QPDFPageObjectHelper); |
| 142 | 142 | |
| 143 | - // Return form fields for a page. | |
| 143 | + // Return top-level form fields for a page. | |
| 144 | 144 | QPDF_DLL |
| 145 | 145 | std::vector<QPDFFormFieldObjectHelper> |
| 146 | 146 | getFormFieldsForPage(QPDFPageObjectHelper); |
| ... | ... | @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper |
| 210 | 210 | QPDFAcroFormDocumentHelper* from_afdh = nullptr); |
| 211 | 211 | |
| 212 | 212 | // Copy form fields from a page in a different QPDF object to this |
| 213 | - // QPDF. | |
| 213 | + // QPDF. If copied_fields is not null, it will be initialized with | |
| 214 | + // the fields that were copied. Items in the vector are objects in | |
| 215 | + // the receiving QPDF (the one associated with this | |
| 216 | + // QPDFAcroFormDocumentHelper). | |
| 214 | 217 | QPDF_DLL |
| 215 | 218 | void copyFieldsFromForeignPage( |
| 216 | 219 | QPDFPageObjectHelper foreign_page, |
| 217 | - QPDFAcroFormDocumentHelper& foreign_afdh); | |
| 220 | + QPDFAcroFormDocumentHelper& foreign_afdh, | |
| 221 | + std::vector<QPDFObjectHandle>* copied_fields = nullptr); | |
| 218 | 222 | |
| 219 | 223 | private: |
| 220 | 224 | void analyze(); | ... | ... |
libqpdf/QPDFAcroFormDocumentHelper.cc
| ... | ... | @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) |
| 135 | 135 | std::vector<QPDFFormFieldObjectHelper> |
| 136 | 136 | QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) |
| 137 | 137 | { |
| 138 | + std::set<QPDFObjGen> added; | |
| 138 | 139 | std::vector<QPDFFormFieldObjectHelper> result; |
| 139 | 140 | auto widget_annotations = getWidgetAnnotationsForPage(ph); |
| 140 | 141 | for (auto annot: widget_annotations) |
| 141 | 142 | { |
| 142 | 143 | auto field = getFieldForAnnotation(annot); |
| 143 | 144 | field = field.getTopLevelField(); |
| 144 | - if (field.getObjectHandle().isDictionary()) | |
| 145 | + auto og = field.getObjectHandle().getObjGen(); | |
| 146 | + if (! added.count(og)) | |
| 145 | 147 | { |
| 146 | - result.push_back(field); | |
| 148 | + added.insert(og); | |
| 149 | + if (field.getObjectHandle().isDictionary()) | |
| 150 | + { | |
| 151 | + result.push_back(field); | |
| 152 | + } | |
| 147 | 153 | } |
| 148 | 154 | } |
| 149 | 155 | return result; |
| ... | ... | @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations( |
| 674 | 680 | void |
| 675 | 681 | QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage( |
| 676 | 682 | QPDFPageObjectHelper foreign_page, |
| 677 | - QPDFAcroFormDocumentHelper& foreign_afdh) | |
| 683 | + QPDFAcroFormDocumentHelper& foreign_afdh, | |
| 684 | + std::vector<QPDFObjectHandle>* copied_fields) | |
| 678 | 685 | { |
| 679 | 686 | std::set<QPDFObjGen> added; |
| 680 | 687 | for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page)) |
| 681 | 688 | { |
| 682 | 689 | auto new_field = this->qpdf.copyForeignObject( |
| 683 | 690 | field.getObjectHandle()); |
| 691 | + if (! new_field.isIndirect()) | |
| 692 | + { | |
| 693 | + new_field = this->qpdf.makeIndirectObject(new_field); | |
| 694 | + } | |
| 684 | 695 | auto og = new_field.getObjGen(); |
| 685 | 696 | if (! added.count(og)) |
| 686 | 697 | { |
| 687 | 698 | addFormField(new_field); |
| 688 | 699 | added.insert(og); |
| 700 | + if (copied_fields) | |
| 701 | + { | |
| 702 | + copied_fields->push_back(new_field); | |
| 703 | + } | |
| 689 | 704 | } |
| 690 | 705 | } |
| 691 | 706 | } | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) |
| 5846 | 5846 | std::map<unsigned long long, |
| 5847 | 5847 | PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map; |
| 5848 | 5848 | auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); |
| 5849 | + std::set<QPDFObjGen> referenced_fields; | |
| 5849 | 5850 | for (std::vector<QPDFPageData>::iterator iter = |
| 5850 | 5851 | parsed_specs.begin(); |
| 5851 | 5852 | iter != parsed_specs.end(); ++iter) |
| ... | ... | @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) |
| 5906 | 5907 | else if (other_afdh->hasAcroForm()) |
| 5907 | 5908 | { |
| 5908 | 5909 | QTC::TC("qpdf", "qpdf copy form fields in pages"); |
| 5909 | - this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh); | |
| 5910 | + std::vector<QPDFObjectHandle> copied_fields; | |
| 5911 | + this_afdh->copyFieldsFromForeignPage( | |
| 5912 | + to_copy, *other_afdh, &copied_fields); | |
| 5913 | + for (auto const& cf: copied_fields) | |
| 5914 | + { | |
| 5915 | + referenced_fields.insert(cf.getObjGen()); | |
| 5916 | + } | |
| 5910 | 5917 | } |
| 5911 | 5918 | } |
| 5912 | 5919 | if (page_data.qpdf->anyWarnings()) |
| ... | ... | @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) |
| 5929 | 5936 | |
| 5930 | 5937 | // Delete page objects for unused page in primary. This prevents |
| 5931 | 5938 | // those objects from being preserved by being referred to from |
| 5932 | - // other places, such as the outlines dictionary. | |
| 5939 | + // other places, such as the outlines dictionary. Also make sure | |
| 5940 | + // we keep form fields from pages we preserved. | |
| 5933 | 5941 | for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) |
| 5934 | 5942 | { |
| 5935 | - if (selected_from_orig.count(QIntC::to_int(pageno)) == 0) | |
| 5943 | + auto page = orig_pages.at(pageno); | |
| 5944 | + if (selected_from_orig.count(QIntC::to_int(pageno))) | |
| 5945 | + { | |
| 5946 | + for (auto field: this_afdh->getFormFieldsForPage(page)) | |
| 5947 | + { | |
| 5948 | + QTC::TC("qpdf", "qpdf pages keeping field from original"); | |
| 5949 | + referenced_fields.insert(field.getObjectHandle().getObjGen()); | |
| 5950 | + } | |
| 5951 | + } | |
| 5952 | + else | |
| 5936 | 5953 | { |
| 5937 | 5954 | pdf.replaceObject( |
| 5938 | - orig_pages.at(pageno).getObjectHandle().getObjGen(), | |
| 5955 | + page.getObjectHandle().getObjGen(), | |
| 5939 | 5956 | QPDFObjectHandle::newNull()); |
| 5940 | 5957 | } |
| 5941 | 5958 | } |
| 5959 | + // Remove unreferenced form fields | |
| 5960 | + if (this_afdh->hasAcroForm()) | |
| 5961 | + { | |
| 5962 | + auto acroform = pdf.getRoot().getKey("/AcroForm"); | |
| 5963 | + auto fields = acroform.getKey("/Fields"); | |
| 5964 | + if (fields.isArray()) | |
| 5965 | + { | |
| 5966 | + auto new_fields = QPDFObjectHandle::newArray(); | |
| 5967 | + if (fields.isIndirect()) | |
| 5968 | + { | |
| 5969 | + new_fields = pdf.makeIndirectObject(new_fields); | |
| 5970 | + } | |
| 5971 | + for (auto const& field: fields.aitems()) | |
| 5972 | + { | |
| 5973 | + if (referenced_fields.count(field.getObjGen())) | |
| 5974 | + { | |
| 5975 | + new_fields.appendItem(field); | |
| 5976 | + } | |
| 5977 | + } | |
| 5978 | + if (new_fields.getArrayNItems() > 0) | |
| 5979 | + { | |
| 5980 | + QTC::TC("qpdf", "qpdf keep some fields in pages"); | |
| 5981 | + acroform.replaceKey("/Fields", new_fields); | |
| 5982 | + } | |
| 5983 | + else | |
| 5984 | + { | |
| 5985 | + QTC::TC("qpdf", "qpdf no more fields in pages"); | |
| 5986 | + pdf.getRoot().removeKey("/AcroForm"); | |
| 5987 | + } | |
| 5988 | + } | |
| 5989 | + } | |
| 5942 | 5990 | } |
| 5943 | 5991 | |
| 5944 | 5992 | static void handle_rotations(QPDF& pdf, Options& o) | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3 |
| 577 | 577 | QPDFAcroFormDocumentHelper modify ap matrix 0 |
| 578 | 578 | qpdf copy form fields in split_pages 0 |
| 579 | 579 | qpdf copy form fields in pages 0 |
| 580 | +qpdf keep some fields in pages 0 | |
| 581 | +qpdf pages keeping field from original 0 | |
| 582 | +qpdf no more fields in pages 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print)) |
| 2414 | 2414 | show_ntests(); |
| 2415 | 2415 | # ---------- |
| 2416 | 2416 | $td->notify("--- Copy Annotations ---"); |
| 2417 | -$n_tests += 21; | |
| 2417 | +$n_tests += 25; | |
| 2418 | 2418 | |
| 2419 | 2419 | $td->runtest("complex copy annotations", |
| 2420 | 2420 | {$td->COMMAND => |
| ... | ... | @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i) |
| 2479 | 2479 | {$td->FILE => "split-out-$i.pdf"}, |
| 2480 | 2480 | {$td->FILE => "fields-split-$i.pdf"}); |
| 2481 | 2481 | } |
| 2482 | +$td->runtest("keeping some fields", | |
| 2483 | + {$td->COMMAND => | |
| 2484 | + "qpdf --static-id fields-two-pages.pdf" . | |
| 2485 | + " --pages . 1 minimal.pdf -- a.pdf"}, | |
| 2486 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | |
| 2487 | + $td->NORMALIZE_NEWLINES); | |
| 2488 | +$td->runtest("check output", | |
| 2489 | + {$td->FILE => "a.pdf"}, | |
| 2490 | + {$td->FILE => "kept-some-fields.pdf"}); | |
| 2491 | +$td->runtest("not keeping any fields", | |
| 2492 | + {$td->COMMAND => | |
| 2493 | + "qpdf --static-id kept-some-fields.pdf" . | |
| 2494 | + " --pages . 2 -- a.pdf"}, | |
| 2495 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | |
| 2496 | + $td->NORMALIZE_NEWLINES); | |
| 2497 | +$td->runtest("check output", | |
| 2498 | + {$td->FILE => "a.pdf"}, | |
| 2499 | + {$td->FILE => "kept-no-fields.pdf"}); | |
| 2482 | 2500 | |
| 2483 | 2501 | show_ntests(); |
| 2484 | 2502 | # ---------- | ... | ... |
qpdf/qtest/qpdf/kept-no-fields.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/kept-some-fields.pdf
0 → 100644
No preview for this file type