Commit be3a8c0e7a5edd30cb8a0f2e7cbc56d0e5bed982
1 parent: 50037fb3
Keep only referenced form fields in --pages
Showing 7 changed files with 99 additions and 11 deletions.
include/qpdf/QPDFAcroFormDocumentHelper.hh
| @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 140 | std::vector<QPDFAnnotationObjectHelper> | 140 | std::vector<QPDFAnnotationObjectHelper> |
| 141 | getWidgetAnnotationsForPage(QPDFPageObjectHelper); | 141 | getWidgetAnnotationsForPage(QPDFPageObjectHelper); |
| 142 | 142 | ||
| 143 | - // Return form fields for a page. | 143 | + // Return top-level form fields for a page. |
| 144 | QPDF_DLL | 144 | QPDF_DLL |
| 145 | std::vector<QPDFFormFieldObjectHelper> | 145 | std::vector<QPDFFormFieldObjectHelper> |
| 146 | getFormFieldsForPage(QPDFPageObjectHelper); | 146 | getFormFieldsForPage(QPDFPageObjectHelper); |
| @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 210 | QPDFAcroFormDocumentHelper* from_afdh = nullptr); | 210 | QPDFAcroFormDocumentHelper* from_afdh = nullptr); |
| 211 | 211 | ||
| 212 | // Copy form fields from a page in a different QPDF object to this | 212 | // Copy form fields from a page in a different QPDF object to this |
| 213 | - // QPDF. | 213 | + // QPDF. If copied_fields is not null, it will be initialized with |
| 214 | + // the fields that were copied. Items in the vector are objects in | ||
| 215 | + // the receiving QPDF (the one associated with this | ||
| 216 | + // QPDFAcroFormDocumentHelper). | ||
| 214 | QPDF_DLL | 217 | QPDF_DLL |
| 215 | void copyFieldsFromForeignPage( | 218 | void copyFieldsFromForeignPage( |
| 216 | QPDFPageObjectHelper foreign_page, | 219 | QPDFPageObjectHelper foreign_page, |
| 217 | - QPDFAcroFormDocumentHelper& foreign_afdh); | 220 | + QPDFAcroFormDocumentHelper& foreign_afdh, |
| 221 | + std::vector<QPDFObjectHandle>* copied_fields = nullptr); | ||
| 218 | 222 | ||
| 219 | private: | 223 | private: |
| 220 | void analyze(); | 224 | void analyze(); |
libqpdf/QPDFAcroFormDocumentHelper.cc
| @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) | @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) | ||
| 135 | std::vector<QPDFFormFieldObjectHelper> | 135 | std::vector<QPDFFormFieldObjectHelper> |
| 136 | QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) | 136 | QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) |
| 137 | { | 137 | { |
| 138 | + std::set<QPDFObjGen> added; | ||
| 138 | std::vector<QPDFFormFieldObjectHelper> result; | 139 | std::vector<QPDFFormFieldObjectHelper> result; |
| 139 | auto widget_annotations = getWidgetAnnotationsForPage(ph); | 140 | auto widget_annotations = getWidgetAnnotationsForPage(ph); |
| 140 | for (auto annot: widget_annotations) | 141 | for (auto annot: widget_annotations) |
| 141 | { | 142 | { |
| 142 | auto field = getFieldForAnnotation(annot); | 143 | auto field = getFieldForAnnotation(annot); |
| 143 | field = field.getTopLevelField(); | 144 | field = field.getTopLevelField(); |
| 144 | - if (field.getObjectHandle().isDictionary()) | 145 | + auto og = field.getObjectHandle().getObjGen(); |
| 146 | + if (! added.count(og)) | ||
| 145 | { | 147 | { |
| 146 | - result.push_back(field); | 148 | + added.insert(og); |
| 149 | + if (field.getObjectHandle().isDictionary()) | ||
| 150 | + { | ||
| 151 | + result.push_back(field); | ||
| 152 | + } | ||
| 147 | } | 153 | } |
| 148 | } | 154 | } |
| 149 | return result; | 155 | return result; |
| @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 674 | void | 680 | void |
| 675 | QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage( | 681 | QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage( |
| 676 | QPDFPageObjectHelper foreign_page, | 682 | QPDFPageObjectHelper foreign_page, |
| 677 | - QPDFAcroFormDocumentHelper& foreign_afdh) | 683 | + QPDFAcroFormDocumentHelper& foreign_afdh, |
| 684 | + std::vector<QPDFObjectHandle>* copied_fields) | ||
| 678 | { | 685 | { |
| 679 | std::set<QPDFObjGen> added; | 686 | std::set<QPDFObjGen> added; |
| 680 | for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page)) | 687 | for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page)) |
| 681 | { | 688 | { |
| 682 | auto new_field = this->qpdf.copyForeignObject( | 689 | auto new_field = this->qpdf.copyForeignObject( |
| 683 | field.getObjectHandle()); | 690 | field.getObjectHandle()); |
| 691 | + if (! new_field.isIndirect()) | ||
| 692 | + { | ||
| 693 | + new_field = this->qpdf.makeIndirectObject(new_field); | ||
| 694 | + } | ||
| 684 | auto og = new_field.getObjGen(); | 695 | auto og = new_field.getObjGen(); |
| 685 | if (! added.count(og)) | 696 | if (! added.count(og)) |
| 686 | { | 697 | { |
| 687 | addFormField(new_field); | 698 | addFormField(new_field); |
| 688 | added.insert(og); | 699 | added.insert(og); |
| 700 | + if (copied_fields) | ||
| 701 | + { | ||
| 702 | + copied_fields->push_back(new_field); | ||
| 703 | + } | ||
| 689 | } | 704 | } |
| 690 | } | 705 | } |
| 691 | } | 706 | } |
qpdf/qpdf.cc
| @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | ||
| 5846 | std::map<unsigned long long, | 5846 | std::map<unsigned long long, |
| 5847 | PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map; | 5847 | PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map; |
| 5848 | auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); | 5848 | auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); |
| 5849 | + std::set<QPDFObjGen> referenced_fields; | ||
| 5849 | for (std::vector<QPDFPageData>::iterator iter = | 5850 | for (std::vector<QPDFPageData>::iterator iter = |
| 5850 | parsed_specs.begin(); | 5851 | parsed_specs.begin(); |
| 5851 | iter != parsed_specs.end(); ++iter) | 5852 | iter != parsed_specs.end(); ++iter) |
| @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | ||
| 5906 | else if (other_afdh->hasAcroForm()) | 5907 | else if (other_afdh->hasAcroForm()) |
| 5907 | { | 5908 | { |
| 5908 | QTC::TC("qpdf", "qpdf copy form fields in pages"); | 5909 | QTC::TC("qpdf", "qpdf copy form fields in pages"); |
| 5909 | - this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh); | 5910 | + std::vector<QPDFObjectHandle> copied_fields; |
| 5911 | + this_afdh->copyFieldsFromForeignPage( | ||
| 5912 | + to_copy, *other_afdh, &copied_fields); | ||
| 5913 | + for (auto const& cf: copied_fields) | ||
| 5914 | + { | ||
| 5915 | + referenced_fields.insert(cf.getObjGen()); | ||
| 5916 | + } | ||
| 5910 | } | 5917 | } |
| 5911 | } | 5918 | } |
| 5912 | if (page_data.qpdf->anyWarnings()) | 5919 | if (page_data.qpdf->anyWarnings()) |
| @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) | ||
| 5929 | 5936 | ||
| 5930 | // Delete page objects for unused page in primary. This prevents | 5937 | // Delete page objects for unused page in primary. This prevents |
| 5931 | // those objects from being preserved by being referred to from | 5938 | // those objects from being preserved by being referred to from |
| 5932 | - // other places, such as the outlines dictionary. | 5939 | + // other places, such as the outlines dictionary. Also make sure |
| 5940 | + // we keep form fields from pages we preserved. | ||
| 5933 | for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) | 5941 | for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) |
| 5934 | { | 5942 | { |
| 5935 | - if (selected_from_orig.count(QIntC::to_int(pageno)) == 0) | 5943 | + auto page = orig_pages.at(pageno); |
| 5944 | + if (selected_from_orig.count(QIntC::to_int(pageno))) | ||
| 5945 | + { | ||
| 5946 | + for (auto field: this_afdh->getFormFieldsForPage(page)) | ||
| 5947 | + { | ||
| 5948 | + QTC::TC("qpdf", "qpdf pages keeping field from original"); | ||
| 5949 | + referenced_fields.insert(field.getObjectHandle().getObjGen()); | ||
| 5950 | + } | ||
| 5951 | + } | ||
| 5952 | + else | ||
| 5936 | { | 5953 | { |
| 5937 | pdf.replaceObject( | 5954 | pdf.replaceObject( |
| 5938 | - orig_pages.at(pageno).getObjectHandle().getObjGen(), | 5955 | + page.getObjectHandle().getObjGen(), |
| 5939 | QPDFObjectHandle::newNull()); | 5956 | QPDFObjectHandle::newNull()); |
| 5940 | } | 5957 | } |
| 5941 | } | 5958 | } |
| 5959 | + // Remove unreferenced form fields | ||
| 5960 | + if (this_afdh->hasAcroForm()) | ||
| 5961 | + { | ||
| 5962 | + auto acroform = pdf.getRoot().getKey("/AcroForm"); | ||
| 5963 | + auto fields = acroform.getKey("/Fields"); | ||
| 5964 | + if (fields.isArray()) | ||
| 5965 | + { | ||
| 5966 | + auto new_fields = QPDFObjectHandle::newArray(); | ||
| 5967 | + if (fields.isIndirect()) | ||
| 5968 | + { | ||
| 5969 | + new_fields = pdf.makeIndirectObject(new_fields); | ||
| 5970 | + } | ||
| 5971 | + for (auto const& field: fields.aitems()) | ||
| 5972 | + { | ||
| 5973 | + if (referenced_fields.count(field.getObjGen())) | ||
| 5974 | + { | ||
| 5975 | + new_fields.appendItem(field); | ||
| 5976 | + } | ||
| 5977 | + } | ||
| 5978 | + if (new_fields.getArrayNItems() > 0) | ||
| 5979 | + { | ||
| 5980 | + QTC::TC("qpdf", "qpdf keep some fields in pages"); | ||
| 5981 | + acroform.replaceKey("/Fields", new_fields); | ||
| 5982 | + } | ||
| 5983 | + else | ||
| 5984 | + { | ||
| 5985 | + QTC::TC("qpdf", "qpdf no more fields in pages"); | ||
| 5986 | + pdf.getRoot().removeKey("/AcroForm"); | ||
| 5987 | + } | ||
| 5988 | + } | ||
| 5989 | + } | ||
| 5942 | } | 5990 | } |
| 5943 | 5991 | ||
| 5944 | static void handle_rotations(QPDF& pdf, Options& o) | 5992 | static void handle_rotations(QPDF& pdf, Options& o) |
qpdf/qpdf.testcov
| @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3 | @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3 | ||
| 577 | QPDFAcroFormDocumentHelper modify ap matrix 0 | 577 | QPDFAcroFormDocumentHelper modify ap matrix 0 |
| 578 | qpdf copy form fields in split_pages 0 | 578 | qpdf copy form fields in split_pages 0 |
| 579 | qpdf copy form fields in pages 0 | 579 | qpdf copy form fields in pages 0 |
| 580 | +qpdf keep some fields in pages 0 | ||
| 581 | +qpdf pages keeping field from original 0 | ||
| 582 | +qpdf no more fields in pages 0 |
qpdf/qtest/qpdf.test
| @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print)) | @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print)) | ||
| 2414 | show_ntests(); | 2414 | show_ntests(); |
| 2415 | # ---------- | 2415 | # ---------- |
| 2416 | $td->notify("--- Copy Annotations ---"); | 2416 | $td->notify("--- Copy Annotations ---"); |
| 2417 | -$n_tests += 21; | 2417 | +$n_tests += 25; |
| 2418 | 2418 | ||
| 2419 | $td->runtest("complex copy annotations", | 2419 | $td->runtest("complex copy annotations", |
| 2420 | {$td->COMMAND => | 2420 | {$td->COMMAND => |
| @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i) | @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i) | ||
| 2479 | {$td->FILE => "split-out-$i.pdf"}, | 2479 | {$td->FILE => "split-out-$i.pdf"}, |
| 2480 | {$td->FILE => "fields-split-$i.pdf"}); | 2480 | {$td->FILE => "fields-split-$i.pdf"}); |
| 2481 | } | 2481 | } |
| 2482 | +$td->runtest("keeping some fields", | ||
| 2483 | + {$td->COMMAND => | ||
| 2484 | + "qpdf --static-id fields-two-pages.pdf" . | ||
| 2485 | + " --pages . 1 minimal.pdf -- a.pdf"}, | ||
| 2486 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | ||
| 2487 | + $td->NORMALIZE_NEWLINES); | ||
| 2488 | +$td->runtest("check output", | ||
| 2489 | + {$td->FILE => "a.pdf"}, | ||
| 2490 | + {$td->FILE => "kept-some-fields.pdf"}); | ||
| 2491 | +$td->runtest("not keeping any fields", | ||
| 2492 | + {$td->COMMAND => | ||
| 2493 | + "qpdf --static-id kept-some-fields.pdf" . | ||
| 2494 | + " --pages . 2 -- a.pdf"}, | ||
| 2495 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | ||
| 2496 | + $td->NORMALIZE_NEWLINES); | ||
| 2497 | +$td->runtest("check output", | ||
| 2498 | + {$td->FILE => "a.pdf"}, | ||
| 2499 | + {$td->FILE => "kept-no-fields.pdf"}); | ||
| 2482 | 2500 | ||
| 2483 | show_ntests(); | 2501 | show_ntests(); |
| 2484 | # ---------- | 2502 | # ---------- |
qpdf/qtest/qpdf/kept-no-fields.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/kept-some-fields.pdf
0 → 100644
No preview for this file type