Commit 7b4ecb6019b810feee0e2b077dbbe842ea78d531

Authored by m-holger
Committed by GitHub
2 parents 96e3677c f3109f67

Merge pull request #1617 from m-holger/i1403

Add `--remove-acroform` option to exclude AcroForm dictionary
include/qpdf/auto_job_c_main.hh
... ... @@ -32,6 +32,7 @@ QPDF_DLL Config* progress();
32 32 QPDF_DLL Config* qdf();
33 33 QPDF_DLL Config* rawStreamData();
34 34 QPDF_DLL Config* recompressFlate();
  35 +QPDF_DLL Config* removeAcroform();
35 36 QPDF_DLL Config* removeInfo();
36 37 QPDF_DLL Config* removeMetadata();
37 38 QPDF_DLL Config* removePageLabels();
... ...
job.sums
... ... @@ -5,17 +5,17 @@ include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf2
5 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
6 6 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
7 7 include/qpdf/auto_job_c_global.hh 7df0ff87d18d7fa6d57437960377509420b6b6eb9527b534996f86d3bd7a0ddc
8   -include/qpdf/auto_job_c_main.hh b865eb827356554763bb8349eadfcbc5cb260f80e025a5e229467c525007356d
  8 +include/qpdf/auto_job_c_main.hh 7a7cc311e194b778ea7568ece2335c00c9bdd8162431c264af2f6105548efc5f
9 9 include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506
10 10 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62
11   -job.yml fa98c8444c8a22a89aeeb76670aa5919aa7a86ebfac2eb45018602fbc7e45b79
  11 +job.yml 442ef5e7fb8027a0f3cd4816b614d59abb327b359e861d87426b706db142d2bc
12 12 libqpdf/qpdf/auto_job_decl.hh d612a02839e4f20a80e1c6a3ba09c17187fccddc3581ec7ebb1e3919ffd6801d
13   -libqpdf/qpdf/auto_job_help.hh 7503b1083c952ace12976857c8ece4e1af67788af9f827fb4248bd22329f93cd
14   -libqpdf/qpdf/auto_job_init.hh 10a697528d4cae1ac566ee7612f62e611190b3c10c0021862a77fa7e4f330570
  13 +libqpdf/qpdf/auto_job_help.hh 43d0c1f4477949d1a4facc6984e9e341758201f4884e8d0d82ffc8dd795b2d38
  14 +libqpdf/qpdf/auto_job_init.hh 2bb9d1524817e7150da55790ba0ac5ed8042ff4ff9688d70a927ee95903849a4
15 15 libqpdf/qpdf/auto_job_json_decl.hh 7dbb83ddadcea39bfd1faa4ca061e1e3c3134d693b8ae634b463e7e19dc8bd0a
16   -libqpdf/qpdf/auto_job_json_init.hh e9cacbcb78ca250a962c226a935067ef9b76f5485bae7e5302eea0a1a8e2ff65
17   -libqpdf/qpdf/auto_job_schema.hh 2b974a436c5b4d03fb38258d6213f993cfa9f673834cebe754b4c7ad657481c9
  16 +libqpdf/qpdf/auto_job_json_init.hh 33934c23235c760a1fc1375c74f4fd52c8f5accf655c510fa3dd9a2be293b61d
  17 +libqpdf/qpdf/auto_job_schema.hh 81b8d57f05f5125a722912b6ee5e10fc6b02ae68e040108b75f32fc6527c02b1
18 18 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
19   -manual/cli.rst be2b9366e953fce743c115db00eeedc7894e547a9222133395e929f535fa9836
20   -manual/qpdf.1 c1d6e58e37aed1b8d434b37edd1837b7261c9933b09d64bf3915dc3f35d6cccb
  19 +manual/cli.rst efbce4b34fefbe1f46fca9c9693af245a7f7e2b63c0853c464569087aa49cb18
  20 +manual/qpdf.1 0567419e4f6a2b8facd03b9febaf65a384dd68e0e327fc058e77c21121a6cf17
21 21 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
... ...
... ... @@ -145,6 +145,7 @@ options:
145 145 - qdf
146 146 - raw-stream-data
147 147 - recompress-flate
  148 + - remove-acroform
148 149 - remove-info
149 150 - remove-metadata
150 151 - remove-page-labels
... ... @@ -468,6 +469,7 @@ json:
468 469 - Pages.file:
469 470 Pages.password:
470 471 range:
  472 + remove-acroform:
471 473 remove-info:
472 474 remove-metadata:
473 475 remove-page-labels:
... ...
libqpdf/QPDFJob.cc
... ... @@ -438,6 +438,9 @@ QPDFJob::createQPDF()
438 438 m->warnings |= m->inputs.clear();
439 439  
440 440 auto root = pdf.getRoot();
  441 + if (m->remove_acroform) {
  442 + root.erase("/AcroForm");
  443 + }
441 444 if (m->remove_info) {
442 445 auto trailer = pdf.getTrailer();
443 446 auto mod_date = trailer["/Info"]["/ModDate"];
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -563,6 +563,13 @@ QPDFJob::Config::removeAttachment(std::string const& parameter)
563 563 }
564 564  
565 565 QPDFJob::Config*
  566 +QPDFJob::Config::removeAcroform()
  567 +{
  568 + o.m->remove_acroform = true;
  569 + return this;
  570 +}
  571 +
  572 +QPDFJob::Config*
566 573 QPDFJob::Config::removeInfo()
567 574 {
568 575 o.m->remove_info = true;
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -250,6 +250,7 @@ class QPDFJob::Members
250 250 bool optimize_images{false};
251 251 bool externalize_inline_images{false};
252 252 bool keep_inline_images{false};
  253 + bool remove_acroform{false};
253 254 bool remove_info{false};
254 255 bool remove_metadata{false};
255 256 bool remove_page_labels{false};
... ...
libqpdf/qpdf/auto_job_help.hh
... ... @@ -430,6 +430,11 @@ ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images
430 430 }
431 431 static void add_help_5(QPDFArgParser& ap)
432 432 {
  433 +ap.addOptionHelp("--remove-acroform", "modification", "remove the interactive form dictionary", R"(Exclude the interactive form dictionary from the output file. This
  434 +option only removes the interactive form dictionary from the
  435 +document catalog. It does not remove form field dictionaries or
  436 +widget annotations. See the manual for more detail.
  437 +)");
433 438 ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file.
434 439 )");
435 440 ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file.
... ... @@ -642,6 +647,9 @@ ap.addOptionHelp("--cleartext-metadata", "encryption", "don't encrypt metadata",
642 647 encrypting the rest of the document. This option is not
643 648 available with 40-bit encryption.
644 649 )");
  650 +}
  651 +static void add_help_6(QPDFArgParser& ap)
  652 +{
645 653 ap.addOptionHelp("--use-aes", "encryption", "use AES with 128-bit encryption", R"(--use-aes=[y|n]
646 654  
647 655 Enables/disables use of the more secure AES encryption with
... ... @@ -650,9 +658,6 @@ version to be at least 1.6. This option is only available with
650 658 128-bit encryption. The default is "n" for compatibility
651 659 reasons. Use 256-bit encryption instead.
652 660 )");
653   -}
654   -static void add_help_6(QPDFArgParser& ap)
655   -{
656 661 ap.addOptionHelp("--allow-insecure", "encryption", "allow empty owner passwords", R"(Allow creation of PDF files with empty owner passwords and
657 662 non-empty user passwords when using 256-bit encryption.
658 663 )");
... ... @@ -828,14 +833,14 @@ Specify the attachment's modification date in PDF format;
828 833 defaults to the current time. Run qpdf --help=pdf-dates for
829 834 information about the date format.
830 835 )");
  836 +}
  837 +static void add_help_7(QPDFArgParser& ap)
  838 +{
831 839 ap.addOptionHelp("--mimetype", "add-attachment", "attachment mime type, e.g. application/pdf", R"(--mimetype=type/subtype
832 840  
833 841 Specify the mime type for the attachment, such as text/plain,
834 842 application/pdf, image/png, etc.
835 843 )");
836   -}
837   -static void add_help_7(QPDFArgParser& ap)
838   -{
839 844 ap.addOptionHelp("--description", "add-attachment", "set attachment's description", R"(--description="text"
840 845  
841 846 Supply descriptive text for the attachment, displayed by some
... ... @@ -924,12 +929,12 @@ Useful for scripts.
924 929 ap.addOptionHelp("--show-pages", "inspection", "display page dictionary information", R"(Show the object and generation number for each page dictionary
925 930 object and for each content stream associated with the page.
926 931 )");
927   -ap.addOptionHelp("--with-images", "inspection", "include image details with --show-pages", R"(When used with --show-pages, also shows the object and
928   -generation numbers for the image objects on each page.
929   -)");
930 932 }
931 933 static void add_help_8(QPDFArgParser& ap)
932 934 {
  935 +ap.addOptionHelp("--with-images", "inspection", "include image details with --show-pages", R"(When used with --show-pages, also shows the object and
  936 +generation numbers for the image objects on each page.
  937 +)");
933 938 ap.addOptionHelp("--list-attachments", "inspection", "list embedded files", R"(Show the key and stream number for each embedded file. Combine
934 939 with --verbose for more detailed information.
935 940 )");
... ... @@ -1033,6 +1038,9 @@ parsing. The limit applies when the PDF document's xref table is undamaged
1033 1038 and the object itself can be parsed without errors. The default limit
1034 1039 is 4,294,967,295. See also --parser-max-container-size-damaged.
1035 1040 )");
  1041 +}
  1042 +static void add_help_9(QPDFArgParser& ap)
  1043 +{
1036 1044 ap.addOptionHelp("--parser-max-container-size-damaged", "global", "set the maximum container size while parsing damaged files", R"(--parser-max-container-size-damaged=n
1037 1045  
1038 1046 Set the maximum number of top-level objects allowed in a container while
... ... @@ -1041,9 +1049,6 @@ or the object itself is damaged. The limit also applies when parsing
1041 1049 xref streams. The default limit is 5,000.
1042 1050 See also --parser-max-container-size.
1043 1051 )");
1044   -}
1045   -static void add_help_9(QPDFArgParser& ap)
1046   -{
1047 1052 ap.addOptionHelp("--max-stream-filters", "global", "set the maximum number of filters allowed when filtering streams", R"(--max-stream-filters=n
1048 1053  
1049 1054 An excessive number of stream filters is usually a sign that a file
... ...
libqpdf/qpdf/auto_job_init.hh
... ... @@ -77,6 +77,7 @@ this->ap.addBare("progress", [this](){c_main->progress();});
77 77 this->ap.addBare("qdf", [this](){c_main->qdf();});
78 78 this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
79 79 this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
  80 +this->ap.addBare("remove-acroform", [this](){c_main->removeAcroform();});
80 81 this->ap.addBare("remove-info", [this](){c_main->removeInfo();});
81 82 this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();});
82 83 this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
... ...
libqpdf/qpdf/auto_job_json_init.hh
... ... @@ -436,6 +436,9 @@ addParameter([this](std::string const& p) { c_pages->range(p); });
436 436 popHandler(); // key: range
437 437 popHandler(); // array: .pages[]
438 438 popHandler(); // key: pages
  439 +pushKey("removeAcroform");
  440 +addBare([this]() { c_main->removeAcroform(); });
  441 +popHandler(); // key: removeAcroform
439 442 pushKey("removeInfo");
440 443 addBare([this]() { c_main->removeInfo(); });
441 444 popHandler(); // key: removeInfo
... ...
libqpdf/qpdf/auto_job_schema.hh
... ... @@ -154,6 +154,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
154 154 "range": "page range"
155 155 }
156 156 ],
  157 + "removeAcroform": "remove the interactive form dictionary",
157 158 "removeInfo": "remove file information",
158 159 "removeMetadata": "remove metadata",
159 160 "removePageLabels": "remove explicit page numbers",
... ...
manual/cli.rst
... ... @@ -1641,6 +1641,14 @@ Related Options
1641 1641 appearances with some limitations. See the
1642 1642 :qpdf:ref:`--generate-appearances` option for details.
1643 1643  
  1644 + In some damaged files with interactive form fields, the form fields
  1645 + in the document's AcroForm structure may be out of sync with the
  1646 + corresponding widget annotations on the page. In this case, after
  1647 + flattening, some viewers may show some field values twice with
  1648 + a slight offset. In this situation, it may help to remove the
  1649 + AcroForm entry in the document catalog using the
  1650 + :qpdf:ref:`--remove-acroform` option after flattening.
  1651 +
1644 1652 .. qpdf:option:: --rotate=[+|-]angle[:page-range]
1645 1653  
1646 1654 .. help: rotate pages
... ... @@ -1810,6 +1818,30 @@ Related Options
1810 1818 Prevent inline images from being included in image optimization
1811 1819 done by :qpdf:ref:`--optimize-images`.
1812 1820  
  1821 +.. qpdf:option:: --remove-acroform
  1822 +
  1823 + .. help: remove the interactive form dictionary
  1824 +
  1825 + Exclude the interactive form dictionary from the output file. This
  1826 + option only removes the interactive form dictionary from the
  1827 + document catalog. It does not remove form field dictionaries or
  1828 + widget annotations. See the manual for more detail.
  1829 +
  1830 +Exclude the interactive form dictionary from the output file. This
  1831 +option only removes the interactive form dictionary from the
  1832 +document catalog. It does not remove form field dictionaries or the
  1833 +associated widget annotations.
  1834 +
  1835 +In some damaged files with interactive form fields, the form fields
  1836 +in the document's AcroForm structure may be out of sync with the
  1837 +corresponding widget annotations on the page. In this case,
  1838 +different viewers may display different field values, and after flattening
  1839 +annotations with the :qpdf:ref:`--flatten-annotations` option,
  1840 +some viewers may show some field values twice with
  1841 +a slight offset. In this situation, it may help to remove the
  1842 +AcroForm entry in the document catalog using this option. **Users should
  1843 +check the output file carefully after using this option**.
  1844 +
1813 1845 .. qpdf:option:: --remove-info
1814 1846  
1815 1847 .. help: remove file information
... ...
manual/qpdf.1
... ... @@ -545,6 +545,12 @@ Don't optimize images whose area in pixels is below the specified value.
545 545 .B --keep-inline-images \-\- exclude inline images from optimization
546 546 Prevent inline images from being considered by --optimize-images.
547 547 .TP
  548 +.B --remove-acroform \-\- remove the interactive form dictionary
  549 +Exclude the interactive form dictionary from the output file. This
  550 +option only removes the interactive form dictionary from the
  551 +document catalog. It does not remove form field dictionaries or
  552 +widget annotations. See the manual for more detail.
  553 +.TP
548 554 .B --remove-info \-\- remove file information
549 555 Exclude file information (except modification date) from the output file.
550 556 .TP
... ...
manual/release-notes.rst
... ... @@ -98,6 +98,11 @@ more detail.
98 98 into the environment during completion and to correctly handle
99 99 ``bashcompinit`` for zsh users.
100 100  
  101 + - Add new :qpdf:ref:`--remove-acroform` option to exclude the AcroForm
  102 + dictionary from the output PDF. This option can be particularly useful
  103 + when the :qpdf:ref:`--flatten-annotations` option is used on damaged
  104 + PDF files.
  105 +
101 106 - Other enhancements
102 107  
103 108 - Add new ``inspection mode`` to help with the inspection and manual repair
... ...
qpdf/qtest/interactive-form.test
... ... @@ -26,7 +26,7 @@ my @form_tests = (
26 26 'form-document-defaults',
27 27 );
28 28  
29   -my $n_tests = scalar(@form_tests) + 6;
  29 +my $n_tests = scalar(@form_tests) + 8;
30 30  
31 31 # Many of the form*.pdf files were created by converting the
32 32 # LibreOffice document storage/form.odt to PDF and then manually
... ... @@ -70,5 +70,13 @@ $td->runtest("compare files",
70 70 {$td->FILE => "a.pdf"},
71 71 {$td->FILE => "button-set-broken-out.pdf"});
72 72  
  73 +$td->runtest("remove-acroform",
  74 + {$td->COMMAND => "qpdf form-filled-with-atril.pdf --qdf --static-id --remove-acroform a.pdf"},
  75 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  76 + $td->NORMALIZE_NEWLINES);
  77 +$td->runtest("compare files",
  78 + {$td->FILE => "a.pdf"},
  79 + {$td->FILE => "remove-acroform-out.pdf"});
  80 +
73 81 cleanup();
74 82 $td->report($n_tests);
... ...
qpdf/qtest/qpdf/remove-acroform-out.pdf 0 → 100644
No preview for this file type