Commit 8cb9bce7806ef41afbc6b9d236d2343f82814da6
1 parent
0b3debaf
Add new commands --remove-metadata and --remove-info
Showing
18 changed files
with
138 additions
and
21 deletions
ChangeLog
| 1 | +2024-08-25 M Holger <m.holger@qpdf.org> | |
| 2 | + | |
| 3 | + * Add new command-line arguments --remove-metadata and --remove-info | |
| 4 | + to exclude document metadata and information from the output PDF | |
| 5 | + file. Partially fixes #1145. | |
| 6 | + | |
| 1 | 7 | 2024-08-06 M Holger <m.holger@qpdf.org> |
| 2 | 8 | |
| 3 | 9 | * Bug fix: when writing real numbers as JSON ensure that they don't | ... | ... |
include/qpdf/QPDFJob.hh
| ... | ... | @@ -692,6 +692,8 @@ class QPDFJob |
| 692 | 692 | bool optimize_images{false}; |
| 693 | 693 | bool externalize_inline_images{false}; |
| 694 | 694 | bool keep_inline_images{false}; |
| 695 | + bool remove_info{false}; | |
| 696 | + bool remove_metadata{false}; | |
| 695 | 697 | bool remove_page_labels{false}; |
| 696 | 698 | size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; |
| 697 | 699 | size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; | ... | ... |
include/qpdf/auto_job_c_main.hh
| ... | ... | @@ -32,6 +32,8 @@ QPDF_DLL Config* progress(); |
| 32 | 32 | QPDF_DLL Config* qdf(); |
| 33 | 33 | QPDF_DLL Config* rawStreamData(); |
| 34 | 34 | QPDF_DLL Config* recompressFlate(); |
| 35 | +QPDF_DLL Config* removeInfo(); | |
| 36 | +QPDF_DLL Config* removeMetadata(); | |
| 35 | 37 | QPDF_DLL Config* removePageLabels(); |
| 36 | 38 | QPDF_DLL Config* reportMemoryUsage(); |
| 37 | 39 | QPDF_DLL Config* requiresPassword(); | ... | ... |
job.sums
| ... | ... | @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a |
| 4 | 4 | include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 |
| 5 | 5 | include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 |
| 6 | 6 | include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 |
| 7 | -include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6 | |
| 7 | +include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae | |
| 8 | 8 | include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506 |
| 9 | 9 | include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62 |
| 10 | -job.yml 53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566 | |
| 10 | +job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22 | |
| 11 | 11 | libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6 |
| 12 | -libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4 | |
| 13 | -libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158 | |
| 12 | +libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade | |
| 13 | +libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e | |
| 14 | 14 | libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd |
| 15 | -libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74 | |
| 16 | -libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053 | |
| 15 | +libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d | |
| 16 | +libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c | |
| 17 | 17 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 |
| 18 | -manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0 | |
| 19 | -manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c | |
| 18 | +manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb | |
| 19 | +manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d | |
| 20 | 20 | manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b | ... | ... |
job.yml
| ... | ... | @@ -130,6 +130,8 @@ options: |
| 130 | 130 | - qdf |
| 131 | 131 | - raw-stream-data |
| 132 | 132 | - recompress-flate |
| 133 | + - remove-info | |
| 134 | + - remove-metadata | |
| 133 | 135 | - remove-page-labels |
| 134 | 136 | - replace-input |
| 135 | 137 | - report-memory-usage |
| ... | ... | @@ -440,6 +442,8 @@ json: |
| 440 | 442 | - Pages.file: |
| 441 | 443 | Pages.password: |
| 442 | 444 | range: |
| 445 | + remove-info: | |
| 446 | + remove-metadata: | |
| 443 | 447 | remove-page-labels: |
| 444 | 448 | report-memory-usage: |
| 445 | 449 | rotate: | ... | ... |
libqpdf/QPDFJob.cc
| ... | ... | @@ -471,6 +471,21 @@ QPDFJob::createQPDF() |
| 471 | 471 | } |
| 472 | 472 | handleUnderOverlay(pdf); |
| 473 | 473 | handleTransformations(pdf); |
| 474 | + if (m->remove_info) { | |
| 475 | + auto trailer = pdf.getTrailer(); | |
| 476 | + auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate"); | |
| 477 | + if (mod_date.isNull()) { | |
| 478 | + trailer.removeKey("/Info"); | |
| 479 | + } else { | |
| 480 | + auto info = trailer.replaceKeyAndGetNew( | |
| 481 | + "/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary())); | |
| 482 | + info.replaceKey("/ModDate", mod_date); | |
| 483 | + } | |
| 484 | + pdf.getRoot().removeKey("/Metadata"); | |
| 485 | + } | |
| 486 | + if (m->remove_metadata) { | |
| 487 | + pdf.getRoot().removeKey("/Metadata"); | |
| 488 | + } | |
| 474 | 489 | |
| 475 | 490 | for (auto& foreign: page_heap) { |
| 476 | 491 | if (foreign->anyWarnings()) { | ... | ... |
libqpdf/QPDFJob_config.cc
| ... | ... | @@ -511,6 +511,20 @@ QPDFJob::Config::removeAttachment(std::string const& parameter) |
| 511 | 511 | } |
| 512 | 512 | |
| 513 | 513 | QPDFJob::Config* |
| 514 | +QPDFJob::Config::removeInfo() | |
| 515 | +{ | |
| 516 | + o.m->remove_info = true; | |
| 517 | + return this; | |
| 518 | +} | |
| 519 | + | |
| 520 | +QPDFJob::Config* | |
| 521 | +QPDFJob::Config::removeMetadata() | |
| 522 | +{ | |
| 523 | + o.m->remove_metadata = true; | |
| 524 | + return this; | |
| 525 | +} | |
| 526 | + | |
| 527 | +QPDFJob::Config* | |
| 514 | 528 | QPDFJob::Config::removePageLabels() |
| 515 | 529 | { |
| 516 | 530 | o.m->remove_page_labels = true; | ... | ... |
libqpdf/qpdf/auto_job_help.hh
| ... | ... | @@ -414,6 +414,13 @@ Don't optimize images whose area in pixels is below the specified value. |
| 414 | 414 | )"); |
| 415 | 415 | ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images. |
| 416 | 416 | )"); |
| 417 | +ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file. | |
| 418 | +)"); | |
| 419 | +ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file. | |
| 420 | +)"); | |
| 421 | +} | |
| 422 | +static void add_help_5(QPDFArgParser& ap) | |
| 423 | +{ | |
| 417 | 424 | ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file. |
| 418 | 425 | )"); |
| 419 | 426 | ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... -- |
| ... | ... | @@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with |
| 460 | 467 | 1 and continuing sequentially until the end of the document. For |
| 461 | 468 | additional examples, please consult the manual. |
| 462 | 469 | )"); |
| 463 | -} | |
| 464 | -static void add_help_5(QPDFArgParser& ap) | |
| 465 | -{ | |
| 466 | 470 | ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage: |
| 467 | 471 | |
| 468 | 472 | --encrypt \ |
| ... | ... | @@ -641,6 +645,9 @@ ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R |
| 641 | 645 | algorithm that existed only in Acrobat version IX. This option |
| 642 | 646 | should not be used except for compatibility testing. |
| 643 | 647 | )"); |
| 648 | +} | |
| 649 | +static void add_help_6(QPDFArgParser& ap) | |
| 650 | +{ | |
| 644 | 651 | ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage: |
| 645 | 652 | |
| 646 | 653 | qpdf in.pdf --pages --file=input-file \ |
| ... | ... | @@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays. |
| 725 | 732 | |
| 726 | 733 | Run qpdf --help=page-ranges for help with page ranges. |
| 727 | 734 | )"); |
| 728 | -} | |
| 729 | -static void add_help_6(QPDFArgParser& ap) | |
| 730 | -{ | |
| 731 | 735 | ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range |
| 732 | 736 | |
| 733 | 737 | Specify the range of pages in the primary output to apply |
| ... | ... | @@ -829,6 +833,9 @@ its terminating "--". |
| 829 | 833 | To copy attachments from a password-protected file, use |
| 830 | 834 | the --password option after the file name. |
| 831 | 835 | )"); |
| 836 | +} | |
| 837 | +static void add_help_7(QPDFArgParser& ap) | |
| 838 | +{ | |
| 832 | 839 | ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix |
| 833 | 840 | |
| 834 | 841 | Prepend a prefix to each key; may be needed if there are |
| ... | ... | @@ -839,9 +846,6 @@ ap.addHelpTopic("inspection", "inspect PDF files", R"(These options provide tool |
| 839 | 846 | the options in this section are specified, no output file may be |
| 840 | 847 | given. |
| 841 | 848 | )"); |
| 842 | -} | |
| 843 | -static void add_help_7(QPDFArgParser& ap) | |
| 844 | -{ | |
| 845 | 849 | ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status: |
| 846 | 850 | |
| 847 | 851 | 0: the file is encrypted |
| ... | ... | @@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments. |
| 919 | 923 | ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the |
| 920 | 924 | JSON chapter in the qpdf manual for details. |
| 921 | 925 | )"); |
| 926 | +} | |
| 927 | +static void add_help_8(QPDFArgParser& ap) | |
| 928 | +{ | |
| 922 | 929 | ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version] |
| 923 | 930 | |
| 924 | 931 | Generate a JSON representation of the file. This is described in |
| ... | ... | @@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard |
| 932 | 939 | output a JSON object with the same keys and with values |
| 933 | 940 | containing descriptive text. |
| 934 | 941 | )"); |
| 935 | -} | |
| 936 | -static void add_help_8(QPDFArgParser& ap) | |
| 937 | -{ | |
| 938 | 942 | ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key |
| 939 | 943 | |
| 940 | 944 | This option is repeatable. If given, only the specified | ... | ... |
libqpdf/qpdf/auto_job_init.hh
| ... | ... | @@ -68,6 +68,8 @@ this->ap.addBare("progress", [this](){c_main->progress();}); |
| 68 | 68 | this->ap.addBare("qdf", [this](){c_main->qdf();}); |
| 69 | 69 | this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); |
| 70 | 70 | this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); |
| 71 | +this->ap.addBare("remove-info", [this](){c_main->removeInfo();}); | |
| 72 | +this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();}); | |
| 71 | 73 | this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); |
| 72 | 74 | this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); |
| 73 | 75 | this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();}); | ... | ... |
libqpdf/qpdf/auto_job_json_init.hh
| ... | ... | @@ -412,6 +412,12 @@ addParameter([this](std::string const& p) { c_pages->range(p); }); |
| 412 | 412 | popHandler(); // key: range |
| 413 | 413 | popHandler(); // array: .pages[] |
| 414 | 414 | popHandler(); // key: pages |
| 415 | +pushKey("removeInfo"); | |
| 416 | +addBare([this]() { c_main->removeInfo(); }); | |
| 417 | +popHandler(); // key: removeInfo | |
| 418 | +pushKey("removeMetadata"); | |
| 419 | +addBare([this]() { c_main->removeMetadata(); }); | |
| 420 | +popHandler(); // key: removeMetadata | |
| 415 | 421 | pushKey("removePageLabels"); |
| 416 | 422 | addBare([this]() { c_main->removePageLabels(); }); |
| 417 | 423 | popHandler(); // key: removePageLabels | ... | ... |
libqpdf/qpdf/auto_job_schema.hh
| ... | ... | @@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ |
| 145 | 145 | "range": "page range" |
| 146 | 146 | } |
| 147 | 147 | ], |
| 148 | + "removeInfo": "remove file information", | |
| 149 | + "removeMetadata": "remove metadata", | |
| 148 | 150 | "removePageLabels": "remove explicit page numbers", |
| 149 | 151 | "reportMemoryUsage": "best effort report of memory usage", |
| 150 | 152 | "rotate": "rotate pages", | ... | ... |
manual/cli.rst
| ... | ... | @@ -1773,6 +1773,27 @@ Related Options |
| 1773 | 1773 | Prevent inline images from being included in image optimization |
| 1774 | 1774 | done by :qpdf:ref:`--optimize-images`. |
| 1775 | 1775 | |
| 1776 | +.. qpdf:option:: --remove-info | |
| 1777 | + | |
| 1778 | + .. help: remove file information | |
| 1779 | + | |
| 1780 | + Exclude file information (except modification date) from the output file. | |
| 1781 | + | |
| 1782 | + Exclude file information (except modification date) from the output file by | |
| 1783 | + omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in | |
| 1784 | + the document trailer. Also removes ``/Metadata`` from the document catalog. | |
| 1785 | + See also :qpdf:ref:`--remove-metadata`. | |
| 1786 | + | |
| 1787 | +.. qpdf:option:: --remove-metadata | |
| 1788 | + | |
| 1789 | + .. help: remove metadata | |
| 1790 | + | |
| 1791 | + Exclude metadata from the output file. | |
| 1792 | + | |
| 1793 | + Exclude metadata from the output file by omitting the ``/Metadata`` | |
| 1794 | + entry in the document catalog. | |
| 1795 | + See also :qpdf:ref:`--remove-info`. | |
| 1796 | + | |
| 1776 | 1797 | .. qpdf:option:: --remove-page-labels |
| 1777 | 1798 | |
| 1778 | 1799 | .. help: remove explicit page numbers | ... | ... |
manual/qpdf.1
| ... | ... | @@ -530,6 +530,12 @@ Don't optimize images whose area in pixels is below the specified value. |
| 530 | 530 | .B --keep-inline-images \-\- exclude inline images from optimization |
| 531 | 531 | Prevent inline images from being considered by --optimize-images. |
| 532 | 532 | .TP |
| 533 | +.B --remove-info \-\- remove file information | |
| 534 | +Exclude file information (except modification date) from the output file. | |
| 535 | +.TP | |
| 536 | +.B --remove-metadata \-\- remove metadata | |
| 537 | +Exclude metadata from the output file. | |
| 538 | +.TP | |
| 533 | 539 | .B --remove-page-labels \-\- remove explicit page numbers |
| 534 | 540 | Exclude page labels (explicit page numbers) from the output file. |
| 535 | 541 | .TP | ... | ... |
qpdf/qtest/merge-and-split.test
| ... | ... | @@ -14,7 +14,7 @@ cleanup(); |
| 14 | 14 | |
| 15 | 15 | my $td = new TestDriver('merge-and-split'); |
| 16 | 16 | |
| 17 | -my $n_tests = 28; | |
| 17 | +my $n_tests = 34; | |
| 18 | 18 | |
| 19 | 19 | # Select pages from the same file multiple times including selecting |
| 20 | 20 | # twice from an encrypted file and specifying the password only the |
| ... | ... | @@ -103,6 +103,39 @@ $td->runtest("check output", |
| 103 | 103 | {$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"}, |
| 104 | 104 | {$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0}); |
| 105 | 105 | |
| 106 | +$td->runtest("remove metadata", | |
| 107 | + {$td->COMMAND => | |
| 108 | + "qpdf metadata-crypt-filter.pdf a.pdf" . | |
| 109 | + " --remove-metadata" . | |
| 110 | + " --decrypt" . | |
| 111 | + " --static-id"}, | |
| 112 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 113 | +$td->runtest("check output", | |
| 114 | + {$td->FILE => "a.pdf"}, | |
| 115 | + {$td->FILE => "remove-metadata.pdf"}); | |
| 116 | + | |
| 117 | +$td->runtest("remove info (with moddate)", | |
| 118 | + {$td->COMMAND => | |
| 119 | + "qpdf remove-metadata.pdf a.pdf" . | |
| 120 | + " --remove-info" . | |
| 121 | + " --decrypt" . | |
| 122 | + " --static-id"}, | |
| 123 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 124 | +$td->runtest("check output", | |
| 125 | + {$td->FILE => "a.pdf"}, | |
| 126 | + {$td->FILE => "remove-info.pdf"}); | |
| 127 | + | |
| 128 | +$td->runtest("remove info (without moddate)", | |
| 129 | + {$td->COMMAND => | |
| 130 | + "qpdf remove-metadata-no-moddate.pdf a.pdf" . | |
| 131 | + " --remove-info" . | |
| 132 | + " --decrypt" . | |
| 133 | + " --static-id"}, | |
| 134 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 135 | +$td->runtest("check output", | |
| 136 | + {$td->FILE => "a.pdf"}, | |
| 137 | + {$td->FILE => "remove-info-no-moddate.pdf"}); | |
| 138 | + | |
| 106 | 139 | $td->runtest("split with shared resources", |
| 107 | 140 | {$td->COMMAND => |
| 108 | 141 | "qpdf --qdf --static-id" . | ... | ... |
qpdf/qtest/qpdf/remove-info-no-moddate.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-info.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-metadata.pdf
0 → 100644
No preview for this file type