Commit 8cb9bce7806ef41afbc6b9d236d2343f82814da6

Authored by m-holger
1 parent 0b3debaf

Add new commands --remove-metadata and --remove-info

ChangeLog
  1 +2024-08-25 M Holger <m.holger@qpdf.org>
  2 +
  3 + * Add new command-line arguments --remove-metadata and --remove-info
  4 + to exclude document metadata and information from the output PDF
  5 + file. Partially fixes #1145.
  6 +
1 7 2024-08-06 M Holger <m.holger@qpdf.org>
2 8  
3 9 * Bug fix: when writing real numbers as JSON ensure that they don't
... ...
include/qpdf/QPDFJob.hh
... ... @@ -692,6 +692,8 @@ class QPDFJob
692 692 bool optimize_images{false};
693 693 bool externalize_inline_images{false};
694 694 bool keep_inline_images{false};
  695 + bool remove_info{false};
  696 + bool remove_metadata{false};
695 697 bool remove_page_labels{false};
696 698 size_t oi_min_width{DEFAULT_OI_MIN_WIDTH};
697 699 size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT};
... ...
include/qpdf/auto_job_c_main.hh
... ... @@ -32,6 +32,8 @@ QPDF_DLL Config* progress();
32 32 QPDF_DLL Config* qdf();
33 33 QPDF_DLL Config* rawStreamData();
34 34 QPDF_DLL Config* recompressFlate();
  35 +QPDF_DLL Config* removeInfo();
  36 +QPDF_DLL Config* removeMetadata();
35 37 QPDF_DLL Config* removePageLabels();
36 38 QPDF_DLL Config* reportMemoryUsage();
37 39 QPDF_DLL Config* requiresPassword();
... ...
job.sums
... ... @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a
4 4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
5 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
6 6 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
7   -include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6
  7 +include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae
8 8 include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506
9 9 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62
10   -job.yml 53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566
  10 +job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22
11 11 libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6
12   -libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4
13   -libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158
  12 +libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade
  13 +libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e
14 14 libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd
15   -libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74
16   -libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053
  15 +libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d
  16 +libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c
17 17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
18   -manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0
19   -manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c
  18 +manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb
  19 +manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d
20 20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
... ...
... ... @@ -130,6 +130,8 @@ options:
130 130 - qdf
131 131 - raw-stream-data
132 132 - recompress-flate
  133 + - remove-info
  134 + - remove-metadata
133 135 - remove-page-labels
134 136 - replace-input
135 137 - report-memory-usage
... ... @@ -440,6 +442,8 @@ json:
440 442 - Pages.file:
441 443 Pages.password:
442 444 range:
  445 + remove-info:
  446 + remove-metadata:
443 447 remove-page-labels:
444 448 report-memory-usage:
445 449 rotate:
... ...
libqpdf/QPDFJob.cc
... ... @@ -471,6 +471,21 @@ QPDFJob::createQPDF()
471 471 }
472 472 handleUnderOverlay(pdf);
473 473 handleTransformations(pdf);
  474 + if (m->remove_info) {
  475 + auto trailer = pdf.getTrailer();
  476 + auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate");
  477 + if (mod_date.isNull()) {
  478 + trailer.removeKey("/Info");
  479 + } else {
  480 + auto info = trailer.replaceKeyAndGetNew(
  481 + "/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary()));
  482 + info.replaceKey("/ModDate", mod_date);
  483 + }
  484 + pdf.getRoot().removeKey("/Metadata");
  485 + }
  486 + if (m->remove_metadata) {
  487 + pdf.getRoot().removeKey("/Metadata");
  488 + }
474 489  
475 490 for (auto& foreign: page_heap) {
476 491 if (foreign->anyWarnings()) {
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -511,6 +511,20 @@ QPDFJob::Config::removeAttachment(std::string const&amp; parameter)
511 511 }
512 512  
513 513 QPDFJob::Config*
  514 +QPDFJob::Config::removeInfo()
  515 +{
  516 + o.m->remove_info = true;
  517 + return this;
  518 +}
  519 +
  520 +QPDFJob::Config*
  521 +QPDFJob::Config::removeMetadata()
  522 +{
  523 + o.m->remove_metadata = true;
  524 + return this;
  525 +}
  526 +
  527 +QPDFJob::Config*
514 528 QPDFJob::Config::removePageLabels()
515 529 {
516 530 o.m->remove_page_labels = true;
... ...
libqpdf/qpdf/auto_job_help.hh
... ... @@ -414,6 +414,13 @@ Don&#39;t optimize images whose area in pixels is below the specified value.
414 414 )");
415 415 ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images.
416 416 )");
  417 +ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file.
  418 +)");
  419 +ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file.
  420 +)");
  421 +}
  422 +static void add_help_5(QPDFArgParser& ap)
  423 +{
417 424 ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file.
418 425 )");
419 426 ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... --
... ... @@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with
460 467 1 and continuing sequentially until the end of the document. For
461 468 additional examples, please consult the manual.
462 469 )");
463   -}
464   -static void add_help_5(QPDFArgParser& ap)
465   -{
466 470 ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage:
467 471  
468 472 --encrypt \
... ... @@ -641,6 +645,9 @@ ap.addOptionHelp(&quot;--force-R5&quot;, &quot;encryption&quot;, &quot;use unsupported R=5 encryption&quot;, R
641 645 algorithm that existed only in Acrobat version IX. This option
642 646 should not be used except for compatibility testing.
643 647 )");
  648 +}
  649 +static void add_help_6(QPDFArgParser& ap)
  650 +{
644 651 ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage:
645 652  
646 653 qpdf in.pdf --pages --file=input-file \
... ... @@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays.
725 732  
726 733 Run qpdf --help=page-ranges for help with page ranges.
727 734 )");
728   -}
729   -static void add_help_6(QPDFArgParser& ap)
730   -{
731 735 ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range
732 736  
733 737 Specify the range of pages in the primary output to apply
... ... @@ -829,6 +833,9 @@ its terminating &quot;--&quot;.
829 833 To copy attachments from a password-protected file, use
830 834 the --password option after the file name.
831 835 )");
  836 +}
  837 +static void add_help_7(QPDFArgParser& ap)
  838 +{
832 839 ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix
833 840  
834 841 Prepend a prefix to each key; may be needed if there are
... ... @@ -839,9 +846,6 @@ ap.addHelpTopic(&quot;inspection&quot;, &quot;inspect PDF files&quot;, R&quot;(These options provide tool
839 846 the options in this section are specified, no output file may be
840 847 given.
841 848 )");
842   -}
843   -static void add_help_7(QPDFArgParser& ap)
844   -{
845 849 ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status:
846 850  
847 851 0: the file is encrypted
... ... @@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments.
919 923 ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the
920 924 JSON chapter in the qpdf manual for details.
921 925 )");
  926 +}
  927 +static void add_help_8(QPDFArgParser& ap)
  928 +{
922 929 ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version]
923 930  
924 931 Generate a JSON representation of the file. This is described in
... ... @@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard
932 939 output a JSON object with the same keys and with values
933 940 containing descriptive text.
934 941 )");
935   -}
936   -static void add_help_8(QPDFArgParser& ap)
937   -{
938 942 ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key
939 943  
940 944 This option is repeatable. If given, only the specified
... ...
libqpdf/qpdf/auto_job_init.hh
... ... @@ -68,6 +68,8 @@ this-&gt;ap.addBare(&quot;progress&quot;, [this](){c_main-&gt;progress();});
68 68 this->ap.addBare("qdf", [this](){c_main->qdf();});
69 69 this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
70 70 this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
  71 +this->ap.addBare("remove-info", [this](){c_main->removeInfo();});
  72 +this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();});
71 73 this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
72 74 this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
73 75 this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();});
... ...
libqpdf/qpdf/auto_job_json_init.hh
... ... @@ -412,6 +412,12 @@ addParameter([this](std::string const&amp; p) { c_pages-&gt;range(p); });
412 412 popHandler(); // key: range
413 413 popHandler(); // array: .pages[]
414 414 popHandler(); // key: pages
  415 +pushKey("removeInfo");
  416 +addBare([this]() { c_main->removeInfo(); });
  417 +popHandler(); // key: removeInfo
  418 +pushKey("removeMetadata");
  419 +addBare([this]() { c_main->removeMetadata(); });
  420 +popHandler(); // key: removeMetadata
415 421 pushKey("removePageLabels");
416 422 addBare([this]() { c_main->removePageLabels(); });
417 423 popHandler(); // key: removePageLabels
... ...
libqpdf/qpdf/auto_job_schema.hh
... ... @@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R&quot;({
145 145 "range": "page range"
146 146 }
147 147 ],
  148 + "removeInfo": "remove file information",
  149 + "removeMetadata": "remove metadata",
148 150 "removePageLabels": "remove explicit page numbers",
149 151 "reportMemoryUsage": "best effort report of memory usage",
150 152 "rotate": "rotate pages",
... ...
manual/cli.rst
... ... @@ -1773,6 +1773,27 @@ Related Options
1773 1773 Prevent inline images from being included in image optimization
1774 1774 done by :qpdf:ref:`--optimize-images`.
1775 1775  
  1776 +.. qpdf:option:: --remove-info
  1777 +
  1778 + .. help: remove file information
  1779 +
  1780 + Exclude file information (except modification date) from the output file.
  1781 +
  1782 + Exclude file information (except modification date) from the output file by
  1783 + omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in
  1784 + the document trailer. The ``/Metadata`` entry in the document catalog is
  1785 + also removed; see also :qpdf:ref:`--remove-metadata`.
  1786 +
  1787 +.. qpdf:option:: --remove-metadata
  1788 +
  1789 + .. help: remove metadata
  1790 +
  1791 + Exclude metadata from the output file.
  1792 +
  1793 + Exclude metadata from the output file by omitting the ``/Metadata``
  1794 + entry in the document catalog.
  1795 + See also :qpdf:ref:`--remove-info`.
  1796 +
1776 1797 .. qpdf:option:: --remove-page-labels
1777 1798  
1778 1799 .. help: remove explicit page numbers
... ...
manual/qpdf.1
... ... @@ -530,6 +530,12 @@ Don&#39;t optimize images whose area in pixels is below the specified value.
530 530 .B --keep-inline-images \-\- exclude inline images from optimization
531 531 Prevent inline images from being considered by --optimize-images.
532 532 .TP
  533 +.B --remove-info \-\- remove file information
  534 +Exclude file information (except modification date) from the output file.
  535 +.TP
  536 +.B --remove-metadata \-\- remove metadata
  537 +Exclude metadata from the output file.
  538 +.TP
533 539 .B --remove-page-labels \-\- remove explicit page numbers
534 540 Exclude page labels (explicit page numbers) from the output file.
535 541 .TP
... ...
qpdf/qtest/merge-and-split.test
... ... @@ -14,7 +14,7 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('merge-and-split');
16 16  
17   -my $n_tests = 28;
  17 +my $n_tests = 34;
18 18  
19 19 # Select pages from the same file multiple times including selecting
20 20 # twice from an encrypted file and specifying the password only the
... ... @@ -103,6 +103,39 @@ $td-&gt;runtest(&quot;check output&quot;,
103 103 {$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"},
104 104 {$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0});
105 105  
  106 +$td->runtest("remove metadata",
  107 + {$td->COMMAND =>
  108 + "qpdf metadata-crypt-filter.pdf a.pdf" .
  109 + " --remove-metadata" .
  110 + " --decrypt" .
  111 + " --static-id"},
  112 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  113 +$td->runtest("check output",
  114 + {$td->FILE => "a.pdf"},
  115 + {$td->FILE => "remove-metadata.pdf"});
  116 +
  117 +$td->runtest("remove info (with moddate)",
  118 + {$td->COMMAND =>
  119 + "qpdf remove-metadata.pdf a.pdf" .
  120 + " --remove-info" .
  121 + " --decrypt" .
  122 + " --static-id"},
  123 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  124 +$td->runtest("check output",
  125 + {$td->FILE => "a.pdf"},
  126 + {$td->FILE => "remove-info.pdf"});
  127 +
  128 +$td->runtest("remove info (without moddate)",
  129 + {$td->COMMAND =>
  130 + "qpdf remove-metadata-no-moddate.pdf a.pdf" .
  131 + " --remove-info" .
  132 + " --decrypt" .
  133 + " --static-id"},
  134 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  135 +$td->runtest("check output",
  136 + {$td->FILE => "a.pdf"},
  137 + {$td->FILE => "remove-info-no-moddate.pdf"});
  138 +
106 139 $td->runtest("split with shared resources",
107 140 {$td->COMMAND =>
108 141 "qpdf --qdf --static-id" .
... ...
qpdf/qtest/qpdf/remove-info-no-moddate.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-info.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/remove-metadata.pdf 0 → 100644
No preview for this file type