From 464d94af297c1235d2b339bb15bf5efaeb1dcc13 Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 26 Mar 2025 19:43:42 +0000 Subject: [PATCH] Add new CLI option --remove-structure --- include/qpdf/QPDFJob.hh | 1 + include/qpdf/auto_job_c_main.hh | 1 + job.sums | 16 ++++++++-------- job.yml | 2 ++ libqpdf/QPDFJob.cc | 4 ++++ libqpdf/QPDFJob_config.cc | 7 +++++++ libqpdf/qpdf/auto_job_help.hh | 20 +++++++++++--------- libqpdf/qpdf/auto_job_init.hh | 1 + libqpdf/qpdf/auto_job_json_init.hh | 3 +++ libqpdf/qpdf/auto_job_schema.hh | 1 + manual/cli.rst | 17 +++++++++++++++-- manual/qpdf.1 | 3 +++ manual/release-notes.rst | 7 ++++++- qpdf/qtest/merge-and-split.test | 11 ++++++++++- qpdf/qtest/qpdf/remove-structure.out.pdf | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf/remove-structure.pdf | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 293 insertions(+), 21 deletions(-) create mode 100644 qpdf/qtest/qpdf/remove-structure.out.pdf create mode 100644 qpdf/qtest/qpdf/remove-structure.pdf diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index c429b4f..4f373c9 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -696,6 +696,7 @@ class QPDFJob bool remove_info{false}; bool remove_metadata{false}; bool remove_page_labels{false}; + bool remove_structure{false}; size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; size_t oi_min_area{DEFAULT_OI_MIN_AREA}; diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index bb58425..8778631 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -35,6 +35,7 @@ QPDF_DLL Config* recompressFlate(); QPDF_DLL Config* removeInfo(); QPDF_DLL Config* removeMetadata(); QPDF_DLL Config* removePageLabels(); +QPDF_DLL Config* removeStructure(); QPDF_DLL Config* reportMemoryUsage(); QPDF_DLL Config* requiresPassword(); QPDF_DLL Config* removeRestrictions(); diff --git a/job.sums b/job.sums index 6121c93..4093a2d 100644 --- a/job.sums +++ b/job.sums @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae +include/qpdf/auto_job_c_main.hh 48e8ea475e8a8f4c96de86bdad10dff83a263deccc3798c8bed7f5e0e070a037 include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62 -job.yml 2c424c7be0c02545191969e849e1d8f7fdb4ab65bbf799b9a190e21343899751 +job.yml ba9f24920c2221883b1d6d8e42f7ac7c78988063a0ca9181dc08abe9cde6f760 libqpdf/qpdf/auto_job_decl.hh 34ba07d3891c3e5cdd8712f991e508a0652c9db314c5d5bcdf4421b76e6f6e01 -libqpdf/qpdf/auto_job_help.hh a36476d0c823033b2af0e4170651e1fa31173887c310f2f208e9ed7e6e36a2ce -libqpdf/qpdf/auto_job_init.hh f89e7f9950a185372732d2ff7f113161f275f45ee7937dd7fd37e38013bf22e7 +libqpdf/qpdf/auto_job_help.hh 03bdaab05f84b16bfb15ad7993a4655b7dc14af070fa97fe3035943726d4b258 +libqpdf/qpdf/auto_job_init.hh 029d929f930f60b4055796c8c4ce2ed625f861316ac738ab638579eca46b2472 libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd -libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d -libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c +libqpdf/qpdf/auto_job_json_init.hh b883f3768c8367327ea1f17e8ca503178be62a9cede316bf7ad96c0fafee5513 +libqpdf/qpdf/auto_job_schema.hh 6d28db327dd19e0a7da375c681ecea7965513fa4b5d2349a80089b057f8c02d8 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 67357688f9a52fafa9a4f231fe4ce74c3cd8977130da7501efe54439a1ee22d4 -manual/qpdf.1 dbcc567623f1fa080743ae9bc32b6264a3b6bd3074c81c438e52ca328e94ecd7 +manual/cli.rst 1094662a10db21528fd151739a9779a4504ebac75b483a11a53d42ab0430ee42 +manual/qpdf.1 eb45321c598f23f0724cb963a17aef972ef5b817dc44cf787882a4621c6e1c8e manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b diff --git a/job.yml b/job.yml index b172086..cc34b8a 100644 --- a/job.yml +++ b/job.yml @@ -134,6 +134,7 @@ options: - remove-info - remove-metadata - remove-page-labels + - remove-structure - replace-input - report-memory-usage - requires-password @@ -446,6 +447,7 @@ json: remove-info: remove-metadata: remove-page-labels: + remove-structure: report-memory-usage: rotate: set-page-labels: diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 6bd93aa..bbaf291 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -490,6 +490,10 @@ QPDFJob::createQPDF() if (m->remove_metadata) { pdf.getRoot().removeKey("/Metadata"); } + if (m->remove_structure) { + pdf.getRoot().removeKey("/StructTreeRoot"); + pdf.getRoot().removeKey("/MarkInfo"); + } for (auto& foreign: page_heap) { if (foreign->anyWarnings()) { diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index cf6eadd..32eec68 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -532,6 +532,13 @@ QPDFJob::Config::removePageLabels() } QPDFJob::Config* +QPDFJob::Config::removeStructure() +{ + o.m->remove_structure = true; + return this; +} + +QPDFJob::Config* QPDFJob::Config::reportMemoryUsage() { o.m->report_mem_usage = true; diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 8d91db8..78799c3 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -428,6 +428,8 @@ ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Excl )"); ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file. )"); +ap.addOptionHelp("--remove-structure", "modification", "remove metadata", R"(Exclude the structure tree from the output file. +)"); ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... -- Set page labels (explicit page numbers) for the entire file. @@ -643,12 +645,12 @@ reasons. Use 256-bit encryption instead. ap.addOptionHelp("--allow-insecure", "encryption", "allow empty owner passwords", R"(Allow creation of PDF files with empty owner passwords and non-empty user passwords when using 256-bit encryption. )"); -ap.addOptionHelp("--force-V4", "encryption", "force V=4 in encryption dictionary", R"(This option is for testing and is never needed in practice since -qpdf does this automatically when needed. -)"); } static void add_help_6(QPDFArgParser& ap) { +ap.addOptionHelp("--force-V4", "encryption", "force V=4 in encryption dictionary", R"(This option is for testing and is never needed in practice since +qpdf does this automatically when needed. +)"); ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R"(Use an undocumented, unsupported, deprecated encryption algorithm that existed only in Acrobat version IX. This option should not be used except for compatibility testing. @@ -828,13 +830,13 @@ ap.addOptionHelp("--description", "add-attachment", "set attachment's descriptio Supply descriptive text for the attachment, displayed by some PDF viewers. )"); +} +static void add_help_7(QPDFArgParser& ap) +{ ap.addOptionHelp("--replace", "add-attachment", "replace attachment with same key", R"(Indicate that any existing attachment with the same key should be replaced by the new attachment. Otherwise, qpdf gives an error if an attachment with that key is already present. )"); -} -static void add_help_7(QPDFArgParser& ap) -{ ap.addHelpTopic("copy-attachments", "copy attachments from another file", R"(The options listed below appear between --copy-attachments-from and its terminating "--". @@ -920,14 +922,14 @@ generation numbers for the image objects on each page. ap.addOptionHelp("--list-attachments", "inspection", "list embedded files", R"(Show the key and stream number for each embedded file. Combine with --verbose for more detailed information. )"); +} +static void add_help_8(QPDFArgParser& ap) +{ ap.addOptionHelp("--show-attachment", "inspection", "export an embedded file", R"(--show-attachment=key Write the contents of the specified attachment to standard output as binary data. Get the key with --list-attachments. )"); -} -static void add_help_8(QPDFArgParser& ap) -{ ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the JSON chapter in the qpdf manual for details. )"); diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index 8307e46..97ccc77 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -72,6 +72,7 @@ this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); this->ap.addBare("remove-info", [this](){c_main->removeInfo();}); this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); +this->ap.addBare("remove-structure", [this](){c_main->removeStructure();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();}); this->ap.addBare("requires-password", [this](){c_main->requiresPassword();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index fa4c408..d85b75e 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -421,6 +421,9 @@ popHandler(); // key: removeMetadata pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels +pushKey("removeStructure"); +addBare([this]() { c_main->removeStructure(); }); +popHandler(); // key: removeStructure pushKey("reportMemoryUsage"); addBare([this]() { c_main->reportMemoryUsage(); }); popHandler(); // key: reportMemoryUsage diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index 6854fd8..d0c8d3b 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -148,6 +148,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ "removeInfo": "remove file information", "removeMetadata": "remove metadata", "removePageLabels": "remove explicit page numbers", + "removeStructure": "remove metadata", "reportMemoryUsage": "best effort report of memory usage", "rotate": "rotate pages", "setPageLabels": [ diff --git a/manual/cli.rst b/manual/cli.rst index c1c20b2..b8f5d74 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1799,7 +1799,7 @@ Related Options Exclude file information (except modification date) from the output file by omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in the document trailer. - See also :qpdf:ref:`--remove-metadata`. + See also :qpdf:ref:`--remove-metadata`, :qpdf:ref:`--remove-structure`. .. qpdf:option:: --remove-metadata @@ -1809,7 +1809,7 @@ Related Options Exclude metadata from the output file by omitting the ``/Metadata`` dictionary in the document catalog. - See also :qpdf:ref:`--remove-info`. + See also :qpdf:ref:`--remove-info`, :qpdf:ref:`--remove-structure`. .. qpdf:option:: --remove-page-labels @@ -1821,6 +1821,19 @@ Related Options omitting the ``/PageLabels`` dictionary in the document catalog. See also :qpdf:ref:`--set-page-labels`. +.. qpdf:option:: --remove-structure + + .. help: remove metadata + + Exclude the structure tree from the output file. + + Exclude the structure tree from the output file by omitting the + ``/StructTreeRoot`` and ``/MarkInfo`` dictionaries in the document + catalog. + See also :qpdf:ref:`--remove-info`, :qpdf:ref:`--remove-metadata`. + + + .. qpdf:option:: --set-page-labels label-spec ... -- .. help: number pages for the entire document diff --git a/manual/qpdf.1 b/manual/qpdf.1 index 03f4e10..c2672b4 100644 --- a/manual/qpdf.1 +++ b/manual/qpdf.1 @@ -545,6 +545,9 @@ Exclude metadata from the output file. .B --remove-page-labels \-\- remove explicit page numbers Exclude page labels (explicit page numbers) from the output file. .TP +.B --remove-structure \-\- remove metadata +Exclude the structure tree from the output file. +.TP .B --set-page-labels \-\- number pages for the entire document --set-page-labels label-spec ... -- diff --git a/manual/release-notes.rst b/manual/release-notes.rst index d55e555..b718de1 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -13,7 +13,7 @@ more detail. .. x.y.z: not yet released -12.0.1: not yet released +12.1.0: not yet released - Bug fixes - In ``QPDF::isLinearized`` return false if the first object in the file is @@ -29,6 +29,11 @@ more detail. - Fix two object stream error/warning messages that reported the wrong object id. + - CLI Enhancements + + - New :qpdf:ref:`--remove-structure` option to exclude the document + structure tree from the output PDF. + - Other enhancements - There have been further enhancements to how files with damaged xref diff --git a/qpdf/qtest/merge-and-split.test b/qpdf/qtest/merge-and-split.test index db18e87..66480a2 100644 --- a/qpdf/qtest/merge-and-split.test +++ b/qpdf/qtest/merge-and-split.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('merge-and-split'); -my $n_tests = 34; +my $n_tests = 36; # Select pages from the same file multiple times including selecting # twice from an encrypted file and specifying the password only the @@ -136,6 +136,15 @@ $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "remove-info-no-moddate.pdf"}); +$td->runtest("remove structure tree", + {$td->COMMAND => + "qpdf remove-structure.pdf a.pdf" . + " --qdf --remove-structure --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-structure.out.pdf"}); + $td->runtest("split with shared resources", {$td->COMMAND => "qpdf --qdf --static-id" . diff --git a/qpdf/qtest/qpdf/remove-structure.out.pdf b/qpdf/qtest/qpdf/remove-structure.out.pdf new file mode 100644 index 0000000..d457f4e --- /dev/null +++ b/qpdf/qtest/qpdf/remove-structure.out.pdf @@ -0,0 +1,101 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 3 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 5 0 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +%% Original object ID: 8 0 +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 9 0 +7 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 8 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +0000000484 00000 n +0000000583 00000 n +0000000629 00000 n +0000000774 00000 n +trailer << + /Root 1 0 R + /Size 8 + /ID [<31415926535897932384626433832795>] +>> +startxref +809 +%%EOF diff --git a/qpdf/qtest/qpdf/remove-structure.pdf b/qpdf/qtest/qpdf/remove-structure.pdf new file mode 100644 index 0000000..7ad25e9 --- /dev/null +++ b/qpdf/qtest/qpdf/remove-structure.pdf @@ -0,0 +1,119 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /MarkInfo 2 0 R + /Pages 3 0 R + /StructTreeRoot 4 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 8 0 +2 0 obj +<< + /Marked /False +>> +endobj + +%% Original object ID: 2 0 +3 0 obj +<< + /Count 1 + /Kids [ + 5 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 7 0 +4 0 obj +<< + /Type /StructTreeRoot +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +5 0 obj +<< + /Contents 6 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 3 0 R + /Resources << + /Font << + /F1 8 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +6 0 obj +<< + /Length 7 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +7 0 obj +44 +endobj + +%% Original object ID: 6 0 +8 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 5 0 +9 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 10 +0000000000 65535 f +0000000052 00000 n +0000000175 00000 n +0000000241 00000 n +0000000340 00000 n +0000000423 00000 n +0000000665 00000 n +0000000764 00000 n +0000000810 00000 n +0000000955 00000 n +trailer << + /Root 1 0 R + /Size 10 + /ID [] +>> +startxref +990 +%%EOF -- libgit2 0.21.4