Commit 464d94af297c1235d2b339bb15bf5efaeb1dcc13

Authored by m-holger
1 parent 314bd3eb

Add new CLI option --remove-structure

... to remove the /Root /StructTreeRoot and /MarkInfo entries.
include/qpdf/QPDFJob.hh
... ... @@ -696,6 +696,7 @@ class QPDFJob
696 696 bool remove_info{false};
697 697 bool remove_metadata{false};
698 698 bool remove_page_labels{false};
  699 + bool remove_structure{false};
699 700 size_t oi_min_width{DEFAULT_OI_MIN_WIDTH};
700 701 size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT};
701 702 size_t oi_min_area{DEFAULT_OI_MIN_AREA};
... ...
include/qpdf/auto_job_c_main.hh
... ... @@ -35,6 +35,7 @@ QPDF_DLL Config* recompressFlate();
35 35 QPDF_DLL Config* removeInfo();
36 36 QPDF_DLL Config* removeMetadata();
37 37 QPDF_DLL Config* removePageLabels();
  38 +QPDF_DLL Config* removeStructure();
38 39 QPDF_DLL Config* reportMemoryUsage();
39 40 QPDF_DLL Config* requiresPassword();
40 41 QPDF_DLL Config* removeRestrictions();
... ...
job.sums
... ... @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a
4 4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
5 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
6 6 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
7   -include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae
  7 +include/qpdf/auto_job_c_main.hh 48e8ea475e8a8f4c96de86bdad10dff83a263deccc3798c8bed7f5e0e070a037
8 8 include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506
9 9 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62
10   -job.yml 2c424c7be0c02545191969e849e1d8f7fdb4ab65bbf799b9a190e21343899751
  10 +job.yml ba9f24920c2221883b1d6d8e42f7ac7c78988063a0ca9181dc08abe9cde6f760
11 11 libqpdf/qpdf/auto_job_decl.hh 34ba07d3891c3e5cdd8712f991e508a0652c9db314c5d5bcdf4421b76e6f6e01
12   -libqpdf/qpdf/auto_job_help.hh a36476d0c823033b2af0e4170651e1fa31173887c310f2f208e9ed7e6e36a2ce
13   -libqpdf/qpdf/auto_job_init.hh f89e7f9950a185372732d2ff7f113161f275f45ee7937dd7fd37e38013bf22e7
  12 +libqpdf/qpdf/auto_job_help.hh 03bdaab05f84b16bfb15ad7993a4655b7dc14af070fa97fe3035943726d4b258
  13 +libqpdf/qpdf/auto_job_init.hh 029d929f930f60b4055796c8c4ce2ed625f861316ac738ab638579eca46b2472
14 14 libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd
15   -libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d
16   -libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c
  15 +libqpdf/qpdf/auto_job_json_init.hh b883f3768c8367327ea1f17e8ca503178be62a9cede316bf7ad96c0fafee5513
  16 +libqpdf/qpdf/auto_job_schema.hh 6d28db327dd19e0a7da375c681ecea7965513fa4b5d2349a80089b057f8c02d8
17 17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
18   -manual/cli.rst 67357688f9a52fafa9a4f231fe4ce74c3cd8977130da7501efe54439a1ee22d4
19   -manual/qpdf.1 dbcc567623f1fa080743ae9bc32b6264a3b6bd3074c81c438e52ca328e94ecd7
  18 +manual/cli.rst 1094662a10db21528fd151739a9779a4504ebac75b483a11a53d42ab0430ee42
  19 +manual/qpdf.1 eb45321c598f23f0724cb963a17aef972ef5b817dc44cf787882a4621c6e1c8e
20 20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
... ...
... ... @@ -134,6 +134,7 @@ options:
134 134 - remove-info
135 135 - remove-metadata
136 136 - remove-page-labels
  137 + - remove-structure
137 138 - replace-input
138 139 - report-memory-usage
139 140 - requires-password
... ... @@ -446,6 +447,7 @@ json:
446 447 remove-info:
447 448 remove-metadata:
448 449 remove-page-labels:
  450 + remove-structure:
449 451 report-memory-usage:
450 452 rotate:
451 453 set-page-labels:
... ...
libqpdf/QPDFJob.cc
... ... @@ -490,6 +490,10 @@ QPDFJob::createQPDF()
490 490 if (m->remove_metadata) {
491 491 pdf.getRoot().removeKey("/Metadata");
492 492 }
  493 + if (m->remove_structure) {
  494 + pdf.getRoot().removeKey("/StructTreeRoot");
  495 + pdf.getRoot().removeKey("/MarkInfo");
  496 + }
493 497  
494 498 for (auto& foreign: page_heap) {
495 499 if (foreign->anyWarnings()) {
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -532,6 +532,13 @@ QPDFJob::Config::removePageLabels()
532 532 }
533 533  
534 534 QPDFJob::Config*
  535 +QPDFJob::Config::removeStructure()
  536 +{
  537 + o.m->remove_structure = true;
  538 + return this;
  539 +}
  540 +
  541 +QPDFJob::Config*
535 542 QPDFJob::Config::reportMemoryUsage()
536 543 {
537 544 o.m->report_mem_usage = true;
... ...
libqpdf/qpdf/auto_job_help.hh
... ... @@ -428,6 +428,8 @@ ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Excl
428 428 )");
429 429 ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file.
430 430 )");
  431 +ap.addOptionHelp("--remove-structure", "modification", "remove structure tree", R"(Exclude the structure tree from the output file.
  432 +)");
431 433 ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... --
432 434  
433 435 Set page labels (explicit page numbers) for the entire file.
... ... @@ -643,12 +645,12 @@ reasons. Use 256-bit encryption instead.
643 645 ap.addOptionHelp("--allow-insecure", "encryption", "allow empty owner passwords", R"(Allow creation of PDF files with empty owner passwords and
644 646 non-empty user passwords when using 256-bit encryption.
645 647 )");
646   -ap.addOptionHelp("--force-V4", "encryption", "force V=4 in encryption dictionary", R"(This option is for testing and is never needed in practice since
647   -qpdf does this automatically when needed.
648   -)");
649 648 }
650 649 static void add_help_6(QPDFArgParser& ap)
651 650 {
  651 +ap.addOptionHelp("--force-V4", "encryption", "force V=4 in encryption dictionary", R"(This option is for testing and is never needed in practice since
  652 +qpdf does this automatically when needed.
  653 +)");
652 654 ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R"(Use an undocumented, unsupported, deprecated encryption
653 655 algorithm that existed only in Acrobat version IX. This option
654 656 should not be used except for compatibility testing.
... ... @@ -828,13 +830,13 @@ ap.addOptionHelp("--description", "add-attachment", "set attachment's descriptio
828 830 Supply descriptive text for the attachment, displayed by some
829 831 PDF viewers.
830 832 )");
  833 +}
  834 +static void add_help_7(QPDFArgParser& ap)
  835 +{
831 836 ap.addOptionHelp("--replace", "add-attachment", "replace attachment with same key", R"(Indicate that any existing attachment with the same key should
832 837 be replaced by the new attachment. Otherwise, qpdf gives an
833 838 error if an attachment with that key is already present.
834 839 )");
835   -}
836   -static void add_help_7(QPDFArgParser& ap)
837   -{
838 840 ap.addHelpTopic("copy-attachments", "copy attachments from another file", R"(The options listed below appear between --copy-attachments-from and
839 841 its terminating "--".
840 842  
... ... @@ -920,14 +922,14 @@ generation numbers for the image objects on each page.
920 922 ap.addOptionHelp("--list-attachments", "inspection", "list embedded files", R"(Show the key and stream number for each embedded file. Combine
921 923 with --verbose for more detailed information.
922 924 )");
  925 +}
  926 +static void add_help_8(QPDFArgParser& ap)
  927 +{
923 928 ap.addOptionHelp("--show-attachment", "inspection", "export an embedded file", R"(--show-attachment=key
924 929  
925 930 Write the contents of the specified attachment to standard
926 931 output as binary data. Get the key with --list-attachments.
927 932 )");
928   -}
929   -static void add_help_8(QPDFArgParser& ap)
930   -{
931 933 ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the
932 934 JSON chapter in the qpdf manual for details.
933 935 )");
... ...
libqpdf/qpdf/auto_job_init.hh
... ... @@ -72,6 +72,7 @@ this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
72 72 this->ap.addBare("remove-info", [this](){c_main->removeInfo();});
73 73 this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();});
74 74 this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
  75 +this->ap.addBare("remove-structure", [this](){c_main->removeStructure();});
75 76 this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
76 77 this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();});
77 78 this->ap.addBare("requires-password", [this](){c_main->requiresPassword();});
... ...
libqpdf/qpdf/auto_job_json_init.hh
... ... @@ -421,6 +421,9 @@ popHandler(); // key: removeMetadata
421 421 pushKey("removePageLabels");
422 422 addBare([this]() { c_main->removePageLabels(); });
423 423 popHandler(); // key: removePageLabels
  424 +pushKey("removeStructure");
  425 +addBare([this]() { c_main->removeStructure(); });
  426 +popHandler(); // key: removeStructure
424 427 pushKey("reportMemoryUsage");
425 428 addBare([this]() { c_main->reportMemoryUsage(); });
426 429 popHandler(); // key: reportMemoryUsage
... ...
libqpdf/qpdf/auto_job_schema.hh
... ... @@ -148,6 +148,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
148 148 "removeInfo": "remove file information",
149 149 "removeMetadata": "remove metadata",
150 150 "removePageLabels": "remove explicit page numbers",
  151 + "removeStructure": "remove structure tree",
151 152 "reportMemoryUsage": "best effort report of memory usage",
152 153 "rotate": "rotate pages",
153 154 "setPageLabels": [
... ...
manual/cli.rst
... ... @@ -1799,7 +1799,7 @@ Related Options
1799 1799 Exclude file information (except modification date) from the output file by
1800 1800 omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in
1801 1801 the document trailer.
1802   - See also :qpdf:ref:`--remove-metadata`.
  1802 + See also :qpdf:ref:`--remove-metadata`, :qpdf:ref:`--remove-structure`.
1803 1803  
1804 1804 .. qpdf:option:: --remove-metadata
1805 1805  
... ... @@ -1809,7 +1809,7 @@ Related Options
1809 1809  
1810 1810 Exclude metadata from the output file by omitting the ``/Metadata``
1811 1811 dictionary in the document catalog.
1812   - See also :qpdf:ref:`--remove-info`.
  1812 + See also :qpdf:ref:`--remove-info`, :qpdf:ref:`--remove-structure`.
1813 1813  
1814 1814 .. qpdf:option:: --remove-page-labels
1815 1815  
... ... @@ -1821,6 +1821,19 @@ Related Options
1821 1821 omitting the ``/PageLabels`` dictionary in the document catalog.
1822 1822 See also :qpdf:ref:`--set-page-labels`.
1823 1823  
  1824 +.. qpdf:option:: --remove-structure
  1825 +
  1826 + .. help: remove structure tree
  1827 +
  1828 + Exclude the structure tree from the output file.
  1829 +
  1830 + Exclude the structure tree from the output file by omitting the
  1831 + ``/StructTreeRoot`` and ``/MarkInfo`` dictionaries in the document
  1832 + catalog.
  1833 + See also :qpdf:ref:`--remove-info`, :qpdf:ref:`--remove-metadata`.
  1834 +
  1835 +
  1836 +
1824 1837 .. qpdf:option:: --set-page-labels label-spec ... --
1825 1838  
1826 1839 .. help: number pages for the entire document
... ...
manual/qpdf.1
... ... @@ -545,6 +545,9 @@ Exclude metadata from the output file.
545 545 .B --remove-page-labels \-\- remove explicit page numbers
546 546 Exclude page labels (explicit page numbers) from the output file.
547 547 .TP
  548 +.B --remove-structure \-\- remove structure tree
  549 +Exclude the structure tree from the output file.
  550 +.TP
548 551 .B --set-page-labels \-\- number pages for the entire document
549 552 --set-page-labels label-spec ... --
550 553  
... ...
manual/release-notes.rst
... ... @@ -13,7 +13,7 @@ more detail.
13 13  
14 14 .. x.y.z: not yet released
15 15  
16   -12.0.1: not yet released
  16 +12.1.0: not yet released
17 17 - Bug fixes
18 18  
19 19 - In ``QPDF::isLinearized`` return false if the first object in the file is
... ... @@ -29,6 +29,11 @@ more detail.
29 29 - Fix two object stream error/warning messages that reported the wrong
30 30 object id.
31 31  
  32 + - CLI Enhancements
  33 +
  34 + - New :qpdf:ref:`--remove-structure` option to exclude the document
  35 + structure tree from the output PDF.
  36 +
32 37 - Other enhancements
33 38  
34 39 - There have been further enhancements to how files with damaged xref
... ...
qpdf/qtest/merge-and-split.test
... ... @@ -14,7 +14,7 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('merge-and-split');
16 16  
17   -my $n_tests = 34;
  17 +my $n_tests = 36;
18 18  
19 19 # Select pages from the same file multiple times including selecting
20 20 # twice from an encrypted file and specifying the password only the
... ... @@ -136,6 +136,15 @@ $td->runtest("check output",
136 136 {$td->FILE => "a.pdf"},
137 137 {$td->FILE => "remove-info-no-moddate.pdf"});
138 138  
  139 +$td->runtest("remove structure tree",
  140 + {$td->COMMAND =>
  141 + "qpdf remove-structure.pdf a.pdf" .
  142 + " --qdf --remove-structure --static-id"},
  143 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  144 +$td->runtest("check output",
  145 + {$td->FILE => "a.pdf"},
  146 + {$td->FILE => "remove-structure.out.pdf"});
  147 +
139 148 $td->runtest("split with shared resources",
140 149 {$td->COMMAND =>
141 150 "qpdf --qdf --static-id" .
... ...
qpdf/qtest/qpdf/remove-structure.out.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 3 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 5 0
  26 +3 0 obj
  27 +<<
  28 + /Contents 4 0 R
  29 + /MediaBox [
  30 + 0
  31 + 0
  32 + 612
  33 + 792
  34 + ]
  35 + /Parent 2 0 R
  36 + /Resources <<
  37 + /Font <<
  38 + /F1 6 0 R
  39 + >>
  40 + /ProcSet 7 0 R
  41 + >>
  42 + /Type /Page
  43 +>>
  44 +endobj
  45 +
  46 +%% Contents for page 1
  47 +%% Original object ID: 6 0
  48 +4 0 obj
  49 +<<
  50 + /Length 5 0 R
  51 +>>
  52 +stream
  53 +BT
  54 + /F1 24 Tf
  55 + 72 720 Td
  56 + (Potato) Tj
  57 +ET
  58 +endstream
  59 +endobj
  60 +
  61 +5 0 obj
  62 +44
  63 +endobj
  64 +
  65 +%% Original object ID: 8 0
  66 +6 0 obj
  67 +<<
  68 + /BaseFont /Helvetica
  69 + /Encoding /WinAnsiEncoding
  70 + /Name /F1
  71 + /Subtype /Type1
  72 + /Type /Font
  73 +>>
  74 +endobj
  75 +
  76 +%% Original object ID: 9 0
  77 +7 0 obj
  78 +[
  79 + /PDF
  80 + /Text
  81 +]
  82 +endobj
  83 +
  84 +xref
  85 +0 8
  86 +0000000000 65535 f
  87 +0000000052 00000 n
  88 +0000000133 00000 n
  89 +0000000242 00000 n
  90 +0000000484 00000 n
  91 +0000000583 00000 n
  92 +0000000629 00000 n
  93 +0000000774 00000 n
  94 +trailer <<
  95 + /Root 1 0 R
  96 + /Size 8
  97 + /ID [<e8147af54e2f4232c87179e90799f037><31415926535897932384626433832795>]
  98 +>>
  99 +startxref
  100 +809
  101 +%%EOF
... ...
qpdf/qtest/qpdf/remove-structure.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /MarkInfo 2 0 R
  9 + /Pages 3 0 R
  10 + /StructTreeRoot 4 0 R
  11 + /Type /Catalog
  12 +>>
  13 +endobj
  14 +
  15 +%% Original object ID: 8 0
  16 +2 0 obj
  17 +<<
  18 + /Marked /False
  19 +>>
  20 +endobj
  21 +
  22 +%% Original object ID: 2 0
  23 +3 0 obj
  24 +<<
  25 + /Count 1
  26 + /Kids [
  27 + 5 0 R
  28 + ]
  29 + /Type /Pages
  30 +>>
  31 +endobj
  32 +
  33 +%% Original object ID: 7 0
  34 +4 0 obj
  35 +<<
  36 + /Type /StructTreeRoot
  37 +>>
  38 +endobj
  39 +
  40 +%% Page 1
  41 +%% Original object ID: 3 0
  42 +5 0 obj
  43 +<<
  44 + /Contents 6 0 R
  45 + /MediaBox [
  46 + 0
  47 + 0
  48 + 612
  49 + 792
  50 + ]
  51 + /Parent 3 0 R
  52 + /Resources <<
  53 + /Font <<
  54 + /F1 8 0 R
  55 + >>
  56 + /ProcSet 9 0 R
  57 + >>
  58 + /Type /Page
  59 +>>
  60 +endobj
  61 +
  62 +%% Contents for page 1
  63 +%% Original object ID: 4 0
  64 +6 0 obj
  65 +<<
  66 + /Length 7 0 R
  67 +>>
  68 +stream
  69 +BT
  70 + /F1 24 Tf
  71 + 72 720 Td
  72 + (Potato) Tj
  73 +ET
  74 +endstream
  75 +endobj
  76 +
  77 +7 0 obj
  78 +44
  79 +endobj
  80 +
  81 +%% Original object ID: 6 0
  82 +8 0 obj
  83 +<<
  84 + /BaseFont /Helvetica
  85 + /Encoding /WinAnsiEncoding
  86 + /Name /F1
  87 + /Subtype /Type1
  88 + /Type /Font
  89 +>>
  90 +endobj
  91 +
  92 +%% Original object ID: 5 0
  93 +9 0 obj
  94 +[
  95 + /PDF
  96 + /Text
  97 +]
  98 +endobj
  99 +
  100 +xref
  101 +0 10
  102 +0000000000 65535 f
  103 +0000000052 00000 n
  104 +0000000175 00000 n
  105 +0000000241 00000 n
  106 +0000000340 00000 n
  107 +0000000423 00000 n
  108 +0000000665 00000 n
  109 +0000000764 00000 n
  110 +0000000810 00000 n
  111 +0000000955 00000 n
  112 +trailer <<
  113 + /Root 1 0 R
  114 + /Size 10
  115 + /ID [<e8147af54e2f4232c87179e90799f037><e8147af54e2f4232c87179e90799f037>]
  116 +>>
  117 +startxref
  118 +990
  119 +%%EOF
... ...