Commit 7cac433e2d9256fb8361aa53d136d4eaacfda6fe
1 parent
543038c5
Implement --set-page-labels and supporting API (fixes #939)
Showing
11 changed files
with
226 additions
and
11 deletions
ChangeLog
| 1 | +2024-01-05 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Add --set-page-labels command-line argument and supporting API. | ||
| 4 | + Fixes #939. | ||
| 5 | + - QPDFJob::Config::setPageLabels | ||
| 6 | + - qpdf_page_label_e enumerated type | ||
| 7 | + - QPDFPageLabelDocumentHelper::pageLabelDict | ||
| 8 | + | ||
| 1 | 2024-01-01 Jay Berkenbilt <ejb@ql.org> | 9 | 2024-01-01 Jay Berkenbilt <ejb@ql.org> |
| 2 | 10 | ||
| 3 | * Support comma-separated numeric values with --collate to select | 11 | * Support comma-separated numeric values with --collate to select |
include/qpdf/Constants.h
| @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e { | @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e { | ||
| 232 | /* Encryption/password status for QPDFJob */ | 232 | /* Encryption/password status for QPDFJob */ |
| 233 | enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 }; | 233 | enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 }; |
| 234 | 234 | ||
| 235 | +/* Page label types */ | ||
| 236 | +enum qpdf_page_label_e { | ||
| 237 | + pl_none, | ||
| 238 | + pl_digits, | ||
| 239 | + pl_alpha_lower, | ||
| 240 | + pl_alpha_upper, | ||
| 241 | + pl_roman_lower, | ||
| 242 | + pl_roman_upper, | ||
| 243 | +}; | ||
| 244 | + | ||
| 235 | #endif /* QPDFCONSTANTS_H */ | 245 | #endif /* QPDFCONSTANTS_H */ |
include/qpdf/QPDFJob.hh
| @@ -296,7 +296,8 @@ class QPDFJob | @@ -296,7 +296,8 @@ class QPDFJob | ||
| 296 | Config* config; | 296 | Config* config; |
| 297 | }; | 297 | }; |
| 298 | 298 | ||
| 299 | - class PageLabelsConfig { | 299 | + class PageLabelsConfig |
| 300 | + { | ||
| 300 | friend class QPDFJob; | 301 | friend class QPDFJob; |
| 301 | friend class Config; | 302 | friend class Config; |
| 302 | 303 | ||
| @@ -458,6 +459,22 @@ class QPDFJob | @@ -458,6 +459,22 @@ class QPDFJob | ||
| 458 | std::vector<int> repeat_pagenos; | 459 | std::vector<int> repeat_pagenos; |
| 459 | }; | 460 | }; |
| 460 | 461 | ||
| 462 | + struct PageLabelSpec | ||
| 463 | + { | ||
| 464 | + PageLabelSpec( | ||
| 465 | + int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) : | ||
| 466 | + first_page(first_page), | ||
| 467 | + label_type(label_type), | ||
| 468 | + start_num(start_num), | ||
| 469 | + prefix(prefix) | ||
| 470 | + { | ||
| 471 | + } | ||
| 472 | + int first_page; | ||
| 473 | + qpdf_page_label_e label_type; | ||
| 474 | + int start_num{1}; | ||
| 475 | + std::string prefix; | ||
| 476 | + }; | ||
| 477 | + | ||
| 461 | // Helper functions | 478 | // Helper functions |
| 462 | static void usage(std::string const& msg); | 479 | static void usage(std::string const& msg); |
| 463 | static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr); | 480 | static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr); |
| @@ -694,7 +711,7 @@ class QPDFJob | @@ -694,7 +711,7 @@ class QPDFJob | ||
| 694 | bool json_output{false}; | 711 | bool json_output{false}; |
| 695 | std::string update_from_json; | 712 | std::string update_from_json; |
| 696 | bool report_mem_usage{false}; | 713 | bool report_mem_usage{false}; |
| 697 | - std::vector<std::string> page_label_specs; | 714 | + std::vector<PageLabelSpec> page_label_specs; |
| 698 | }; | 715 | }; |
| 699 | std::shared_ptr<Members> m; | 716 | std::shared_ptr<Members> m; |
| 700 | }; | 717 | }; |
include/qpdf/QPDFPageLabelDocumentHelper.hh
| @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper | @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper | ||
| 49 | QPDF_DLL | 49 | QPDF_DLL |
| 50 | bool hasPageLabels(); | 50 | bool hasPageLabels(); |
| 51 | 51 | ||
| 52 | + // Helper function to create a dictionary suitable for adding to the /PageLabels number tree. | ||
| 53 | + QPDF_DLL | ||
| 54 | + static QPDFObjectHandle | ||
| 55 | + pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix); | ||
| 56 | + | ||
| 52 | // Return a page label dictionary representing the page label for the given page. The page does | 57 | // Return a page label dictionary representing the page label for the given page. The page does |
| 53 | // not need to appear explicitly in the page label dictionary. This method will adjust /St as | 58 | // not need to appear explicitly in the page label dictionary. This method will adjust /St as |
| 54 | // needed to produce a label that is suitable for the page. | 59 | // needed to produce a label that is suitable for the page. |
libqpdf/QPDFJob.cc
| @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf) | @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf) | ||
| 2172 | if (m->remove_page_labels) { | 2172 | if (m->remove_page_labels) { |
| 2173 | pdf.getRoot().removeKey("/PageLabels"); | 2173 | pdf.getRoot().removeKey("/PageLabels"); |
| 2174 | } | 2174 | } |
| 2175 | + if (!m->page_label_specs.empty()) { | ||
| 2176 | + auto nums = QPDFObjectHandle::newArray(); | ||
| 2177 | + auto n_pages = QIntC::to_int(dh.getAllPages().size()); | ||
| 2178 | + int last_page_seen{0}; | ||
| 2179 | + for (auto& spec: m->page_label_specs) { | ||
| 2180 | + if (spec.first_page < 0) { | ||
| 2181 | + spec.first_page = n_pages + 1 + spec.first_page; | ||
| 2182 | + } | ||
| 2183 | + if (last_page_seen == 0) { | ||
| 2184 | + if (spec.first_page != 1) { | ||
| 2185 | + throw std::runtime_error( | ||
| 2186 | + "the first page label specification must start with page 1"); | ||
| 2187 | + } | ||
| 2188 | + } else if (spec.first_page <= last_page_seen) { | ||
| 2189 | + throw std::runtime_error( | ||
| 2190 | + "page label specifications must be in order by first page"); | ||
| 2191 | + } | ||
| 2192 | + if (spec.first_page > n_pages) { | ||
| 2193 | + throw std::runtime_error( | ||
| 2194 | + "page label spec: page " + std::to_string(spec.first_page) + | ||
| 2195 | + " is more than the total number of pages (" + std::to_string(n_pages) + ")"); | ||
| 2196 | + } | ||
| 2197 | + last_page_seen = spec.first_page; | ||
| 2198 | + nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1)); | ||
| 2199 | + nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict( | ||
| 2200 | + spec.label_type, spec.start_num, spec.prefix)); | ||
| 2201 | + } | ||
| 2202 | + auto page_labels = QPDFObjectHandle::newDictionary(); | ||
| 2203 | + page_labels.replaceKey("/Nums", nums); | ||
| 2204 | + pdf.getRoot().replaceKey("/PageLabels", page_labels); | ||
| 2205 | + } | ||
| 2175 | if (!m->attachments_to_remove.empty()) { | 2206 | if (!m->attachments_to_remove.empty()) { |
| 2176 | QPDFEmbeddedFileDocumentHelper efdh(pdf); | 2207 | QPDFEmbeddedFileDocumentHelper efdh(pdf); |
| 2177 | for (auto const& key: m->attachments_to_remove) { | 2208 | for (auto const& key: m->attachments_to_remove) { |
| @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3019 | try { | 3050 | try { |
| 3020 | QUtil::remove_file(backup.c_str()); | 3051 | QUtil::remove_file(backup.c_str()); |
| 3021 | } catch (QPDFSystemError& e) { | 3052 | } catch (QPDFSystemError& e) { |
| 3022 | - *m->log->getError() | ||
| 3023 | - << m->message_prefix << ": unable to delete original file (" << e.what() << ");" | ||
| 3024 | - << " original file left in " << backup | ||
| 3025 | - << ", but the input was successfully replaced\n"; | 3053 | + *m->log->getError() << m->message_prefix << ": unable to delete original file (" |
| 3054 | + << e.what() << ");" << " original file left in " << backup | ||
| 3055 | + << ", but the input was successfully replaced\n"; | ||
| 3026 | } | 3056 | } |
| 3027 | } | 3057 | } |
| 3028 | } | 3058 | } |
libqpdf/QPDFJob_config.cc
| 1 | #include <qpdf/QPDFJob.hh> | 1 | #include <qpdf/QPDFJob.hh> |
| 2 | 2 | ||
| 3 | +#include <regex> | ||
| 4 | + | ||
| 3 | #include <qpdf/QPDFLogger.hh> | 5 | #include <qpdf/QPDFLogger.hh> |
| 4 | #include <qpdf/QTC.hh> | 6 | #include <qpdf/QTC.hh> |
| 5 | #include <qpdf/QUtil.hh> | 7 | #include <qpdf/QUtil.hh> |
| @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt( | @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt( | ||
| 1062 | QPDFJob::Config* | 1064 | QPDFJob::Config* |
| 1063 | QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs) | 1065 | QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs) |
| 1064 | { | 1066 | { |
| 1065 | - // XXX validate | ||
| 1066 | - for (auto const& xxx: specs) { | ||
| 1067 | - std::cout << "XXX config: spec: " << xxx << std::endl; | 1067 | + static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)"); |
| 1068 | + o.m->page_label_specs.clear(); | ||
| 1069 | + for (auto const& spec: specs) { | ||
| 1070 | + std::smatch match; | ||
| 1071 | + if (!std::regex_match(spec, match, page_label_re)) { | ||
| 1072 | + usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]"); | ||
| 1073 | + } | ||
| 1074 | + auto first_page_str = match[1].str(); | ||
| 1075 | + int first_page; | ||
| 1076 | + if (first_page_str == "z") { | ||
| 1077 | + first_page = -1; | ||
| 1078 | + } else if (first_page_str.at(0) == 'r') { | ||
| 1079 | + first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str()); | ||
| 1080 | + } else { | ||
| 1081 | + first_page = QUtil::string_to_int(first_page_str.c_str()); | ||
| 1082 | + } | ||
| 1083 | + auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0'; | ||
| 1084 | + qpdf_page_label_e label_type; | ||
| 1085 | + switch (label_type_ch) { | ||
| 1086 | + case 'D': | ||
| 1087 | + label_type = pl_digits; | ||
| 1088 | + break; | ||
| 1089 | + case 'a': | ||
| 1090 | + label_type = pl_alpha_lower; | ||
| 1091 | + break; | ||
| 1092 | + case 'A': | ||
| 1093 | + label_type = pl_alpha_upper; | ||
| 1094 | + break; | ||
| 1095 | + case 'r': | ||
| 1096 | + label_type = pl_roman_lower; | ||
| 1097 | + break; | ||
| 1098 | + case 'R': | ||
| 1099 | + label_type = pl_roman_upper; | ||
| 1100 | + break; | ||
| 1101 | + default: | ||
| 1102 | + label_type = pl_none; | ||
| 1103 | + } | ||
| 1104 | + | ||
| 1105 | + auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1; | ||
| 1106 | + auto prefix = match[4].matched ? match[4].str() : ""; | ||
| 1107 | + // We can't check ordering until we know how many pages there are, so that is delayed until | ||
| 1108 | + // near the end. | ||
| 1109 | + o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix); | ||
| 1068 | } | 1110 | } |
| 1069 | - o.m->page_label_specs = specs; | ||
| 1070 | return this; | 1111 | return this; |
| 1071 | } | 1112 | } |
| 1072 | 1113 |
libqpdf/QPDFPageLabelDocumentHelper.cc
| @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange( | @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange( | ||
| 99 | } | 99 | } |
| 100 | } | 100 | } |
| 101 | } | 101 | } |
| 102 | + | ||
| 103 | +QPDFObjectHandle | ||
| 104 | +QPDFPageLabelDocumentHelper::pageLabelDict( | ||
| 105 | + qpdf_page_label_e label_type, int start_num, std::string_view prefix) | ||
| 106 | +{ | ||
| 107 | + auto num = "<< /Type /PageLabel >>"_qpdf; | ||
| 108 | + switch (label_type) { | ||
| 109 | + case pl_none: | ||
| 110 | + break; | ||
| 111 | + case pl_digits: | ||
| 112 | + num.replaceKey("/S", "/D"_qpdf); | ||
| 113 | + break; | ||
| 114 | + case pl_alpha_lower: | ||
| 115 | + num.replaceKey("/S", "/a"_qpdf); | ||
| 116 | + break; | ||
| 117 | + case pl_alpha_upper: | ||
| 118 | + num.replaceKey("/S", "/A"_qpdf); | ||
| 119 | + break; | ||
| 120 | + case pl_roman_lower: | ||
| 121 | + num.replaceKey("/S", "/r"_qpdf); | ||
| 122 | + break; | ||
| 123 | + case pl_roman_upper: | ||
| 124 | + num.replaceKey("/S", "/R"_qpdf); | ||
| 125 | + break; | ||
| 126 | + } | ||
| 127 | + if (!prefix.empty()) { | ||
| 128 | + num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix))); | ||
| 129 | + } | ||
| 130 | + if (start_num != 1) { | ||
| 131 | + num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num)); | ||
| 132 | + } | ||
| 133 | + return num; | ||
| 134 | +} |
manual/release-notes.rst
| @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change): | @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change): | ||
| 38 | 38 | ||
| 39 | .. x.y.z: not yet released | 39 | .. x.y.z: not yet released |
| 40 | 40 | ||
| 41 | -11.7.1: not yet released | 41 | +11.8.0: not yet released |
| 42 | - Bug fixes: | 42 | - Bug fixes: |
| 43 | 43 | ||
| 44 | - When flattening annotations, preserve hyperlinks and other | 44 | - When flattening annotations, preserve hyperlinks and other |
| @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change): | @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change): | ||
| 54 | :qpdf:ref:`--collate` to select different numbers of pages from | 54 | :qpdf:ref:`--collate` to select different numbers of pages from |
| 55 | different groups. | 55 | different groups. |
| 56 | 56 | ||
| 57 | + - Add :qpdf:ref:`--set-page-labels` option to completely override | ||
| 58 | + page labels in the output. | ||
| 59 | + | ||
| 60 | + - Library Enhancements | ||
| 61 | + | ||
| 62 | + - Add API to support :qpdf:ref:`--set-page-labels`: | ||
| 63 | + | ||
| 64 | + - ``QPDFJob::Config::setPageLabels`` | ||
| 65 | + | ||
| 66 | + - ``qpdf_page_label_e`` enumerated type | ||
| 67 | + | ||
| 68 | + - ``QPDFPageLabelDocumentHelper::pageLabelDict`` | ||
| 69 | + | ||
| 57 | 11.7.0: December 24, 2023 | 70 | 11.7.0: December 24, 2023 |
| 58 | - Bug fixes: | 71 | - Bug fixes: |
| 59 | 72 |
qpdf/qtest/page-labels.test
| @@ -29,5 +29,44 @@ $td->runtest("no page labels", | @@ -29,5 +29,44 @@ $td->runtest("no page labels", | ||
| 29 | {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0}, | 29 | {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0}, |
| 30 | $td->NORMALIZE_NEWLINES); | 30 | $td->NORMALIZE_NEWLINES); |
| 31 | 31 | ||
| 32 | +# --set-page-labels | ||
| 33 | +my @errors = ( | ||
| 34 | + ["quack", ".*page label spec must be.*"], | ||
| 35 | + ["5:r 10:D", ".*the first page .*must start with page 1.*"], | ||
| 36 | + ["1:r 10:D 31:A", | ||
| 37 | + ".*page 31 is more than the total number of pages \\(30\\).*"], | ||
| 38 | +); | ||
| 39 | +$n_tests += scalar(@errors); | ||
| 40 | + | ||
| 41 | +foreach my $d (@errors) | ||
| 42 | +{ | ||
| 43 | + my ($specs, $err) = @$d; | ||
| 44 | + $td->runtest("error ($specs)", | ||
| 45 | + {$td->COMMAND => "qpdf --set-page-labels $specs --" . | ||
| 46 | + " page-labels-num-tree.pdf a.pdf"}, | ||
| 47 | + {$td->REGEXP => $err, $td->EXIT_STATUS => 2}, | ||
| 48 | + $td->NORMALIZE_NEWLINES); | ||
| 49 | +} | ||
| 50 | + | ||
| 51 | +$n_tests += 4; | ||
| 52 | +$td->runtest("set page labels", | ||
| 53 | + {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" . | ||
| 54 | + " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"}, | ||
| 55 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | ||
| 56 | + $td->NORMALIZE_NEWLINES); | ||
| 57 | +$td->runtest("after set page labels", | ||
| 58 | + {$td->COMMAND => "test_driver 47 a.pdf"}, | ||
| 59 | + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, | ||
| 60 | + $td->NORMALIZE_NEWLINES); | ||
| 61 | +$td->runtest("set page labels (json)", | ||
| 62 | + {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" . | ||
| 63 | + " --job-json-file=set-page-labels.json"}, | ||
| 64 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | ||
| 65 | + $td->NORMALIZE_NEWLINES); | ||
| 66 | +$td->runtest("after set page labels", | ||
| 67 | + {$td->COMMAND => "test_driver 47 b.pdf"}, | ||
| 68 | + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, | ||
| 69 | + $td->NORMALIZE_NEWLINES); | ||
| 70 | + | ||
| 32 | cleanup(); | 71 | cleanup(); |
| 33 | $td->report($n_tests); | 72 | $td->report($n_tests); |
qpdf/qtest/qpdf/set-page-labels.json
0 → 100644