Commit 7cac433e2d9256fb8361aa53d136d4eaacfda6fe
1 parent
543038c5
Implement --set-page-labels and supporting API (fixes #939)
Showing
11 changed files
with
226 additions
and
11 deletions
ChangeLog
| 1 | +2024-01-05 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add --set-page-labels command-line argument and supporting API. | |
| 4 | + Fixes #939. | |
| 5 | + - QPDFJob::Config::setPageLabels | |
| 6 | + - qpdf_page_label_e enumerated type | |
| 7 | + - QPDFPageLabelDocumentHelper::pageLabelDict | |
| 8 | + | |
| 1 | 9 | 2024-01-01 Jay Berkenbilt <ejb@ql.org> |
| 2 | 10 | |
| 3 | 11 | * Support comma-separated numeric values with --collate to select | ... | ... |
include/qpdf/Constants.h
| ... | ... | @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e { |
| 232 | 232 | /* Encryption/password status for QPDFJob */ |
| 233 | 233 | enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 }; |
| 234 | 234 | |
| 235 | +/* Page label types */ | |
| 236 | +enum qpdf_page_label_e { | |
| 237 | + pl_none, | |
| 238 | + pl_digits, | |
| 239 | + pl_alpha_lower, | |
| 240 | + pl_alpha_upper, | |
| 241 | + pl_roman_lower, | |
| 242 | + pl_roman_upper, | |
| 243 | +}; | |
| 244 | + | |
| 235 | 245 | #endif /* QPDFCONSTANTS_H */ | ... | ... |
include/qpdf/QPDFJob.hh
| ... | ... | @@ -296,7 +296,8 @@ class QPDFJob |
| 296 | 296 | Config* config; |
| 297 | 297 | }; |
| 298 | 298 | |
| 299 | - class PageLabelsConfig { | |
| 299 | + class PageLabelsConfig | |
| 300 | + { | |
| 300 | 301 | friend class QPDFJob; |
| 301 | 302 | friend class Config; |
| 302 | 303 | |
| ... | ... | @@ -458,6 +459,22 @@ class QPDFJob |
| 458 | 459 | std::vector<int> repeat_pagenos; |
| 459 | 460 | }; |
| 460 | 461 | |
| 462 | + struct PageLabelSpec | |
| 463 | + { | |
| 464 | + PageLabelSpec( | |
| 465 | + int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) : | |
| 466 | + first_page(first_page), | |
| 467 | + label_type(label_type), | |
| 468 | + start_num(start_num), | |
| 469 | + prefix(prefix) | |
| 470 | + { | |
| 471 | + } | |
| 472 | + int first_page; | |
| 473 | + qpdf_page_label_e label_type; | |
| 474 | + int start_num{1}; | |
| 475 | + std::string prefix; | |
| 476 | + }; | |
| 477 | + | |
| 461 | 478 | // Helper functions |
| 462 | 479 | static void usage(std::string const& msg); |
| 463 | 480 | static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr); |
| ... | ... | @@ -694,7 +711,7 @@ class QPDFJob |
| 694 | 711 | bool json_output{false}; |
| 695 | 712 | std::string update_from_json; |
| 696 | 713 | bool report_mem_usage{false}; |
| 697 | - std::vector<std::string> page_label_specs; | |
| 714 | + std::vector<PageLabelSpec> page_label_specs; | |
| 698 | 715 | }; |
| 699 | 716 | std::shared_ptr<Members> m; |
| 700 | 717 | }; | ... | ... |
include/qpdf/QPDFPageLabelDocumentHelper.hh
| ... | ... | @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper |
| 49 | 49 | QPDF_DLL |
| 50 | 50 | bool hasPageLabels(); |
| 51 | 51 | |
| 52 | + // Helper function to create a dictionary suitable for adding to the /PageLabels number tree. | |
| 53 | + QPDF_DLL | |
| 54 | + static QPDFObjectHandle | |
| 55 | + pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix); | |
| 56 | + | |
| 52 | 57 | // Return a page label dictionary representing the page label for the given page. The page does |
| 53 | 58 | // not need to appear explicitly in the page label dictionary. This method will adjust /St as |
| 54 | 59 | // needed to produce a label that is suitable for the page. | ... | ... |
libqpdf/QPDFJob.cc
| ... | ... | @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf) |
| 2172 | 2172 | if (m->remove_page_labels) { |
| 2173 | 2173 | pdf.getRoot().removeKey("/PageLabels"); |
| 2174 | 2174 | } |
| 2175 | + if (!m->page_label_specs.empty()) { | |
| 2176 | + auto nums = QPDFObjectHandle::newArray(); | |
| 2177 | + auto n_pages = QIntC::to_int(dh.getAllPages().size()); | |
| 2178 | + int last_page_seen{0}; | |
| 2179 | + for (auto& spec: m->page_label_specs) { | |
| 2180 | + if (spec.first_page < 0) { | |
| 2181 | + spec.first_page = n_pages + 1 + spec.first_page; | |
| 2182 | + } | |
| 2183 | + if (last_page_seen == 0) { | |
| 2184 | + if (spec.first_page != 1) { | |
| 2185 | + throw std::runtime_error( | |
| 2186 | + "the first page label specification must start with page 1"); | |
| 2187 | + } | |
| 2188 | + } else if (spec.first_page <= last_page_seen) { | |
| 2189 | + throw std::runtime_error( | |
| 2190 | + "page label specifications must be in order by first page"); | |
| 2191 | + } | |
| 2192 | + if (spec.first_page > n_pages) { | |
| 2193 | + throw std::runtime_error( | |
| 2194 | + "page label spec: page " + std::to_string(spec.first_page) + | |
| 2195 | + " is more than the total number of pages (" + std::to_string(n_pages) + ")"); | |
| 2196 | + } | |
| 2197 | + last_page_seen = spec.first_page; | |
| 2198 | + nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1)); | |
| 2199 | + nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict( | |
| 2200 | + spec.label_type, spec.start_num, spec.prefix)); | |
| 2201 | + } | |
| 2202 | + auto page_labels = QPDFObjectHandle::newDictionary(); | |
| 2203 | + page_labels.replaceKey("/Nums", nums); | |
| 2204 | + pdf.getRoot().replaceKey("/PageLabels", page_labels); | |
| 2205 | + } | |
| 2175 | 2206 | if (!m->attachments_to_remove.empty()) { |
| 2176 | 2207 | QPDFEmbeddedFileDocumentHelper efdh(pdf); |
| 2177 | 2208 | for (auto const& key: m->attachments_to_remove) { |
| ... | ... | @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf) |
| 3019 | 3050 | try { |
| 3020 | 3051 | QUtil::remove_file(backup.c_str()); |
| 3021 | 3052 | } catch (QPDFSystemError& e) { |
| 3022 | - *m->log->getError() | |
| 3023 | - << m->message_prefix << ": unable to delete original file (" << e.what() << ");" | |
| 3024 | - << " original file left in " << backup | |
| 3025 | - << ", but the input was successfully replaced\n"; | |
| 3053 | + *m->log->getError() << m->message_prefix << ": unable to delete original file (" | |
| 3054 | + << e.what() << ");" << " original file left in " << backup | |
| 3055 | + << ", but the input was successfully replaced\n"; | |
| 3026 | 3056 | } |
| 3027 | 3057 | } |
| 3028 | 3058 | } | ... | ... |
libqpdf/QPDFJob_config.cc
| 1 | 1 | #include <qpdf/QPDFJob.hh> |
| 2 | 2 | |
| 3 | +#include <regex> | |
| 4 | + | |
| 3 | 5 | #include <qpdf/QPDFLogger.hh> |
| 4 | 6 | #include <qpdf/QTC.hh> |
| 5 | 7 | #include <qpdf/QUtil.hh> |
| ... | ... | @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt( |
| 1062 | 1064 | QPDFJob::Config* |
| 1063 | 1065 | QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs) |
| 1064 | 1066 | { |
| 1065 | - // XXX validate | |
| 1066 | - for (auto const& xxx: specs) { | |
| 1067 | - std::cout << "XXX config: spec: " << xxx << std::endl; | |
| 1067 | + static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)"); | |
| 1068 | + o.m->page_label_specs.clear(); | |
| 1069 | + for (auto const& spec: specs) { | |
| 1070 | + std::smatch match; | |
| 1071 | + if (!std::regex_match(spec, match, page_label_re)) { | |
| 1072 | + usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]"); | |
| 1073 | + } | |
| 1074 | + auto first_page_str = match[1].str(); | |
| 1075 | + int first_page; | |
| 1076 | + if (first_page_str == "z") { | |
| 1077 | + first_page = -1; | |
| 1078 | + } else if (first_page_str.at(0) == 'r') { | |
| 1079 | + first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str()); | |
| 1080 | + } else { | |
| 1081 | + first_page = QUtil::string_to_int(first_page_str.c_str()); | |
| 1082 | + } | |
| 1083 | + auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0'; | |
| 1084 | + qpdf_page_label_e label_type; | |
| 1085 | + switch (label_type_ch) { | |
| 1086 | + case 'D': | |
| 1087 | + label_type = pl_digits; | |
| 1088 | + break; | |
| 1089 | + case 'a': | |
| 1090 | + label_type = pl_alpha_lower; | |
| 1091 | + break; | |
| 1092 | + case 'A': | |
| 1093 | + label_type = pl_alpha_upper; | |
| 1094 | + break; | |
| 1095 | + case 'r': | |
| 1096 | + label_type = pl_roman_lower; | |
| 1097 | + break; | |
| 1098 | + case 'R': | |
| 1099 | + label_type = pl_roman_upper; | |
| 1100 | + break; | |
| 1101 | + default: | |
| 1102 | + label_type = pl_none; | |
| 1103 | + } | |
| 1104 | + | |
| 1105 | + auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1; | |
| 1106 | + auto prefix = match[4].matched ? match[4].str() : ""; | |
| 1107 | + // We can't check ordering until we know how many pages there are, so that is delayed until | |
| 1108 | + // near the end. | |
| 1109 | + o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix); | |
| 1068 | 1110 | } |
| 1069 | - o.m->page_label_specs = specs; | |
| 1070 | 1111 | return this; |
| 1071 | 1112 | } |
| 1072 | 1113 | ... | ... |
libqpdf/QPDFPageLabelDocumentHelper.cc
| ... | ... | @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange( |
| 99 | 99 | } |
| 100 | 100 | } |
| 101 | 101 | } |
| 102 | + | |
| 103 | +QPDFObjectHandle | |
| 104 | +QPDFPageLabelDocumentHelper::pageLabelDict( | |
| 105 | + qpdf_page_label_e label_type, int start_num, std::string_view prefix) | |
| 106 | +{ | |
| 107 | + auto num = "<< /Type /PageLabel >>"_qpdf; | |
| 108 | + switch (label_type) { | |
| 109 | + case pl_none: | |
| 110 | + break; | |
| 111 | + case pl_digits: | |
| 112 | + num.replaceKey("/S", "/D"_qpdf); | |
| 113 | + break; | |
| 114 | + case pl_alpha_lower: | |
| 115 | + num.replaceKey("/S", "/a"_qpdf); | |
| 116 | + break; | |
| 117 | + case pl_alpha_upper: | |
| 118 | + num.replaceKey("/S", "/A"_qpdf); | |
| 119 | + break; | |
| 120 | + case pl_roman_lower: | |
| 121 | + num.replaceKey("/S", "/r"_qpdf); | |
| 122 | + break; | |
| 123 | + case pl_roman_upper: | |
| 124 | + num.replaceKey("/S", "/R"_qpdf); | |
| 125 | + break; | |
| 126 | + } | |
| 127 | + if (!prefix.empty()) { | |
| 128 | + num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix))); | |
| 129 | + } | |
| 130 | + if (start_num != 1) { | |
| 131 | + num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num)); | |
| 132 | + } | |
| 133 | + return num; | |
| 134 | +} | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change): |
| 38 | 38 | |
| 39 | 39 | .. x.y.z: not yet released |
| 40 | 40 | |
| 41 | -11.7.1: not yet released | |
| 41 | +11.8.0: not yet released | |
| 42 | 42 | - Bug fixes: |
| 43 | 43 | |
| 44 | 44 | - When flattening annotations, preserve hyperlinks and other |
| ... | ... | @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change): |
| 54 | 54 | :qpdf:ref:`--collate` to select different numbers of pages from |
| 55 | 55 | different groups. |
| 56 | 56 | |
| 57 | + - Add :qpdf:ref:`--set-page-labels` option to completely override | |
| 58 | + page labels in the output. | |
| 59 | + | |
| 60 | + - Library Enhancements | |
| 61 | + | |
| 62 | + - Add API to support :qpdf:ref:`--set-page-labels`: | |
| 63 | + | |
| 64 | + - ``QPDFJob::Config::setPageLabels`` | |
| 65 | + | |
| 66 | + - ``qpdf_page_label_e`` enumerated type | |
| 67 | + | |
| 68 | + - ``QPDFPageLabelDocumentHelper::pageLabelDict`` | |
| 69 | + | |
| 57 | 70 | 11.7.0: December 24, 2023 |
| 58 | 71 | - Bug fixes: |
| 59 | 72 | ... | ... |
qpdf/qtest/page-labels.test
| ... | ... | @@ -29,5 +29,44 @@ $td->runtest("no page labels", |
| 29 | 29 | {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0}, |
| 30 | 30 | $td->NORMALIZE_NEWLINES); |
| 31 | 31 | |
| 32 | +# --set-page-labels | |
| 33 | +my @errors = ( | |
| 34 | + ["quack", ".*page label spec must be.*"], | |
| 35 | + ["5:r 10:D", ".*the first page .*must start with page 1.*"], | |
| 36 | + ["1:r 10:D 31:A", | |
| 37 | + ".*page 31 is more than the total number of pages \\(30\\).*"], | |
| 38 | +); | |
| 39 | +$n_tests += scalar(@errors); | |
| 40 | + | |
| 41 | +foreach my $d (@errors) | |
| 42 | +{ | |
| 43 | + my ($specs, $err) = @$d; | |
| 44 | + $td->runtest("error ($specs)", | |
| 45 | + {$td->COMMAND => "qpdf --set-page-labels $specs --" . | |
| 46 | + " page-labels-num-tree.pdf a.pdf"}, | |
| 47 | + {$td->REGEXP => $err, $td->EXIT_STATUS => 2}, | |
| 48 | + $td->NORMALIZE_NEWLINES); | |
| 49 | +} | |
| 50 | + | |
| 51 | +$n_tests += 4; | |
| 52 | +$td->runtest("set page labels", | |
| 53 | + {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" . | |
| 54 | + " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"}, | |
| 55 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | |
| 56 | + $td->NORMALIZE_NEWLINES); | |
| 57 | +$td->runtest("after set page labels", | |
| 58 | + {$td->COMMAND => "test_driver 47 a.pdf"}, | |
| 59 | + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, | |
| 60 | + $td->NORMALIZE_NEWLINES); | |
| 61 | +$td->runtest("set page labels (json)", | |
| 62 | + {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" . | |
| 63 | + " --job-json-file=set-page-labels.json"}, | |
| 64 | + {$td->STRING => "", $td->EXIT_STATUS => 0}, | |
| 65 | + $td->NORMALIZE_NEWLINES); | |
| 66 | +$td->runtest("after set page labels", | |
| 67 | + {$td->COMMAND => "test_driver 47 b.pdf"}, | |
| 68 | + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, | |
| 69 | + $td->NORMALIZE_NEWLINES); | |
| 70 | + | |
| 32 | 71 | cleanup(); |
| 33 | 72 | $td->report($n_tests); | ... | ... |
qpdf/qtest/qpdf/set-page-labels.json
0 → 100644