Commit 7cac433e2d9256fb8361aa53d136d4eaacfda6fe

Authored by Jay Berkenbilt
1 parent 543038c5

Implement --set-page-labels and supporting API (fixes #939)

ChangeLog
  1 +2024-01-05 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add --set-page-labels command-line argument and supporting API.
  4 + Fixes #939.
  5 + - QPDFJob::Config::setPageLabels
  6 + - qpdf_page_label_e enumerated type
  7 + - QPDFPageLabelDocumentHelper::pageLabelDict
  8 +
1 9 2024-01-01 Jay Berkenbilt <ejb@ql.org>
2 10  
3 11 * Support comma-separated numeric values with --collate to select
... ...
include/qpdf/Constants.h
... ... @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e {
232 232 /* Encryption/password status for QPDFJob */
233 233 enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 };
234 234  
  235 +/* Page label types */
  236 +enum qpdf_page_label_e {
  237 + pl_none,
  238 + pl_digits,
  239 + pl_alpha_lower,
  240 + pl_alpha_upper,
  241 + pl_roman_lower,
  242 + pl_roman_upper,
  243 +};
  244 +
235 245 #endif /* QPDFCONSTANTS_H */
... ...
include/qpdf/QPDFJob.hh
... ... @@ -296,7 +296,8 @@ class QPDFJob
296 296 Config* config;
297 297 };
298 298  
299   - class PageLabelsConfig {
  299 + class PageLabelsConfig
  300 + {
300 301 friend class QPDFJob;
301 302 friend class Config;
302 303  
... ... @@ -458,6 +459,22 @@ class QPDFJob
458 459 std::vector<int> repeat_pagenos;
459 460 };
460 461  
  462 + struct PageLabelSpec
  463 + {
  464 + PageLabelSpec(
  465 + int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) :
  466 + first_page(first_page),
  467 + label_type(label_type),
  468 + start_num(start_num),
  469 + prefix(prefix)
  470 + {
  471 + }
  472 + int first_page;
  473 + qpdf_page_label_e label_type;
  474 + int start_num{1};
  475 + std::string prefix;
  476 + };
  477 +
461 478 // Helper functions
462 479 static void usage(std::string const& msg);
463 480 static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr);
... ... @@ -694,7 +711,7 @@ class QPDFJob
694 711 bool json_output{false};
695 712 std::string update_from_json;
696 713 bool report_mem_usage{false};
697   - std::vector<std::string> page_label_specs;
  714 + std::vector<PageLabelSpec> page_label_specs;
698 715 };
699 716 std::shared_ptr<Members> m;
700 717 };
... ...
include/qpdf/QPDFPageLabelDocumentHelper.hh
... ... @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper
49 49 QPDF_DLL
50 50 bool hasPageLabels();
51 51  
  52 + // Helper function to create a dictionary suitable for adding to the /PageLabels numbers tree.
  53 + QPDF_DLL
  54 + static QPDFObjectHandle
  55 + pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix);
  56 +
52 57 // Return a page label dictionary representing the page label for the given page. The page does
53 58 // not need to appear explicitly in the page label dictionary. This method will adjust /St as
54 59 // needed to produce a label that is suitable for the page.
... ...
libqpdf/QPDFJob.cc
... ... @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf)
2172 2172 if (m->remove_page_labels) {
2173 2173 pdf.getRoot().removeKey("/PageLabels");
2174 2174 }
  2175 + if (!m->page_label_specs.empty()) {
  2176 + auto nums = QPDFObjectHandle::newArray();
  2177 + auto n_pages = QIntC::to_int(dh.getAllPages().size());
  2178 + int last_page_seen{0};
  2179 + for (auto& spec: m->page_label_specs) {
  2180 + if (spec.first_page < 0) {
  2181 + spec.first_page = n_pages + 1 + spec.first_page;
  2182 + }
  2183 + if (last_page_seen == 0) {
  2184 + if (spec.first_page != 1) {
  2185 + throw std::runtime_error(
  2186 + "the first page label specification must start with page 1");
  2187 + }
  2188 + } else if (spec.first_page <= last_page_seen) {
  2189 + throw std::runtime_error(
  2190 + "page label specifications must be in order by first page");
  2191 + }
  2192 + if (spec.first_page > n_pages) {
  2193 + throw std::runtime_error(
  2194 + "page label spec: page " + std::to_string(spec.first_page) +
  2195 + " is more than the total number of pages (" + std::to_string(n_pages) + ")");
  2196 + }
  2197 + last_page_seen = spec.first_page;
  2198 + nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1));
  2199 + nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict(
  2200 + spec.label_type, spec.start_num, spec.prefix));
  2201 + }
  2202 + auto page_labels = QPDFObjectHandle::newDictionary();
  2203 + page_labels.replaceKey("/Nums", nums);
  2204 + pdf.getRoot().replaceKey("/PageLabels", page_labels);
  2205 + }
2175 2206 if (!m->attachments_to_remove.empty()) {
2176 2207 QPDFEmbeddedFileDocumentHelper efdh(pdf);
2177 2208 for (auto const& key: m->attachments_to_remove) {
... ... @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf)
3019 3050 try {
3020 3051 QUtil::remove_file(backup.c_str());
3021 3052 } catch (QPDFSystemError& e) {
3022   - *m->log->getError()
3023   - << m->message_prefix << ": unable to delete original file (" << e.what() << ");"
3024   - << " original file left in " << backup
3025   - << ", but the input was successfully replaced\n";
  3053 + *m->log->getError() << m->message_prefix << ": unable to delete original file ("
  3054 + << e.what() << ");" << " original file left in " << backup
  3055 + << ", but the input was successfully replaced\n";
3026 3056 }
3027 3057 }
3028 3058 }
... ...
libqpdf/QPDFJob_config.cc
1 1 #include <qpdf/QPDFJob.hh>
2 2  
  3 +#include <regex>
  4 +
3 5 #include <qpdf/QPDFLogger.hh>
4 6 #include <qpdf/QTC.hh>
5 7 #include <qpdf/QUtil.hh>
... ... @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt(
1062 1064 QPDFJob::Config*
1063 1065 QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs)
1064 1066 {
1065   - // XXX validate
1066   - for (auto const& xxx: specs) {
1067   - std::cout << "XXX config: spec: " << xxx << std::endl;
  1067 + static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)");
  1068 + o.m->page_label_specs.clear();
  1069 + for (auto const& spec: specs) {
  1070 + std::smatch match;
  1071 + if (!std::regex_match(spec, match, page_label_re)) {
  1072 + usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]");
  1073 + }
  1074 + auto first_page_str = match[1].str();
  1075 + int first_page;
  1076 + if (first_page_str == "z") {
  1077 + first_page = -1;
  1078 + } else if (first_page_str.at(0) == 'r') {
  1079 + first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str());
  1080 + } else {
  1081 + first_page = QUtil::string_to_int(first_page_str.c_str());
  1082 + }
  1083 + auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0';
  1084 + qpdf_page_label_e label_type;
  1085 + switch (label_type_ch) {
  1086 + case 'D':
  1087 + label_type = pl_digits;
  1088 + break;
  1089 + case 'a':
  1090 + label_type = pl_alpha_lower;
  1091 + break;
  1092 + case 'A':
  1093 + label_type = pl_alpha_upper;
  1094 + break;
  1095 + case 'r':
  1096 + label_type = pl_roman_lower;
  1097 + break;
  1098 + case 'R':
  1099 + label_type = pl_roman_upper;
  1100 + break;
  1101 + default:
  1102 + label_type = pl_none;
  1103 + }
  1104 +
  1105 + auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1;
  1106 + auto prefix = match[4].matched ? match[4].str() : "";
  1107 + // We can't check ordering until we know how many pages there are, so that is delayed until
  1108 + // near the end.
  1109 + o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix);
1068 1110 }
1069   - o.m->page_label_specs = specs;
1070 1111 return this;
1071 1112 }
1072 1113  
... ...
libqpdf/QPDFPageLabelDocumentHelper.cc
... ... @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange(
99 99 }
100 100 }
101 101 }
  102 +
  103 +QPDFObjectHandle
  104 +QPDFPageLabelDocumentHelper::pageLabelDict(
  105 + qpdf_page_label_e label_type, int start_num, std::string_view prefix)
  106 +{
  107 + auto num = "<< /Type /PageLabel >>"_qpdf;
  108 + switch (label_type) {
  109 + case pl_none:
  110 + break;
  111 + case pl_digits:
  112 + num.replaceKey("/S", "/D"_qpdf);
  113 + break;
  114 + case pl_alpha_lower:
  115 + num.replaceKey("/S", "/a"_qpdf);
  116 + break;
  117 + case pl_alpha_upper:
  118 + num.replaceKey("/S", "/A"_qpdf);
  119 + break;
  120 + case pl_roman_lower:
  121 + num.replaceKey("/S", "/r"_qpdf);
  122 + break;
  123 + case pl_roman_upper:
  124 + num.replaceKey("/S", "/R"_qpdf);
  125 + break;
  126 + }
  127 + if (!prefix.empty()) {
  128 + num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix)));
  129 + }
  130 + if (start_num != 1) {
  131 + num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num));
  132 + }
  133 + return num;
  134 +}
... ...
manual/release-notes.rst
... ... @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change):
38 38  
39 39 .. x.y.z: not yet released
40 40  
41   -11.7.1: not yet released
  41 +11.8.0: not yet released
42 42 - Bug fixes:
43 43  
44 44 - When flattening annotations, preserve hyperlinks and other
... ... @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change):
54 54 :qpdf:ref:`--collate` to select different numbers of pages from
55 55 different groups.
56 56  
  57 + - Add :qpdf:ref:`--set-page-labels` option to completely override
  58 + page labels in the output.
  59 +
  60 + - Library Enhancements
  61 +
  62 + - Add API to support :qpdf:ref:`--set-page-labels`:
  63 +
  64 + - ``QPDFJob::Config::setPageLabels``
  65 +
  66 + - ``qpdf_page_label_e`` enumerated type
  67 +
  68 + - ``QPDFPageLabelDocumentHelper::pageLabelDict``
  69 +
57 70 11.7.0: December 24, 2023
58 71 - Bug fixes:
59 72  
... ...
qpdf/qtest/page-labels.test
... ... @@ -29,5 +29,44 @@ $td->runtest("no page labels",
29 29 {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0},
30 30 $td->NORMALIZE_NEWLINES);
31 31  
  32 +# --set-page-labels
  33 +my @errors = (
  34 + ["quack", ".*page label spec must be.*"],
  35 + ["5:r 10:D", ".*the first page .*must start with page 1.*"],
  36 + ["1:r 10:D 31:A",
  37 + ".*page 31 is more than the total number of pages \\(30\\).*"],
  38 +);
  39 +$n_tests += scalar(@errors);
  40 +
  41 +foreach my $d (@errors)
  42 +{
  43 + my ($specs, $err) = @$d;
  44 + $td->runtest("error ($specs)",
  45 + {$td->COMMAND => "qpdf --set-page-labels $specs --" .
  46 + " page-labels-num-tree.pdf a.pdf"},
  47 + {$td->REGEXP => $err, $td->EXIT_STATUS => 2},
  48 + $td->NORMALIZE_NEWLINES);
  49 +}
  50 +
  51 +$n_tests += 4;
  52 +$td->runtest("set page labels",
  53 + {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" .
  54 + " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"},
  55 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  56 + $td->NORMALIZE_NEWLINES);
  57 +$td->runtest("after set page labels",
  58 + {$td->COMMAND => "test_driver 47 a.pdf"},
  59 + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
  60 + $td->NORMALIZE_NEWLINES);
  61 +$td->runtest("set page labels (json)",
  62 + {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" .
  63 + " --job-json-file=set-page-labels.json"},
  64 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  65 + $td->NORMALIZE_NEWLINES);
  66 +$td->runtest("after set page labels",
  67 + {$td->COMMAND => "test_driver 47 b.pdf"},
  68 + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
  69 + $td->NORMALIZE_NEWLINES);
  70 +
32 71 cleanup();
33 72 $td->report($n_tests);
... ...
qpdf/qtest/qpdf/set-page-labels.json 0 → 100644
  1 +{
  2 + "setPageLabels": [
  3 + "1:a",
  4 + "3:R/2",
  5 + "6:r//Z-",
  6 + "8:A/17",
  7 + "r10:D/3",
  8 + "28:",
  9 + "z://end"
  10 + ]
  11 +}
... ...
qpdf/qtest/qpdf/set-page-labels.out 0 → 100644
  1 +1 << /S /a /St 1 >>
  2 +3 << /S /R /St 2 >>
  3 +6 << /P (Z-) /S /r /St 1 >>
  4 +8 << /S /A /St 17 >>
  5 +21 << /S /D /St 3 >>
  6 +28 << /St 1 >>
  7 +30 << /P (end) /St 1 >>
  8 +test 47 done
... ...