Commit 7cac433e2d9256fb8361aa53d136d4eaacfda6fe

Authored by Jay Berkenbilt
1 parent 543038c5

Implement --set-page-labels and supporting API (fixes #939)

ChangeLog
  1 +2024-01-05 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add --set-page-labels command-line argument and supporting API.
  4 + Fixes #939.
  5 + - QPDFJob::Config::setPageLabels
  6 + - qpdf_page_label_e enumerated type
  7 + - QPDFPageLabelDocumentHelper::pageLabelDict
  8 +
1 2024-01-01 Jay Berkenbilt <ejb@ql.org> 9 2024-01-01 Jay Berkenbilt <ejb@ql.org>
2 10
3 * Support comma-separated numeric values with --collate to select 11 * Support comma-separated numeric values with --collate to select
include/qpdf/Constants.h
@@ -232,4 +232,14 @@ enum pdf_annotation_flag_e { @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e {
232 /* Encryption/password status for QPDFJob */ 232 /* Encryption/password status for QPDFJob */
233 enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 }; 233 enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 };
234 234
  235 +/* Page label types: the numbering style of a page label range. QPDFPageLabelDocumentHelper::pageLabelDict maps each value to the /S entry noted below. */
  236 +enum qpdf_page_label_e {
  237 + pl_none, /* no /S entry is written */
  238 + pl_digits, /* written as /S /D */
  239 + pl_alpha_lower, /* written as /S /a */
  240 + pl_alpha_upper, /* written as /S /A */
  241 + pl_roman_lower, /* written as /S /r */
  242 + pl_roman_upper, /* written as /S /R */
  243 +};
  244 +
235 #endif /* QPDFCONSTANTS_H */ 245 #endif /* QPDFCONSTANTS_H */
include/qpdf/QPDFJob.hh
@@ -296,7 +296,8 @@ class QPDFJob @@ -296,7 +296,8 @@ class QPDFJob
296 Config* config; 296 Config* config;
297 }; 297 };
298 298
299 - class PageLabelsConfig { 299 + class PageLabelsConfig
  300 + {
300 friend class QPDFJob; 301 friend class QPDFJob;
301 friend class Config; 302 friend class Config;
302 303
@@ -458,6 +459,22 @@ class QPDFJob @@ -458,6 +459,22 @@ class QPDFJob
458 std::vector<int> repeat_pagenos; 459 std::vector<int> repeat_pagenos;
459 }; 460 };
460 461
  462 + struct PageLabelSpec // one parsed page label specification, as built by Config::setPageLabels
  463 + {
  464 + PageLabelSpec(
  465 + int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) :
  466 + first_page(first_page),
  467 + label_type(label_type),
  468 + start_num(start_num),
  469 + prefix(prefix)
  470 + {
  471 + }
  472 + int first_page; // 1-based first page of the range; a negative value counts back from the last page and is resolved once the page count is known
  473 + qpdf_page_label_e label_type; // numbering style for the range
  474 + int start_num{1}; // starting number for the range; the constructor always overwrites this default
  475 + std::string prefix; // label prefix; may be empty
  476 + };
  477 +
461 // Helper functions 478 // Helper functions
462 static void usage(std::string const& msg); 479 static void usage(std::string const& msg);
463 static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr); 480 static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr);
@@ -694,7 +711,7 @@ class QPDFJob @@ -694,7 +711,7 @@ class QPDFJob
694 bool json_output{false}; 711 bool json_output{false};
695 std::string update_from_json; 712 std::string update_from_json;
696 bool report_mem_usage{false}; 713 bool report_mem_usage{false};
697 - std::vector<std::string> page_label_specs; 714 + std::vector<PageLabelSpec> page_label_specs;
698 }; 715 };
699 std::shared_ptr<Members> m; 716 std::shared_ptr<Members> m;
700 }; 717 };
include/qpdf/QPDFPageLabelDocumentHelper.hh
@@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper
49 QPDF_DLL 49 QPDF_DLL
50 bool hasPageLabels(); 50 bool hasPageLabels();
51 51
  52 + // Helper function to create a dictionary suitable for adding to the /PageLabels number tree.
  53 + QPDF_DLL
  54 + static QPDFObjectHandle
  55 + pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix);
  56 +
52 // Return a page label dictionary representing the page label for the given page. The page does 57 // Return a page label dictionary representing the page label for the given page. The page does
53 // not need to appear explicitly in the page label dictionary. This method will adjust /St as 58 // not need to appear explicitly in the page label dictionary. This method will adjust /St as
54 // needed to produce a label that is suitable for the page. 59 // needed to produce a label that is suitable for the page.
libqpdf/QPDFJob.cc
@@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf) @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf)
2172 if (m->remove_page_labels) { 2172 if (m->remove_page_labels) {
2173 pdf.getRoot().removeKey("/PageLabels"); 2173 pdf.getRoot().removeKey("/PageLabels");
2174 } 2174 }
  2175 + if (!m->page_label_specs.empty()) {
  2176 + auto nums = QPDFObjectHandle::newArray();
  2177 + auto n_pages = QIntC::to_int(dh.getAllPages().size());
  2178 + int last_page_seen{0};
  2179 + for (auto& spec: m->page_label_specs) {
  2180 + if (spec.first_page < 0) {
  2181 + spec.first_page = n_pages + 1 + spec.first_page;
  2182 + }
  2183 + if (last_page_seen == 0) {
  2184 + if (spec.first_page != 1) {
  2185 + throw std::runtime_error(
  2186 + "the first page label specification must start with page 1");
  2187 + }
  2188 + } else if (spec.first_page <= last_page_seen) {
  2189 + throw std::runtime_error(
  2190 + "page label specifications must be in order by first page");
  2191 + }
  2192 + if (spec.first_page > n_pages) {
  2193 + throw std::runtime_error(
  2194 + "page label spec: page " + std::to_string(spec.first_page) +
  2195 + " is more than the total number of pages (" + std::to_string(n_pages) + ")");
  2196 + }
  2197 + last_page_seen = spec.first_page;
  2198 + nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1));
  2199 + nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict(
  2200 + spec.label_type, spec.start_num, spec.prefix));
  2201 + }
  2202 + auto page_labels = QPDFObjectHandle::newDictionary();
  2203 + page_labels.replaceKey("/Nums", nums);
  2204 + pdf.getRoot().replaceKey("/PageLabels", page_labels);
  2205 + }
2175 if (!m->attachments_to_remove.empty()) { 2206 if (!m->attachments_to_remove.empty()) {
2176 QPDFEmbeddedFileDocumentHelper efdh(pdf); 2207 QPDFEmbeddedFileDocumentHelper efdh(pdf);
2177 for (auto const& key: m->attachments_to_remove) { 2208 for (auto const& key: m->attachments_to_remove) {
@@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf) @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf)
3019 try { 3050 try {
3020 QUtil::remove_file(backup.c_str()); 3051 QUtil::remove_file(backup.c_str());
3021 } catch (QPDFSystemError& e) { 3052 } catch (QPDFSystemError& e) {
3022 - *m->log->getError()  
3023 - << m->message_prefix << ": unable to delete original file (" << e.what() << ");"  
3024 - << " original file left in " << backup  
3025 - << ", but the input was successfully replaced\n"; 3053 + *m->log->getError() << m->message_prefix << ": unable to delete original file ("
  3054 + << e.what() << ");" << " original file left in " << backup
  3055 + << ", but the input was successfully replaced\n";
3026 } 3056 }
3027 } 3057 }
3028 } 3058 }
libqpdf/QPDFJob_config.cc
1 #include <qpdf/QPDFJob.hh> 1 #include <qpdf/QPDFJob.hh>
2 2
  3 +#include <regex>
  4 +
3 #include <qpdf/QPDFLogger.hh> 5 #include <qpdf/QPDFLogger.hh>
4 #include <qpdf/QTC.hh> 6 #include <qpdf/QTC.hh>
5 #include <qpdf/QUtil.hh> 7 #include <qpdf/QUtil.hh>
@@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt( @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt(
1062 QPDFJob::Config* 1064 QPDFJob::Config*
1063 QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs) 1065 QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs)
1064 { 1066 {
1065 - // XXX validate  
1066 - for (auto const& xxx: specs) {  
1067 - std::cout << "XXX config: spec: " << xxx << std::endl; 1067 + static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)"); // groups: 1 = first page (z, rN or N), 2 = style letter, 3 = start number, 4 = prefix
  1068 + o.m->page_label_specs.clear(); // each call replaces any previously stored specs
  1069 + for (auto const& spec: specs) {
  1070 + std::smatch match;
  1071 + if (!std::regex_match(spec, match, page_label_re)) {
  1072 + usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]"); // NOTE(review): the code below reads match[1], so usage() presumably throws rather than returns -- confirm
  1073 + }
  1074 + auto first_page_str = match[1].str();
  1075 + int first_page;
  1076 + if (first_page_str == "z") {
  1077 + first_page = -1; // "z" is stored as -1, i.e. the last page once resolved against the page count
  1078 + } else if (first_page_str.at(0) == 'r') {
  1079 + first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str()); // "rN" is stored as -N (counted from the end)
  1080 + } else {
  1081 + first_page = QUtil::string_to_int(first_page_str.c_str());
  1082 + }
  1083 + auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0'; // '\0' when no style letter was given
  1084 + qpdf_page_label_e label_type;
  1085 + switch (label_type_ch) {
  1086 + case 'D':
  1087 + label_type = pl_digits;
  1088 + break;
  1089 + case 'a':
  1090 + label_type = pl_alpha_lower;
  1091 + break;
  1092 + case 'A':
  1093 + label_type = pl_alpha_upper;
  1094 + break;
  1095 + case 'r':
  1096 + label_type = pl_roman_lower;
  1097 + break;
  1098 + case 'R':
  1099 + label_type = pl_roman_upper;
  1100 + break;
  1101 + default:
  1102 + label_type = pl_none; // reached when the style letter is omitted
  1103 + }
  1104 +
  1105 + auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1; // start number is 1 when omitted
  1106 + auto prefix = match[4].matched ? match[4].str() : ""; // prefix is empty when omitted
  1107 + // We can't check ordering until we know how many pages there are, so that is delayed until
  1108 + // near the end.
  1109 + o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix);
1068 } 1110 }
1069 - o.m->page_label_specs = specs;  
1070 return this; 1111 return this;
1071 } 1112 }
1072 1113
libqpdf/QPDFPageLabelDocumentHelper.cc
@@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange( @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange(
99 } 99 }
100 } 100 }
101 } 101 }
  102 +
  103 +QPDFObjectHandle
  104 +QPDFPageLabelDocumentHelper::pageLabelDict( // builds one page label dictionary from a style, a start number, and a prefix
  105 + qpdf_page_label_e label_type, int start_num, std::string_view prefix)
  106 +{
  107 + auto num = "<< /Type /PageLabel >>"_qpdf; // dictionary to return; initially holds only /Type
  108 + switch (label_type) {
  109 + case pl_none:
  110 + break; // leave /S unset
  111 + case pl_digits:
  112 + num.replaceKey("/S", "/D"_qpdf);
  113 + break;
  114 + case pl_alpha_lower:
  115 + num.replaceKey("/S", "/a"_qpdf);
  116 + break;
  117 + case pl_alpha_upper:
  118 + num.replaceKey("/S", "/A"_qpdf);
  119 + break;
  120 + case pl_roman_lower:
  121 + num.replaceKey("/S", "/r"_qpdf);
  122 + break;
  123 + case pl_roman_upper:
  124 + num.replaceKey("/S", "/R"_qpdf);
  125 + break;
  126 + }
  127 + if (!prefix.empty()) { // /P is added only for a non-empty prefix
  128 + num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix)));
  129 + }
  130 + if (start_num != 1) { // /St is added only when start_num differs from 1
  131 + num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num));
  132 + }
  133 + return num;
  134 +}
manual/release-notes.rst
@@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change): @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change):
38 38
39 .. x.y.z: not yet released 39 .. x.y.z: not yet released
40 40
41 -11.7.1: not yet released 41 +11.8.0: not yet released
42 - Bug fixes: 42 - Bug fixes:
43 43
44 - When flattening annotations, preserve hyperlinks and other 44 - When flattening annotations, preserve hyperlinks and other
@@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change): @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change):
54 :qpdf:ref:`--collate` to select different numbers of pages from 54 :qpdf:ref:`--collate` to select different numbers of pages from
55 different groups. 55 different groups.
56 56
  57 + - Add :qpdf:ref:`--set-page-labels` option to completely override
  58 + page labels in the output.
  59 +
  60 + - Library Enhancements
  61 +
  62 + - Add API to support :qpdf:ref:`--set-page-labels`:
  63 +
  64 + - ``QPDFJob::Config::setPageLabels``
  65 +
  66 + - ``qpdf_page_label_e`` enumerated type
  67 +
  68 + - ``QPDFPageLabelDocumentHelper::pageLabelDict``
  69 +
57 11.7.0: December 24, 2023 70 11.7.0: December 24, 2023
58 - Bug fixes: 71 - Bug fixes:
59 72
qpdf/qtest/page-labels.test
@@ -29,5 +29,44 @@ $td->runtest("no page labels", @@ -29,5 +29,44 @@ $td->runtest("no page labels",
29 {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0}, 29 {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0},
30 $td->NORMALIZE_NEWLINES); 30 $td->NORMALIZE_NEWLINES);
31 31
  32 +# --set-page-labels
  33 +my @errors = ( # each entry: [specs passed to --set-page-labels, regexp the command output must match]
  34 + ["quack", ".*page label spec must be.*"],
  35 + ["5:r 10:D", ".*the first page .*must start with page 1.*"],
  36 + ["1:r 10:D 31:A",
  37 + ".*page 31 is more than the total number of pages \\(30\\).*"],
  38 +);
  39 +$n_tests += scalar(@errors); # one runtest per error case
  40 +
  41 +foreach my $d (@errors)
  42 +{
  43 + my ($specs, $err) = @$d;
  44 + $td->runtest("error ($specs)",
  45 + {$td->COMMAND => "qpdf --set-page-labels $specs --" .
  46 + " page-labels-num-tree.pdf a.pdf"},
  47 + {$td->REGEXP => $err, $td->EXIT_STATUS => 2},
  48 + $td->NORMALIZE_NEWLINES);
  49 +}
  50 +
  51 +$n_tests += 4; # the four runtest calls below
  52 +$td->runtest("set page labels",
  53 + {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" .
  54 + " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"},
  55 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  56 + $td->NORMALIZE_NEWLINES);
  57 +$td->runtest("after set page labels",
  58 + {$td->COMMAND => "test_driver 47 a.pdf"},
  59 + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
  60 + $td->NORMALIZE_NEWLINES);
  61 +$td->runtest("set page labels (json)",
  62 + {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" .
  63 + " --job-json-file=set-page-labels.json"},
  64 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  65 + $td->NORMALIZE_NEWLINES);
  66 +$td->runtest("after set page labels",
  67 + {$td->COMMAND => "test_driver 47 b.pdf"},
  68 + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
  69 + $td->NORMALIZE_NEWLINES);
  70 +
  70 +
32 cleanup(); 71 cleanup();
33 $td->report($n_tests); 72 $td->report($n_tests);
qpdf/qtest/qpdf/set-page-labels.json 0 → 100644
  1 +{
  2 + "setPageLabels": [
  3 + "1:a",
  4 + "3:R/2",
  5 + "6:r//Z-",
  6 + "8:A/17",
  7 + "r10:D/3",
  8 + "28:",
  9 + "z://end"
  10 + ]
  11 +}
qpdf/qtest/qpdf/set-page-labels.out 0 → 100644
  1 +1 << /S /a /St 1 >>
  2 +3 << /S /R /St 2 >>
  3 +6 << /P (Z-) /S /r /St 1 >>
  4 +8 << /S /A /St 17 >>
  5 +21 << /S /D /St 3 >>
  6 +28 << /St 1 >>
  7 +30 << /P (end) /St 1 >>
  8 +test 47 done