Commit 070ee710eb0aaf6ddc845735c6ea0c28d3b7e5a1

Authored by Jay Berkenbilt
1 parent 708ea4ef

Support excluding values from numeric ranges (fixes #564, #790)

ChangeLog
  1 +2024-01-01 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Support "x" before a group in a numeric range to exclude a group
  4 + from the previous group. Details are in the manual.
  5 +
1 2023-12-29 Jay Berkenbilt <ejb@ql.org> 6 2023-12-29 Jay Berkenbilt <ejb@ql.org>
2 7
3 * When flattening annotations, preserve annotations without any 8 * When flattening annotations, preserve annotations without any
include/qpdf/QUtil.hh
@@ -442,7 +442,24 @@ namespace QUtil @@ -442,7 +442,24 @@ namespace QUtil
442 inline bool is_number(char const*); 442 inline bool is_number(char const*);
443 443
444 // This method parses the numeric range syntax used by the qpdf command-line tool. May throw 444 // This method parses the numeric range syntax used by the qpdf command-line tool. May throw
445 - // std::runtime_error. 445 + // std::runtime_error. A numeric range is a comma-separated list of groups. A group may be a
  446 + // number specification or a range of number specifications separated by a dash. A number
  447 + // specification may be one of the following (where <n> is a number):
  448 + // * <n> -- the numeric value of n
  449 + // * z -- the value of the `max` parameter
  450 + // * r<n> -- represents max + 1 - <n> (<n> from the end)
  451 + //
  452 + // If the group is two number specifications separated by a dash, it represents the range of
  453 + // numbers from the first to the second, inclusive. If the first is greater than the second, the
  454 + // numbers are descending.
  455 + //
  456 + // From qpdf 11.7.1: if a group starts with `x`, its members are excluded from the previous
  457 + // group that didn't start with `x`.
  458 + //
  459 + // Example: with max of 15, the range "4-10,x7-9,12-8,xr5" is 4, 5, 6, 10, 12, 10, 9, 8. This is
  460 + // 4 through 10 inclusive without 7 through 9 inclusive followed by 12 to 8 inclusive
  461 + // (descending) without 11 (the fifth value counting backwards from 15). For more information
  462 + // and additional examples, see the "Page Ranges" section in the manual.
446 QPDF_DLL 463 QPDF_DLL
447 std::vector<int> parse_numrange(char const* range, int max); 464 std::vector<int> parse_numrange(char const* range, int max);
448 465
job.sums
@@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c @@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c
9 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 9 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
10 job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c 10 job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c
11 libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2 11 libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2
12 -libqpdf/qpdf/auto_job_help.hh 62c40dcd827fcea261a9f432f457aac1331731199ee3530e40de763811ba158e 12 +libqpdf/qpdf/auto_job_help.hh 838f4065f64dc3fbd493510fd21d8ab4e16ee2434592776f44f80cbe3045cb50
13 libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065 13 libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065
14 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 14 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
15 libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f 15 libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f
16 libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9 16 libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9
17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
18 -manual/cli.rst f361df89dd212daf65e82df8b7b1f8a5e3554043c545f8e7cb14ba5ded21e04e  
19 -manual/qpdf.1 def5ee093f342b222da7e1890cf44145fb7ee7f8024e75d1668f560b7f7f20d6 18 +manual/cli.rst d6d1ca82c936ffeaf137c586f988f80043db4c3b226d26fdf94f19a6005d012e
  19 +manual/qpdf.1 10dc52d32a6d8885ce4e4292875ee7fe8e7a826ef3fc28db5671be413bcaacc7
20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b 20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
libqpdf/QUtil.cc
@@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) @@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2)
1303 std::vector<int> 1303 std::vector<int>
1304 QUtil::parse_numrange(char const* range, int max) 1304 QUtil::parse_numrange(char const* range, int max)
1305 { 1305 {
  1306 + // Performance note: this implementation aims to be straightforward, not efficient. Numeric
  1307 + // range parsing is used only during argument processing. It is not used during processing of
  1308 + // PDF files.
  1309 +
1306 static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); 1310 static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
1307 auto parse_num = [&max](std::string const& s) -> int { 1311 auto parse_num = [&max](std::string const& s) -> int {
1308 if (s == "z") { 1312 if (s == "z") {
@@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max) @@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max)
1375 first = false; 1379 first = false;
1376 auto first_num = parse_num(m[2].str()); 1380 auto first_num = parse_num(m[2].str());
1377 auto is_span = m[3].matched; 1381 auto is_span = m[3].matched;
1378 - int last_num; 1382 + int last_num{0};
1379 if (is_span) { 1383 if (is_span) {
1380 last_num = parse_num(m[3].str()); 1384 last_num = parse_num(m[3].str());
1381 } 1385 }
1382 if (is_exclude) { 1386 if (is_exclude) {
1383 - // XXX 1387 + std::vector<int> work;
  1388 + populate(work, first_num, is_span, last_num);
  1389 + std::set<int> exclusions;
  1390 + exclusions.insert(work.begin(), work.end());
  1391 + work = last_group;
  1392 + last_group.clear();
  1393 + for (auto n: work) {
  1394 + if (exclusions.count(n) == 0) {
  1395 + last_group.emplace_back(n);
  1396 + }
  1397 + }
1384 } else { 1398 } else {
1385 result.insert(result.end(), last_group.begin(), last_group.end()); 1399 result.insert(result.end(), last_group.begin(), last_group.end());
1386 populate(last_group, first_num, is_span, last_num); 1400 populate(last_group, first_num, is_span, last_num);
libqpdf/qpdf/auto_job_help.hh
@@ -286,12 +286,19 @@ value, even if the file uses features that may not be available @@ -286,12 +286,19 @@ value, even if the file uses features that may not be available
286 in that version. 286 in that version.
287 )"); 287 )");
288 ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be 288 ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be
289 -found in the manual. Summary:  
290 -  
291 -- a,b,c pages a, b, and c  
292 -- a-b pages a through b inclusive; if a > b, this counts down  
293 -- r<n> where <n> represents a number is the <n>th page from the end  
294 -- z the last page, same as r1 289 +found in the manual. In summary, a range is a comma-separated list
  290 +of groups. A group is a number or a range of numbers separated by a
  291 +dash. A group may be prepended by x to exclude its members from the
  292 +previous group. A number may be one of
  293 +
  294 +- <n> where <n> represents a number is the <n>th page
  295 +- r<n> is the <n>th page from the end
  296 +- z the last page, same as r1
  297 +
  298 +- a,b,c pages a, b, and c
  299 +- a-b pages a through b inclusive; if a > b, this counts down
  300 +- a-b,xc pages a through b except page c
  301 +- a-b,xc-d pages a through b except pages c through d
295 302
296 You can append :even or :odd to select every other page from the 303 You can append :even or :odd to select every other page from the
297 resulting set of pages, where :odd starts with the first page and 304 resulting set of pages, where :odd starts with the first page and
libtests/qtest/numrange.test
@@ -67,6 +67,12 @@ my @nrange_tests = ( @@ -67,6 +67,12 @@ my @nrange_tests = (
67 ["1-6,8-12:even", 67 ["1-6,8-12:even",
68 "numeric range 1-6,8-12:even -> 2 4 6 9 11", 68 "numeric range 1-6,8-12:even -> 2 4 6 9 11",
69 0], 69 0],
  70 + ["x1",
  71 + "error at * in numeric range *x1: first range group may not be an exclusion",
  72 + 2],
  73 + ["4-10,x7-9,12-8,xr5",
  74 + "numeric range 4-10,x7-9,12-8,xr5 -> 4 5 6 10 12 10 9 8",
  75 + 0],
70 ); 76 );
71 foreach my $d (@nrange_tests) 77 foreach my $d (@nrange_tests)
72 { 78 {
manual/cli.rst
@@ -1274,12 +1274,19 @@ Page Ranges @@ -1274,12 +1274,19 @@ Page Ranges
1274 .. help-topic page-ranges: page range syntax 1274 .. help-topic page-ranges: page range syntax
1275 1275
1276 A full description of the page range syntax, with examples, can be 1276 A full description of the page range syntax, with examples, can be
1277 - found in the manual. Summary: 1277 + found in the manual. In summary, a range is a comma-separated list
  1278 + of groups. A group is a number or a range of numbers separated by a
  1279 + dash. A group may be prepended by x to exclude its members from the
  1280 + previous group. A number may be one of
1278 1281
1279 - - a,b,c pages a, b, and c  
1280 - - a-b pages a through b inclusive; if a > b, this counts down  
1281 - - r<n> where <n> represents a number is the <n>th page from the end  
1282 - - z the last page, same as r1 1282 + - <n> where <n> represents a number is the <n>th page
  1283 + - r<n> is the <n>th page from the end
  1284 + - z the last page, same as r1
  1285 +
  1286 + - a,b,c pages a, b, and c
  1287 + - a-b pages a through b inclusive; if a > b, this counts down
  1288 + - a-b,xc pages a through b except page c
  1289 + - a-b,xc-d pages a through b except pages c through d
1283 1290
1284 You can append :even or :odd to select every other page from the 1291 You can append :even or :odd to select every other page from the
1285 resulting set of pages, where :odd starts with the first page and 1292 resulting set of pages, where :odd starts with the first page and
@@ -1303,6 +1310,10 @@ section describes the syntax of a page range. @@ -1303,6 +1310,10 @@ section describes the syntax of a page range.
1303 of pages from the first to the second. If the first number is higher 1310 of pages from the first to the second. If the first number is higher
1304 than the second number, it is the range of pages in reverse. 1311 than the second number, it is the range of pages in reverse.
1305 1312
  1313 +- A number or dash-separated range of numbers may be prepended with
  1314 + ``x`` (from qpdf 11.7.1). This means to exclude the pages in that
  1315 + range from the previous range that didn't start with ``x``.
  1316 +
1306 - The range may be appended with ``:odd`` or ``:even`` to select only 1317 - The range may be appended with ``:odd`` or ``:even`` to select only
1307 pages from the resulting range in odd or even positions. In this 1318 pages from the resulting range in odd or even positions. In this
1308 case, odd and even refer to positions in the final range, not 1319 case, odd and even refer to positions in the final range, not
@@ -1350,6 +1361,16 @@ section describes the syntax of a page range. @@ -1350,6 +1361,16 @@ section describes the syntax of a page range.
1350 - pages 7 and 9, which are the pages in even positions from the 1361 - pages 7 and 9, which are the pages in even positions from the
1351 original set of 5, 7, 8, 9, 12 1362 original set of 5, 7, 8, 9, 12
1352 1363
  1364 + - - ``1-10,x3-4``
  1365 + - pages 1 through 10 except pages 3 and 4 (1, 2, and 5
  1366 + through 10)
  1367 +
  1368 + - - ``4-10,x7-9,12-8,xr5``
  1369 + - In a 15-page file, this is 4, 5, 6, 10, 12, 10, 9, and 8 in
  1370 + that order. That is pages 4 through 10 except 7 through 9
  1371 + followed by 12 through 8 descending except 11 (the fifth page
  1372 + from the end)
  1373 +
1353 .. _modification-options: 1374 .. _modification-options:
1354 1375
1355 PDF Modification 1376 PDF Modification
manual/qpdf.1
@@ -377,16 +377,26 @@ value, even if the file uses features that may not be available @@ -377,16 +377,26 @@ value, even if the file uses features that may not be available
377 in that version. 377 in that version.
378 .SH PAGE-RANGES (page range syntax) 378 .SH PAGE-RANGES (page range syntax)
379 A full description of the page range syntax, with examples, can be 379 A full description of the page range syntax, with examples, can be
380 -found in the manual. Summary: 380 +found in the manual. In summary, a range is a comma-separated list
  381 +of groups. A group is a number or a range of numbers separated by a
  382 +dash. A group may be prepended by x to exclude its members from the
  383 +previous group. A number may be one of
381 384
382 .IP \[bu] 385 .IP \[bu]
383 -a,b,c pages a, b, and c 386 +<n> where <n> represents a number is the <n>th page
384 .IP \[bu] 387 .IP \[bu]
385 -a-b pages a through b inclusive; if a > b, this counts down 388 +r<n> is the <n>th page from the end
386 .IP \[bu] 389 .IP \[bu]
387 -r<n> where <n> represents a number is the <n>th page from the end 390 +z the last page, same as r1
  391 +
  392 +.IP \[bu]
  393 +a,b,c pages a, b, and c
  394 +.IP \[bu]
  395 +a-b pages a through b inclusive; if a > b, this counts down
  396 +.IP \[bu]
  397 +a-b,xc pages a through b except page c
388 .IP \[bu] 398 .IP \[bu]
389 -z the last page, same as r1 399 +a-b,xc-d pages a through b except pages c through d
390 400
391 You can append :even or :odd to select every other page from the 401 You can append :even or :odd to select every other page from the
392 resulting set of pages, where :odd starts with the first page and 402 resulting set of pages, where :odd starts with the first page and
manual/release-notes.rst
@@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change): @@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change):
44 - When flattening annotations, preserve hyperlinks and other 44 - When flattening annotations, preserve hyperlinks and other
45 annotations that inherently have no appearance information. 45 annotations that inherently have no appearance information.
46 46
  47 + - CLI Enhancements
  48 +
  49 + - Introduce ``x`` in the numeric range syntax to allow exclusion
  50 + of pages within a page range. See :ref:`page-ranges` for
  51 + details.
  52 +
47 11.7.0: December 24, 2023 53 11.7.0: December 24, 2023
48 - Bug fixes: 54 - Bug fixes:
49 55