Commit 070ee710eb0aaf6ddc845735c6ea0c28d3b7e5a1
1 parent
708ea4ef
Support excluding values from numeric ranges (fixes #564, #790)
Showing
9 changed files
with
108 additions
and
22 deletions
ChangeLog
| 1 | +2024-01-01 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Support "x" before a group in a numeric range to exclude a group | ||
| 4 | + from the previous group. Details are in the manual. | ||
| 5 | + | ||
| 1 | 2023-12-29 Jay Berkenbilt <ejb@ql.org> | 6 | 2023-12-29 Jay Berkenbilt <ejb@ql.org> |
| 2 | 7 | ||
| 3 | * When flattening annotations, preserve annotations without any | 8 | * When flattening annotations, preserve annotations without any |
include/qpdf/QUtil.hh
| @@ -442,7 +442,24 @@ namespace QUtil | @@ -442,7 +442,24 @@ namespace QUtil | ||
| 442 | inline bool is_number(char const*); | 442 | inline bool is_number(char const*); |
| 443 | 443 | ||
| 444 | // This method parses the numeric range syntax used by the qpdf command-line tool. May throw | 444 | // This method parses the numeric range syntax used by the qpdf command-line tool. May throw |
| 445 | - // std::runtime_error. | 445 | + // std::runtime_error. A numeric range is a comma-separated list of groups. A group may be a |
| 446 | + // number specification or a range of number specifications separated by a dash. A number | ||
| 447 | + // specification may be one of the following (where <n> is a number): | ||
| 448 | + // * <n> -- the numeric value of n | ||
| 449 | + // * z -- the value of the `max` parameter | ||
| 450 | + // * r<n> -- represents max + 1 - <n> (<n> from the end) | ||
| 451 | + // | ||
| 452 | + // If the group is two number specifications separated by a dash, it represents the range of | ||
| 453 | + // numbers from the first to the second, inclusive. If the first is greater than the second, the | ||
| 454 | + // numbers are descending. | ||
| 455 | + // | ||
| 456 | + // From qpdf 11.7.1: if a group starts with `x`, its members are excluded from the previous | ||
| 457 | + // group that didn't start with `x`. | ||
| 458 | + // | ||
| 459 | + // Example: with max of 15, the range "4-10,x7-9,12-8,xr5" is 4, 5, 6, 10, 12, 10, 9, 8. This is | ||
| 460 | + // 4 through 10 inclusive without 7 through 9 inclusive followed by 12 to 8 inclusive | ||
| 461 | + // (descending) without 11 (the fifth value counting backwards from 15). For more information | ||
| 462 | + // and additional examples, see the "Page Ranges" section in the manual. | ||
| 446 | QPDF_DLL | 463 | QPDF_DLL |
| 447 | std::vector<int> parse_numrange(char const* range, int max); | 464 | std::vector<int> parse_numrange(char const* range, int max); |
| 448 | 465 |
job.sums
| @@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c | @@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c | ||
| 9 | include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 | 9 | include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 |
| 10 | job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c | 10 | job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c |
| 11 | libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2 | 11 | libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2 |
| 12 | -libqpdf/qpdf/auto_job_help.hh 62c40dcd827fcea261a9f432f457aac1331731199ee3530e40de763811ba158e | 12 | +libqpdf/qpdf/auto_job_help.hh 838f4065f64dc3fbd493510fd21d8ab4e16ee2434592776f44f80cbe3045cb50 |
| 13 | libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065 | 13 | libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065 |
| 14 | libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 | 14 | libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 |
| 15 | libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f | 15 | libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f |
| 16 | libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9 | 16 | libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9 |
| 17 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 | 17 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 |
| 18 | -manual/cli.rst f361df89dd212daf65e82df8b7b1f8a5e3554043c545f8e7cb14ba5ded21e04e | ||
| 19 | -manual/qpdf.1 def5ee093f342b222da7e1890cf44145fb7ee7f8024e75d1668f560b7f7f20d6 | 18 | +manual/cli.rst d6d1ca82c936ffeaf137c586f988f80043db4c3b226d26fdf94f19a6005d012e |
| 19 | +manual/qpdf.1 10dc52d32a6d8885ce4e4292875ee7fe8e7a826ef3fc28db5671be413bcaacc7 | ||
| 20 | manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b | 20 | manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b |
libqpdf/QUtil.cc
| @@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) | @@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) | ||
| 1303 | std::vector<int> | 1303 | std::vector<int> |
| 1304 | QUtil::parse_numrange(char const* range, int max) | 1304 | QUtil::parse_numrange(char const* range, int max) |
| 1305 | { | 1305 | { |
| 1306 | + // Performance note: this implementation aims to be straightforward, not efficient. Numeric | ||
| 1307 | + // range parsing is used only during argument processing. It is not used during processing of | ||
| 1308 | + // PDF files. | ||
| 1309 | + | ||
| 1306 | static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); | 1310 | static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); |
| 1307 | auto parse_num = [&max](std::string const& s) -> int { | 1311 | auto parse_num = [&max](std::string const& s) -> int { |
| 1308 | if (s == "z") { | 1312 | if (s == "z") { |
| @@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max) | @@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max) | ||
| 1375 | first = false; | 1379 | first = false; |
| 1376 | auto first_num = parse_num(m[2].str()); | 1380 | auto first_num = parse_num(m[2].str()); |
| 1377 | auto is_span = m[3].matched; | 1381 | auto is_span = m[3].matched; |
| 1378 | - int last_num; | 1382 | + int last_num{0}; |
| 1379 | if (is_span) { | 1383 | if (is_span) { |
| 1380 | last_num = parse_num(m[3].str()); | 1384 | last_num = parse_num(m[3].str()); |
| 1381 | } | 1385 | } |
| 1382 | if (is_exclude) { | 1386 | if (is_exclude) { |
| 1383 | - // XXX | 1387 | + std::vector<int> work; |
| 1388 | + populate(work, first_num, is_span, last_num); | ||
| 1389 | + std::set<int> exclusions; | ||
| 1390 | + exclusions.insert(work.begin(), work.end()); | ||
| 1391 | + work = last_group; | ||
| 1392 | + last_group.clear(); | ||
| 1393 | + for (auto n: work) { | ||
| 1394 | + if (exclusions.count(n) == 0) { | ||
| 1395 | + last_group.emplace_back(n); | ||
| 1396 | + } | ||
| 1397 | + } | ||
| 1384 | } else { | 1398 | } else { |
| 1385 | result.insert(result.end(), last_group.begin(), last_group.end()); | 1399 | result.insert(result.end(), last_group.begin(), last_group.end()); |
| 1386 | populate(last_group, first_num, is_span, last_num); | 1400 | populate(last_group, first_num, is_span, last_num); |
libqpdf/qpdf/auto_job_help.hh
| @@ -286,12 +286,19 @@ value, even if the file uses features that may not be available | @@ -286,12 +286,19 @@ value, even if the file uses features that may not be available | ||
| 286 | in that version. | 286 | in that version. |
| 287 | )"); | 287 | )"); |
| 288 | ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be | 288 | ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be |
| 289 | -found in the manual. Summary: | ||
| 290 | - | ||
| 291 | -- a,b,c pages a, b, and c | ||
| 292 | -- a-b pages a through b inclusive; if a > b, this counts down | ||
| 293 | -- r<n> where <n> represents a number is the <n>th page from the end | ||
| 294 | -- z the last page, same as r1 | 289 | +found in the manual. In summary, a range is a comma-separated list |
| 290 | +of groups. A group is a number or a range of numbers separated by a | ||
| 291 | +dash. A group may be prepended by x to exclude its members from the | ||
| 292 | +previous group. A number may be one of | ||
| 293 | + | ||
| 294 | +- <n> where <n> represents a number is the <n>th page | ||
| 295 | +- r<n> is the <n>th page from the end | ||
| 296 | +- z the last page, same as r1 | ||
| 297 | + | ||
| 298 | +- a,b,c pages a, b, and c | ||
| 299 | +- a-b pages a through b inclusive; if a > b, this counts down | ||
| 300 | +- a-b,xc pages a through b except page c | ||
| 301 | +- a-b,xc-d pages a through b except pages c through d | ||
| 295 | 302 | ||
| 296 | You can append :even or :odd to select every other page from the | 303 | You can append :even or :odd to select every other page from the |
| 297 | resulting set of pages, where :odd starts with the first page and | 304 | resulting set of pages, where :odd starts with the first page and |
libtests/qtest/numrange.test
| @@ -67,6 +67,12 @@ my @nrange_tests = ( | @@ -67,6 +67,12 @@ my @nrange_tests = ( | ||
| 67 | ["1-6,8-12:even", | 67 | ["1-6,8-12:even", |
| 68 | "numeric range 1-6,8-12:even -> 2 4 6 9 11", | 68 | "numeric range 1-6,8-12:even -> 2 4 6 9 11", |
| 69 | 0], | 69 | 0], |
| 70 | + ["x1", | ||
| 71 | + "error at * in numeric range *x1: first range group may not be an exclusion", | ||
| 72 | + 2], | ||
| 73 | + ["4-10,x7-9,12-8,xr5", | ||
| 74 | + "numeric range 4-10,x7-9,12-8,xr5 -> 4 5 6 10 12 10 9 8", | ||
| 75 | + 0], | ||
| 70 | ); | 76 | ); |
| 71 | foreach my $d (@nrange_tests) | 77 | foreach my $d (@nrange_tests) |
| 72 | { | 78 | { |
manual/cli.rst
| @@ -1274,12 +1274,19 @@ Page Ranges | @@ -1274,12 +1274,19 @@ Page Ranges | ||
| 1274 | .. help-topic page-ranges: page range syntax | 1274 | .. help-topic page-ranges: page range syntax |
| 1275 | 1275 | ||
| 1276 | A full description of the page range syntax, with examples, can be | 1276 | A full description of the page range syntax, with examples, can be |
| 1277 | - found in the manual. Summary: | 1277 | + found in the manual. In summary, a range is a comma-separated list |
| 1278 | + of groups. A group is a number or a range of numbers separated by a | ||
| 1279 | + dash. A group may be prepended by x to exclude its members from the | ||
| 1280 | + previous group. A number may be one of | ||
| 1278 | 1281 | ||
| 1279 | - - a,b,c pages a, b, and c | ||
| 1280 | - - a-b pages a through b inclusive; if a > b, this counts down | ||
| 1281 | - - r<n> where <n> represents a number is the <n>th page from the end | ||
| 1282 | - - z the last page, same as r1 | 1282 | + - <n> where <n> represents a number is the <n>th page |
| 1283 | + - r<n> is the <n>th page from the end | ||
| 1284 | + - z the last page, same as r1 | ||
| 1285 | + | ||
| 1286 | + - a,b,c pages a, b, and c | ||
| 1287 | + - a-b pages a through b inclusive; if a > b, this counts down | ||
| 1288 | + - a-b,xc pages a through b except page c | ||
| 1289 | + - a-b,xc-d pages a through b except pages c through d | ||
| 1283 | 1290 | ||
| 1284 | You can append :even or :odd to select every other page from the | 1291 | You can append :even or :odd to select every other page from the |
| 1285 | resulting set of pages, where :odd starts with the first page and | 1292 | resulting set of pages, where :odd starts with the first page and |
| @@ -1303,6 +1310,10 @@ section describes the syntax of a page range. | @@ -1303,6 +1310,10 @@ section describes the syntax of a page range. | ||
| 1303 | of pages from the first to the second. If the first number is higher | 1310 | of pages from the first to the second. If the first number is higher |
| 1304 | than the second number, it is the range of pages in reverse. | 1311 | than the second number, it is the range of pages in reverse. |
| 1305 | 1312 | ||
| 1313 | +- A number or dash-separated range of numbers may be prepended with | ||
| 1314 | + ``x`` (from qpdf 11.7.1). This means to exclude the pages in that | ||
| 1315 | + range from the previous range that didn't start with ``x``. | ||
| 1316 | + | ||
| 1306 | - The range may be appended with ``:odd`` or ``:even`` to select only | 1317 | - The range may be appended with ``:odd`` or ``:even`` to select only |
| 1307 | pages from the resulting range in odd or even positions. In this | 1318 | pages from the resulting range in odd or even positions. In this |
| 1308 | case, odd and even refer to positions in the final range, not | 1319 | case, odd and even refer to positions in the final range, not |
| @@ -1350,6 +1361,16 @@ section describes the syntax of a page range. | @@ -1350,6 +1361,16 @@ section describes the syntax of a page range. | ||
| 1350 | - pages 7 and 9, which are the pages in even positions from the | 1361 | - pages 7 and 9, which are the pages in even positions from the |
| 1351 | original set of 5, 7, 8, 9, 12 | 1362 | original set of 5, 7, 8, 9, 12 |
| 1352 | 1363 | ||
| 1364 | + - - ``1-10,x3-4`` | ||
| 1365 | + - pages 1 through 10 except pages 3 and 4 (1, 2, and 5 | ||
| 1366 | + through 10) | ||
| 1367 | + | ||
| 1368 | + - - ``4-10,x7-9,12-8,xr5`` | ||
| 1369 | + - In a 15-page file, this is 4, 5, 6, 10, 12, 10, 9, and 8 in | ||
| 1370 | + that order. That is pages 4 through 10 except 7 through 9 | ||
| 1371 | + followed by 12 through 8 descending except 11 (the fifth page | ||
| 1372 | + from the end) | ||
| 1373 | + | ||
| 1353 | .. _modification-options: | 1374 | .. _modification-options: |
| 1354 | 1375 | ||
| 1355 | PDF Modification | 1376 | PDF Modification |
manual/qpdf.1
| @@ -377,16 +377,26 @@ value, even if the file uses features that may not be available | @@ -377,16 +377,26 @@ value, even if the file uses features that may not be available | ||
| 377 | in that version. | 377 | in that version. |
| 378 | .SH PAGE-RANGES (page range syntax) | 378 | .SH PAGE-RANGES (page range syntax) |
| 379 | A full description of the page range syntax, with examples, can be | 379 | A full description of the page range syntax, with examples, can be |
| 380 | -found in the manual. Summary: | 380 | +found in the manual. In summary, a range is a comma-separated list |
| 381 | +of groups. A group is a number or a range of numbers separated by a | ||
| 382 | +dash. A group may be prepended by x to exclude its members from the | ||
| 383 | +previous group. A number may be one of | ||
| 381 | 384 | ||
| 382 | .IP \[bu] | 385 | .IP \[bu] |
| 383 | -a,b,c pages a, b, and c | 386 | +<n> where <n> represents a number is the <n>th page |
| 384 | .IP \[bu] | 387 | .IP \[bu] |
| 385 | -a-b pages a through b inclusive; if a > b, this counts down | 388 | +r<n> is the <n>th page from the end |
| 386 | .IP \[bu] | 389 | .IP \[bu] |
| 387 | -r<n> where <n> represents a number is the <n>th page from the end | 390 | +z the last page, same as r1 |
| 391 | + | ||
| 392 | +.IP \[bu] | ||
| 393 | +a,b,c pages a, b, and c | ||
| 394 | +.IP \[bu] | ||
| 395 | +a-b pages a through b inclusive; if a > b, this counts down | ||
| 396 | +.IP \[bu] | ||
| 397 | +a-b,xc pages a through b except page c | ||
| 388 | .IP \[bu] | 398 | .IP \[bu] |
| 389 | -z the last page, same as r1 | 399 | +a-b,xc-d pages a through b except pages c through d |
| 390 | 400 | ||
| 391 | You can append :even or :odd to select every other page from the | 401 | You can append :even or :odd to select every other page from the |
| 392 | resulting set of pages, where :odd starts with the first page and | 402 | resulting set of pages, where :odd starts with the first page and |
manual/release-notes.rst
| @@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change): | @@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change): | ||
| 44 | - When flattening annotations, preserve hyperlinks and other | 44 | - When flattening annotations, preserve hyperlinks and other |
| 45 | annotations that inherently have no appearance information. | 45 | annotations that inherently have no appearance information. |
| 46 | 46 | ||
| 47 | + - CLI Enhancements | ||
| 48 | + | ||
| 49 | + - Introduce ``x`` in the numeric range syntax to allow exclusion | ||
| 50 | + of pages within a page range. See :ref:`page-ranges` for | ||
| 51 | + details. | ||
| 52 | + | ||
| 47 | 11.7.0: December 24, 2023 | 53 | 11.7.0: December 24, 2023 |
| 48 | - Bug fixes: | 54 | - Bug fixes: |
| 49 | 55 |