Commit 666f7943931bed6318e9ffeeeb696bad4d813222

Authored by Jay Berkenbilt
1 parent e577dfc8

Support "r" in page ranges (fixes #155)

ChangeLog
  1 +2018-03-04 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * On the command line when specifying page ranges, support
  4 + preceding a page number by "r" to indicate that it should be
  5 + counted from the end. For example, the range r3-r1 would indicate
  6 + the last three pages of a document.
  7 +
1 8 2018-03-03 Jay Berkenbilt <ejb@ql.org>
2 9  
3 10 * Ignore zlib data check errors while uncompressing streams. This
... ...
manual/qpdf-manual.xml
... ... @@ -818,13 +818,15 @@ make
818 818 </para>
819 819 <para>
820 820 The page range is a set of numbers separated by commas, ranges of
821   - numbers separated dashes, or combinations of those. The character
822   - &ldquo;z&rdquo; represents the last page. Pages can appear in any
823   - order. Ranges can appear with a high number followed by a low
824   - number, which causes the pages to appear in reverse. Repeating a
825   - number will cause an error, but you can use the workaround
826   - discussed above should you really want to include the same page
827   - twice.
  821 + numbers separated by dashes, or combinations of those. The character
  822 + &ldquo;z&rdquo; represents the last page. A number preceded by an
  823 + &ldquo;r&rdquo; is counted from the end, so
  824 + &ldquo;r3-r1&rdquo; would be the last three pages of the document.
  825 + Pages can appear in any order. Ranges can appear with a high
  826 + number followed by a low number, which causes the pages to appear
  827 + in reverse. Repeating a number will cause an error, but you can
  828 + use the workaround discussed above should you really want to
  829 + include the same page twice.
828 830 </para>
829 831 <para>
830 832 Example page ranges:
... ... @@ -840,6 +842,17 @@ make
840 842 <literal>z-1</literal>: all pages in the document in reverse
841 843 </para>
842 844 </listitem>
  845 + <listitem>
  846 + <para>
  847 + <literal>r3-r1</literal>: the last three pages of the document
  848 + </para>
  849 + </listitem>
  850 + <listitem>
  851 + <para>
  852 + <literal>r1-r3</literal>: the last three pages of the document
  853 + in reverse order
  854 + </para>
  855 + </listitem>
843 856 </itemizedlist>
844 857 </para>
845 858 <para>
... ...
qpdf/qpdf.cc
... ... @@ -358,11 +358,12 @@ input.\n\
358 358 \n\
359 359 The page range is a set of numbers separated by commas, ranges of\n\
360 360 numbers separated dashes, or combinations of those. The character\n\
361   -\"z\" represents the last page. Pages can appear in any order. Ranges\n\
362   -can appear with a high number followed by a low number, which causes the\n\
363   -pages to appear in reverse. Repeating a number will cause an error, but\n\
364   -the manual discusses a workaround should you really want to include the\n\
365   -same page twice.\n\
  361 +\"z\" represents the last page. A number preceded by an \"r\" indicates\n\
  362 +to count from the end, so \"r3-r1\" would be the last three pages of the\n\
  363 +document. Pages can appear in any order. Ranges can appear with a\n\
  364 +high number followed by a low number, which causes the pages to appear in\n\
  365 +reverse. Repeating a number will cause an error, but the manual discusses\n\
  366 +a workaround should you really want to include the same page twice.\n\
366 367 \n\
367 368 If the page range is omitted, the range of 1-z is assumed. qpdf decides\n\
368 369 that the page range is omitted if the range argument is either -- or a\n\
... ... @@ -577,6 +578,22 @@ static void show_encryption(QPDF&amp; pdf, Options&amp; o)
577 578 }
578 579 }
579 580  
  // maybe_from_end: convert a page number that may have been written with
  // the "r" (count-from-end) prefix into an ordinary 1-based page number.
  //   num      - the number as parsed (without the "r" prefix)
  //   from_end - true when the number was preceded by "r"
  //   max      - the highest valid number (the last page)
  // Returns num unchanged when from_end is false; otherwise returns
  // max + 1 - num, so that r1 maps to max and r<max> maps to 1.
  581 +static int maybe_from_end(int num, bool from_end, int max)
  582 +{
  583 + if (from_end)
  584 + {
  585 + if (num > max)
  586 + {
  // Counting back past the first page: produce 0 rather than a negative
  // value. The tests in this commit expect "r16" with max 15 to be
  // reported as "number 0 out of range".
  587 + num = 0;
  588 + }
  589 + else
  590 + {
  // Note that a bare "r" arrives here as num == 0 and becomes max + 1,
  // which the tests in this commit expect to be reported as out of range.
  591 + num = max + 1 - num;
  592 + }
  593 + }
  594 + return num;
  595 +}
  596 +
580 597 static std::vector<int> parse_numrange(char const* range, int max,
581 598 bool throw_error = false)
582 599 {
... ... @@ -593,6 +610,7 @@ static std::vector&lt;int&gt; parse_numrange(char const* range, int max,
593 610 st_after_number } state = st_top;
594 611 bool last_separator_was_dash = false;
595 612 int cur_number = 0;
  613 + bool from_end = false;
596 614 while (*p)
597 615 {
598 616 char ch = *p;
... ... @@ -616,14 +634,25 @@ static std::vector&lt;int&gt; parse_numrange(char const* range, int max,
616 634 state = st_after_number;
617 635 cur_number = max;
618 636 }
  637 + else if (ch == 'r')
  638 + {
  639 + if (! (state == st_top))
  640 + {
  641 + throw std::runtime_error("r not expected");
  642 + }
  643 + state = st_in_number;
  644 + from_end = true;
  645 + }
619 646 else if ((ch == ',') || (ch == '-'))
620 647 {
621 648 if (! ((state == st_in_number) || (state == st_after_number)))
622 649 {
623 650 throw std::runtime_error("unexpected separator");
624 651 }
  652 + cur_number = maybe_from_end(cur_number, from_end, max);
625 653 work.push_back(cur_number);
626 654 cur_number = 0;
  655 + from_end = false;
627 656 if (ch == ',')
628 657 {
629 658 state = st_top;
... ... @@ -649,6 +678,7 @@ static std::vector&lt;int&gt; parse_numrange(char const* range, int max,
649 678 }
650 679 if ((state == st_in_number) || (state == st_after_number))
651 680 {
  681 + cur_number = maybe_from_end(cur_number, from_end, max);
652 682 work.push_back(cur_number);
653 683 }
654 684 else
... ...
qpdf/qtest/qpdf.test
... ... @@ -1092,9 +1092,19 @@ my @nrange_tests = (
1092 1092 ["1-10,1234,5",
1093 1093 "qpdf: error in numeric range 1-10,1234,5: number 1234 out of range",
1094 1094 2],
1095   - ["1,3,5-10,z-13,13,9,z,2",
1096   - "numeric range 1,3,5-10,z-13,13,9,z,2" .
1097   - " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2",
  1095 + ["1,r,3",
  1096 + "qpdf: error in numeric range 1,r,3: number 16 out of range",
  1097 + 2],
  1098 + ["1,r16,3",
  1099 + "qpdf: error in numeric range 1,r16,3: number 0 out of range",
  1100 + 2],
  1101 + ["1,3,5-10,z-13,13,9,z,2,r2-r4",
  1102 + "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
  1103 + " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2 14 13 12",
  1104 + 0],
  1105 + ["r1-r15", # r\d+ at end
  1106 + "numeric range r1-r15" .
  1107 + " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
1098 1108 0],
1099 1109 );
1100 1110 $n_tests += scalar(@nrange_tests);
... ...