Commit 666f7943931bed6318e9ffeeeb696bad4d813222

Authored by Jay Berkenbilt
1 parent e577dfc8

Support "r" in page ranges (fixes #155)

ChangeLog
  1 +2018-03-04 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * On the command line when specifying page ranges, support
  4 + preceding a page number by "r" to indicate that it should be
  5 + counted from the end. For example, the range r3-r1 would indicate
  6 + the last three pages of a document.
  7 +
1 2018-03-03 Jay Berkenbilt <ejb@ql.org> 8 2018-03-03 Jay Berkenbilt <ejb@ql.org>
2 9
3 * Ignore zlib data check errors while uncompressing streams. This 10 * Ignore zlib data check errors while uncompressing streams. This
manual/qpdf-manual.xml
@@ -818,13 +818,15 @@ make @@ -818,13 +818,15 @@ make
818 </para> 818 </para>
819 <para> 819 <para>
820 The page range is a set of numbers separated by commas, ranges of 820 The page range is a set of numbers separated by commas, ranges of
821 - numbers separated dashes, or combinations of those. The character  
822 - &ldquo;z&rdquo; represents the last page. Pages can appear in any  
823 - order. Ranges can appear with a high number followed by a low  
824 - number, which causes the pages to appear in reverse. Repeating a  
825 - number will cause an error, but you can use the workaround  
826 - discussed above should you really want to include the same page  
827 - twice. 821 + numbers separated dashes, or combinations of those. The character
  822 + &ldquo;z&rdquo; represents the last page. A number preceded by an
  823 + &ldquo;r&rdquo; indicates to count from the end, so
  824 + &ldquo;r3-r1&rdquo; would be the last three pages of the document.
  825 + Pages can appear in any order. Ranges can appear with a high
  826 + number followed by a low number, which causes the pages to appear
  827 + in reverse. Repeating a number will cause an error, but you can
  828 + use the workaround discussed above should you really want to
  829 + include the same page twice.
828 </para> 830 </para>
829 <para> 831 <para>
830 Example page ranges: 832 Example page ranges:
@@ -840,6 +842,17 @@ make @@ -840,6 +842,17 @@ make
840 <literal>z-1</literal>: all pages in the document in reverse 842 <literal>z-1</literal>: all pages in the document in reverse
841 </para> 843 </para>
842 </listitem> 844 </listitem>
  845 + <listitem>
  846 + <para>
  847 + <literal>r3-r1</literal>: the last three pages of the document
  848 + </para>
  849 + </listitem>
  850 + <listitem>
  851 + <para>
  852 + <literal>r1-r3</literal>: the last three pages of the document
  853 + in reverse order
  854 + </para>
  855 + </listitem>
843 </itemizedlist> 856 </itemizedlist>
844 </para> 857 </para>
845 <para> 858 <para>
qpdf/qpdf.cc
@@ -358,11 +358,12 @@ input.\n\ @@ -358,11 +358,12 @@ input.\n\
358 \n\ 358 \n\
359 The page range is a set of numbers separated by commas, ranges of\n\ 359 The page range is a set of numbers separated by commas, ranges of\n\
360 numbers separated dashes, or combinations of those. The character\n\ 360 numbers separated dashes, or combinations of those. The character\n\
361 -\"z\" represents the last page. Pages can appear in any order. Ranges\n\  
362 -can appear with a high number followed by a low number, which causes the\n\  
363 -pages to appear in reverse. Repeating a number will cause an error, but\n\  
364 -the manual discusses a workaround should you really want to include the\n\  
365 -same page twice.\n\ 361 +\"z\" represents the last page. A number preceded by an \"r\" indicates\n\
  362 +to count from the end, so \"r3-r1\" would be the last three pages of the\n\
  363 +document. Pages can appear in any order. Ranges can appear with a\n\
  364 +high number followed by a low number, which causes the pages to appear in\n\
  365 +reverse. Repeating a number will cause an error, but the manual discusses\n\
  366 +a workaround should you really want to include the same page twice.\n\
366 \n\ 367 \n\
367 If the page range is omitted, the range of 1-z is assumed. qpdf decides\n\ 368 If the page range is omitted, the range of 1-z is assumed. qpdf decides\n\
368 that the page range is omitted if the range argument is either -- or a\n\ 369 that the page range is omitted if the range argument is either -- or a\n\
@@ -577,6 +578,22 @@ static void show_encryption(QPDF& pdf, Options& o) @@ -577,6 +578,22 @@ static void show_encryption(QPDF& pdf, Options& o)
577 } 578 }
578 } 579 }
579 580
  581 +static int maybe_from_end(int num, bool from_end, int max)
  582 +{
  583 + if (from_end)
  584 + {
  585 + if (num > max)
  586 + {
  587 + num = 0;
  588 + }
  589 + else
  590 + {
  591 + num = max + 1 - num;
  592 + }
  593 + }
  594 + return num;
  595 +}
  596 +
580 static std::vector<int> parse_numrange(char const* range, int max, 597 static std::vector<int> parse_numrange(char const* range, int max,
581 bool throw_error = false) 598 bool throw_error = false)
582 { 599 {
@@ -593,6 +610,7 @@ static std::vector<int> parse_numrange(char const* range, int max, @@ -593,6 +610,7 @@ static std::vector<int> parse_numrange(char const* range, int max,
593 st_after_number } state = st_top; 610 st_after_number } state = st_top;
594 bool last_separator_was_dash = false; 611 bool last_separator_was_dash = false;
595 int cur_number = 0; 612 int cur_number = 0;
  613 + bool from_end = false;
596 while (*p) 614 while (*p)
597 { 615 {
598 char ch = *p; 616 char ch = *p;
@@ -616,14 +634,25 @@ static std::vector<int> parse_numrange(char const* range, int max, @@ -616,14 +634,25 @@ static std::vector<int> parse_numrange(char const* range, int max,
616 state = st_after_number; 634 state = st_after_number;
617 cur_number = max; 635 cur_number = max;
618 } 636 }
  637 + else if (ch == 'r')
  638 + {
  639 + if (! (state == st_top))
  640 + {
  641 + throw std::runtime_error("r not expected");
  642 + }
  643 + state = st_in_number;
  644 + from_end = true;
  645 + }
619 else if ((ch == ',') || (ch == '-')) 646 else if ((ch == ',') || (ch == '-'))
620 { 647 {
621 if (! ((state == st_in_number) || (state == st_after_number))) 648 if (! ((state == st_in_number) || (state == st_after_number)))
622 { 649 {
623 throw std::runtime_error("unexpected separator"); 650 throw std::runtime_error("unexpected separator");
624 } 651 }
  652 + cur_number = maybe_from_end(cur_number, from_end, max);
625 work.push_back(cur_number); 653 work.push_back(cur_number);
626 cur_number = 0; 654 cur_number = 0;
  655 + from_end = false;
627 if (ch == ',') 656 if (ch == ',')
628 { 657 {
629 state = st_top; 658 state = st_top;
@@ -649,6 +678,7 @@ static std::vector<int> parse_numrange(char const* range, int max, @@ -649,6 +678,7 @@ static std::vector<int> parse_numrange(char const* range, int max,
649 } 678 }
650 if ((state == st_in_number) || (state == st_after_number)) 679 if ((state == st_in_number) || (state == st_after_number))
651 { 680 {
  681 + cur_number = maybe_from_end(cur_number, from_end, max);
652 work.push_back(cur_number); 682 work.push_back(cur_number);
653 } 683 }
654 else 684 else
qpdf/qtest/qpdf.test
@@ -1092,9 +1092,19 @@ my @nrange_tests = ( @@ -1092,9 +1092,19 @@ my @nrange_tests = (
1092 ["1-10,1234,5", 1092 ["1-10,1234,5",
1093 "qpdf: error in numeric range 1-10,1234,5: number 1234 out of range", 1093 "qpdf: error in numeric range 1-10,1234,5: number 1234 out of range",
1094 2], 1094 2],
1095 - ["1,3,5-10,z-13,13,9,z,2",  
1096 - "numeric range 1,3,5-10,z-13,13,9,z,2" .  
1097 - " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2", 1095 + ["1,r,3",
  1096 + "qpdf: error in numeric range 1,r,3: number 16 out of range",
  1097 + 2],
  1098 + ["1,r16,3",
  1099 + "qpdf: error in numeric range 1,r16,3: number 0 out of range",
  1100 + 2],
  1101 + ["1,3,5-10,z-13,13,9,z,2,r2-r4",
  1102 + "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
  1103 + " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2 14 13 12",
  1104 + 0],
  1105 + ["r1-r15", # r\d+ at end
  1106 + "numeric range r1-r15" .
  1107 + " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
1098 0], 1108 0],
1099 ); 1109 );
1100 $n_tests += scalar(@nrange_tests); 1110 $n_tests += scalar(@nrange_tests);