Commit 708ea4ef43c2f7d6a88f215f1b932c5118aceafb

Authored by Jay Berkenbilt
1 parent d9b8b0aa

Completely rewrite QUtil::parse_numrange

libqpdf/QUtil.cc
@@ -9,15 +9,12 @@ @@ -9,15 +9,12 @@
9 #include <qpdf/QPDFSystemError.hh> 9 #include <qpdf/QPDFSystemError.hh>
10 #include <qpdf/QTC.hh> 10 #include <qpdf/QTC.hh>
11 11
12 -#include <cctype>  
13 #include <cerrno> 12 #include <cerrno>
14 -#include <cstdio>  
15 #include <cstdlib> 13 #include <cstdlib>
16 #include <cstring> 14 #include <cstring>
17 #include <fcntl.h> 15 #include <fcntl.h>
18 #include <fstream> 16 #include <fstream>
19 #include <iomanip> 17 #include <iomanip>
20 -#include <locale>  
21 #include <map> 18 #include <map>
22 #include <memory> 19 #include <memory>
23 #include <regex> 20 #include <regex>
@@ -1303,93 +1300,52 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) @@ -1303,93 +1300,52 @@ QUtil::str_compare_nocase(char const* s1, char const* s2)
1303 #endif 1300 #endif
1304 } 1301 }
1305 1302
1306 -static int  
1307 -maybe_from_end(int num, bool from_end, int max)  
1308 -{  
1309 - if (from_end) {  
1310 - if (num > max) {  
1311 - num = 0;  
1312 - } else {  
1313 - num = max + 1 - num;  
1314 - }  
1315 - }  
1316 - return num;  
1317 -}  
1318 -  
1319 std::vector<int> 1303 std::vector<int>
1320 QUtil::parse_numrange(char const* range, int max) 1304 QUtil::parse_numrange(char const* range, int max)
1321 { 1305 {
1322 - std::vector<int> result;  
1323 - char const* p = range;  
1324 - try {  
1325 - std::vector<int> work;  
1326 - static int const comma = -1;  
1327 - static int const dash = -2;  
1328 - size_t start_idx = 0;  
1329 - size_t skip = 1; 1306 + static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
  1307 + auto parse_num = [&max](std::string const& s) -> int {
  1308 + if (s == "z") {
  1309 + return max;
  1310 + }
  1311 + int num;
  1312 + if (s.at(0) == 'r') {
  1313 + num = max + 1 - string_to_int(s.substr(1).c_str());
  1314 + } else {
  1315 + num = string_to_int(s.c_str());
  1316 + }
  1317 + // max == 0 means we don't know the max and are just testing for valid syntax.
  1318 + if ((max > 0) && ((num < 1) || (num > max))) {
  1319 + throw std::runtime_error("number " + std::to_string(num) + " out of range");
  1320 + }
  1321 + return num;
  1322 + };
1330 1323
1331 - enum { st_top, st_in_number, st_after_number } state = st_top;  
1332 - bool last_separator_was_dash = false;  
1333 - int cur_number = 0;  
1334 - bool from_end = false;  
1335 - while (*p) {  
1336 - char ch = *p;  
1337 - if (isdigit(ch)) {  
1338 - if (!((state == st_top) || (state == st_in_number))) {  
1339 - throw std::runtime_error("digit not expected");  
1340 - }  
1341 - state = st_in_number;  
1342 - cur_number *= 10;  
1343 - cur_number += (ch - '0');  
1344 - } else if (ch == 'z') {  
1345 - // z represents max  
1346 - if (!(state == st_top)) {  
1347 - throw std::runtime_error("z not expected");  
1348 - }  
1349 - state = st_after_number;  
1350 - cur_number = max;  
1351 - } else if (ch == 'r') {  
1352 - if (!(state == st_top)) {  
1353 - throw std::runtime_error("r not expected");  
1354 - }  
1355 - state = st_in_number;  
1356 - from_end = true;  
1357 - } else if ((ch == ',') || (ch == '-')) {  
1358 - if (!((state == st_in_number) || (state == st_after_number))) {  
1359 - throw std::runtime_error("unexpected separator"); 1324 + auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) {
  1325 + group.clear();
  1326 + group.emplace_back(first_num);
  1327 + if (is_span) {
  1328 + if (first_num > last_num) {
  1329 + for (auto i = first_num - 1; i >= last_num; --i) {
  1330 + group.push_back(i);
1360 } 1331 }
1361 - cur_number = maybe_from_end(cur_number, from_end, max);  
1362 - work.push_back(cur_number);  
1363 - cur_number = 0;  
1364 - from_end = false;  
1365 - if (ch == ',') {  
1366 - state = st_top;  
1367 - last_separator_was_dash = false;  
1368 - work.push_back(comma);  
1369 - } else if (ch == '-') {  
1370 - if (last_separator_was_dash) {  
1371 - throw std::runtime_error("unexpected dash");  
1372 - }  
1373 - state = st_top;  
1374 - last_separator_was_dash = true;  
1375 - work.push_back(dash);  
1376 - }  
1377 - } else if (ch == ':') {  
1378 - if (!((state == st_in_number) || (state == st_after_number))) {  
1379 - throw std::runtime_error("unexpected colon");  
1380 - }  
1381 - break;  
1382 } else { 1332 } else {
1383 - throw std::runtime_error("unexpected character"); 1333 + for (auto i = first_num + 1; i <= last_num; ++i) {
  1334 + group.push_back(i);
  1335 + }
1384 } 1336 }
1385 - ++p;  
1386 - }  
1387 - if ((state == st_in_number) || (state == st_after_number)) {  
1388 - cur_number = maybe_from_end(cur_number, from_end, max);  
1389 - work.push_back(cur_number);  
1390 - } else {  
1391 - throw std::runtime_error("number expected");  
1392 } 1337 }
  1338 + };
  1339 +
  1340 + char const* p;
  1341 + try {
  1342 + char const* range_end = range + strlen(range);
  1343 + std::vector<int> result;
  1344 + std::vector<int> last_group;
  1345 + // See if range ends with :even or :odd.
  1346 + size_t start_idx = 0;
  1347 + size_t skip = 1;
  1348 + p = std::find(range, range_end, ':');
1393 if (*p == ':') { 1349 if (*p == ':') {
1394 if (strcmp(p, ":odd") == 0) { 1350 if (strcmp(p, ":odd") == 0) {
1395 skip = 2; 1351 skip = 2;
@@ -1397,46 +1353,55 @@ QUtil::parse_numrange(char const* range, int max) @@ -1397,46 +1353,55 @@ QUtil::parse_numrange(char const* range, int max)
1397 skip = 2; 1353 skip = 2;
1398 start_idx = 1; 1354 start_idx = 1;
1399 } else { 1355 } else {
1400 - throw std::runtime_error("unexpected even/odd modifier"); 1356 + throw std::runtime_error("expected :even or :odd");
1401 } 1357 }
  1358 + range_end = p;
1402 } 1359 }
1403 1360
1404 - p = nullptr;  
1405 - for (size_t i = 0; i < work.size(); i += 2) {  
1406 - int num = work.at(i);  
1407 - // max == 0 means we don't know the max and are just testing for valid syntax.  
1408 - if ((max > 0) && ((num < 1) || (num > max))) {  
1409 - throw std::runtime_error("number " + QUtil::int_to_string(num) + " out of range"); 1361 + // Divide the range into groups
  1362 + p = range;
  1363 + char const* group_end;
  1364 + bool first = true;
  1365 + while (p != range_end) {
  1366 + group_end = std::find(p, range_end, ',');
  1367 + std::cmatch m;
  1368 + if (!std::regex_match(p, group_end, m, group_re)) {
  1369 + throw std::runtime_error("invalid range syntax");
  1370 + }
  1371 + auto is_exclude = m[1].matched;
  1372 + if (first && is_exclude) {
  1373 + throw std::runtime_error("first range group may not be an exclusion");
1410 } 1374 }
1411 - if (i == 0) {  
1412 - result.push_back(work.at(i)); 1375 + first = false;
  1376 + auto first_num = parse_num(m[2].str());
  1377 + auto is_span = m[3].matched;
  1378 + int last_num;
  1379 + if (is_span) {
  1380 + last_num = parse_num(m[3].str());
  1381 + }
  1382 + if (is_exclude) {
  1383 + // XXX
1413 } else { 1384 } else {
1414 - int separator = work.at(i - 1);  
1415 - if (separator == comma) {  
1416 - result.push_back(num);  
1417 - } else if (separator == dash) {  
1418 - int lastnum = result.back();  
1419 - if (num > lastnum) {  
1420 - for (int j = lastnum + 1; j <= num; ++j) {  
1421 - result.push_back(j);  
1422 - }  
1423 - } else {  
1424 - for (int j = lastnum - 1; j >= num; --j) {  
1425 - result.push_back(j);  
1426 - }  
1427 - }  
1428 - } else {  
1429 - throw std::logic_error("INTERNAL ERROR parsing numeric range"); 1385 + result.insert(result.end(), last_group.begin(), last_group.end());
  1386 + populate(last_group, first_num, is_span, last_num);
  1387 + }
  1388 + p = group_end;
  1389 + if (*p == ',') {
  1390 + ++p;
  1391 + if (p == range_end) {
  1392 + throw std::runtime_error("trailing comma");
1430 } 1393 }
1431 } 1394 }
1432 } 1395 }
1433 - if ((start_idx > 0) || (skip != 1)) {  
1434 - auto t = result;  
1435 - result.clear();  
1436 - for (size_t i = start_idx; i < t.size(); i += skip) {  
1437 - result.push_back(t.at(i));  
1438 - } 1396 + result.insert(result.end(), last_group.begin(), last_group.end());
  1397 + if (skip == 1) {
  1398 + return result;
1439 } 1399 }
  1400 + std::vector<int> filtered;
  1401 + for (auto i = start_idx; i < result.size(); i += skip) {
  1402 + filtered.emplace_back(result.at(i));
  1403 + }
  1404 + return filtered;
1440 } catch (std::runtime_error const& e) { 1405 } catch (std::runtime_error const& e) {
1441 std::string message; 1406 std::string message;
1442 if (p) { 1407 if (p) {
@@ -1447,7 +1412,6 @@ QUtil::parse_numrange(char const* range, int max) @@ -1447,7 +1412,6 @@ QUtil::parse_numrange(char const* range, int max)
1447 } 1412 }
1448 throw std::runtime_error(message); 1413 throw std::runtime_error(message);
1449 } 1414 }
1450 - return result;  
1451 } 1415 }
1452 1416
1453 enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc }; 1417 enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
libtests/qtest/numrange.test
@@ -9,37 +9,37 @@ my $td = new TestDriver('numrange'); @@ -9,37 +9,37 @@ my $td = new TestDriver('numrange');
9 9
10 my @nrange_tests = ( 10 my @nrange_tests = (
11 [",5", 11 [",5",
12 - "error at * in numeric range *,5: unexpected separator", 12 + "error at * in numeric range *,5: invalid range syntax",
13 2], 13 2],
14 ["4,,5", 14 ["4,,5",
15 - "error at * in numeric range 4,*,5: unexpected separator", 15 + "error at * in numeric range 4,*,5: invalid range syntax",
16 2], 16 2],
17 ["4,5,", 17 ["4,5,",
18 - "error at * in numeric range 4,5,*: number expected", 18 + "error at * in numeric range 4,5,*: trailing comma",
19 2], 19 2],
20 ["z1,", 20 ["z1,",
21 - "error at * in numeric range z*1,: digit not expected", 21 + "error at * in numeric range *z1,: invalid range syntax",
22 2], 22 2],
23 ["1z,", 23 ["1z,",
24 - "error at * in numeric range 1*z,: z not expected", 24 + "error at * in numeric range *1z,: invalid range syntax",
25 2], 25 2],
26 ["1-5?", 26 ["1-5?",
27 - "error at * in numeric range 1-5*?: unexpected character", 27 + "error at * in numeric range *1-5?: invalid range syntax",
28 2], 28 2],
29 ["1-30", 29 ["1-30",
30 - "error in numeric range 1-30: number 30 out of range", 30 + "error at * in numeric range *1-30: number 30 out of range",
31 2], 31 2],
32 ["1-10,0,5", 32 ["1-10,0,5",
33 - "error in numeric range 1-10,0,5: number 0 out of range", 33 + "error at * in numeric range 1-10,*0,5: number 0 out of range",
34 2], 34 2],
35 ["1-10,1234,5", 35 ["1-10,1234,5",
36 - "error in numeric range 1-10,1234,5: number 1234 out of range", 36 + "error at * in numeric range 1-10,*1234,5: number 1234 out of range",
37 2], 37 2],
38 ["1,r,3", 38 ["1,r,3",
39 - "error in numeric range 1,r,3: number 16 out of range", 39 + "error at * in numeric range 1,*r,3: invalid range syntax",
40 2], 40 2],
41 ["1,r16,3", 41 ["1,r16,3",
42 - "error in numeric range 1,r16,3: number 0 out of range", 42 + "error at * in numeric range 1,*r16,3: number 0 out of range",
43 2], 43 2],
44 ["1,3,5-10,z-13,13,9,z,2,r2-r4", 44 ["1,3,5-10,z-13,13,9,z,2,r2-r4",
45 "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" . 45 "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
@@ -50,16 +50,16 @@ my @nrange_tests = ( @@ -50,16 +50,16 @@ my @nrange_tests = (
50 " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1", 50 " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
51 0], 51 0],
52 ["1-10:quack", 52 ["1-10:quack",
53 - "error at * in numeric range 1-10*:quack: unexpected even/odd modifier", 53 + "error at * in numeric range 1-10*:quack: expected :even or :odd",
54 2], 54 2],
55 ["1-10:", 55 ["1-10:",
56 - "error at * in numeric range 1-10*:: unexpected even/odd modifier", 56 + "error at * in numeric range 1-10*:: expected :even or :odd",
57 2], 57 2],
58 ["1-10,r:", 58 ["1-10,r:",
59 - "error at * in numeric range 1-10,r*:: unexpected even/odd modifier", 59 + "error at * in numeric range 1-10,r*:: expected :even or :odd",
60 2], 60 2],
61 ["1-10,:", 61 ["1-10,:",
62 - "error at * in numeric range 1-10,*:: unexpected colon", 62 + "error at * in numeric range 1-10,*:: expected :even or :odd",
63 2], 63 2],
64 ["1-6,8-12:odd", 64 ["1-6,8-12:odd",
65 "numeric range 1-6,8-12:odd -> 1 3 5 8 10 12", 65 "numeric range 1-6,8-12:odd -> 1 3 5 8 10 12",