Commit 708ea4ef43c2f7d6a88f215f1b932c5118aceafb

Authored by Jay Berkenbilt
1 parent d9b8b0aa

Completely rewrite QUtil::parse_numrange

libqpdf/QUtil.cc
... ... @@ -9,15 +9,12 @@
9 9 #include <qpdf/QPDFSystemError.hh>
10 10 #include <qpdf/QTC.hh>
11 11  
12   -#include <cctype>
13 12 #include <cerrno>
14   -#include <cstdio>
15 13 #include <cstdlib>
16 14 #include <cstring>
17 15 #include <fcntl.h>
18 16 #include <fstream>
19 17 #include <iomanip>
20   -#include <locale>
21 18 #include <map>
22 19 #include <memory>
23 20 #include <regex>
... ... @@ -1303,93 +1300,52 @@ QUtil::str_compare_nocase(char const* s1, char const* s2)
1303 1300 #endif
1304 1301 }
1305 1302  
1306   -static int
1307   -maybe_from_end(int num, bool from_end, int max)
1308   -{
1309   - if (from_end) {
1310   - if (num > max) {
1311   - num = 0;
1312   - } else {
1313   - num = max + 1 - num;
1314   - }
1315   - }
1316   - return num;
1317   -}
1318   -
1319 1303 std::vector<int>
1320 1304 QUtil::parse_numrange(char const* range, int max)
1321 1305 {
1322   - std::vector<int> result;
1323   - char const* p = range;
1324   - try {
1325   - std::vector<int> work;
1326   - static int const comma = -1;
1327   - static int const dash = -2;
1328   - size_t start_idx = 0;
1329   - size_t skip = 1;
  1306 + static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
  1307 + auto parse_num = [&max](std::string const& s) -> int {
  1308 + if (s == "z") {
  1309 + return max;
  1310 + }
  1311 + int num;
  1312 + if (s.at(0) == 'r') {
  1313 + num = max + 1 - string_to_int(s.substr(1).c_str());
  1314 + } else {
  1315 + num = string_to_int(s.c_str());
  1316 + }
  1317 + // max == 0 means we don't know the max and are just testing for valid syntax.
  1318 + if ((max > 0) && ((num < 1) || (num > max))) {
  1319 + throw std::runtime_error("number " + std::to_string(num) + " out of range");
  1320 + }
  1321 + return num;
  1322 + };
1330 1323  
1331   - enum { st_top, st_in_number, st_after_number } state = st_top;
1332   - bool last_separator_was_dash = false;
1333   - int cur_number = 0;
1334   - bool from_end = false;
1335   - while (*p) {
1336   - char ch = *p;
1337   - if (isdigit(ch)) {
1338   - if (!((state == st_top) || (state == st_in_number))) {
1339   - throw std::runtime_error("digit not expected");
1340   - }
1341   - state = st_in_number;
1342   - cur_number *= 10;
1343   - cur_number += (ch - '0');
1344   - } else if (ch == 'z') {
1345   - // z represents max
1346   - if (!(state == st_top)) {
1347   - throw std::runtime_error("z not expected");
1348   - }
1349   - state = st_after_number;
1350   - cur_number = max;
1351   - } else if (ch == 'r') {
1352   - if (!(state == st_top)) {
1353   - throw std::runtime_error("r not expected");
1354   - }
1355   - state = st_in_number;
1356   - from_end = true;
1357   - } else if ((ch == ',') || (ch == '-')) {
1358   - if (!((state == st_in_number) || (state == st_after_number))) {
1359   - throw std::runtime_error("unexpected separator");
  1324 + auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) {
  1325 + group.clear();
  1326 + group.emplace_back(first_num);
  1327 + if (is_span) {
  1328 + if (first_num > last_num) {
  1329 + for (auto i = first_num - 1; i >= last_num; --i) {
  1330 + group.push_back(i);
1360 1331 }
1361   - cur_number = maybe_from_end(cur_number, from_end, max);
1362   - work.push_back(cur_number);
1363   - cur_number = 0;
1364   - from_end = false;
1365   - if (ch == ',') {
1366   - state = st_top;
1367   - last_separator_was_dash = false;
1368   - work.push_back(comma);
1369   - } else if (ch == '-') {
1370   - if (last_separator_was_dash) {
1371   - throw std::runtime_error("unexpected dash");
1372   - }
1373   - state = st_top;
1374   - last_separator_was_dash = true;
1375   - work.push_back(dash);
1376   - }
1377   - } else if (ch == ':') {
1378   - if (!((state == st_in_number) || (state == st_after_number))) {
1379   - throw std::runtime_error("unexpected colon");
1380   - }
1381   - break;
1382 1332 } else {
1383   - throw std::runtime_error("unexpected character");
  1333 + for (auto i = first_num + 1; i <= last_num; ++i) {
  1334 + group.push_back(i);
  1335 + }
1384 1336 }
1385   - ++p;
1386   - }
1387   - if ((state == st_in_number) || (state == st_after_number)) {
1388   - cur_number = maybe_from_end(cur_number, from_end, max);
1389   - work.push_back(cur_number);
1390   - } else {
1391   - throw std::runtime_error("number expected");
1392 1337 }
  1338 + };
  1339 +
  1340 + char const* p;
  1341 + try {
  1342 + char const* range_end = range + strlen(range);
  1343 + std::vector<int> result;
  1344 + std::vector<int> last_group;
  1345 + // See if range ends with :even or :odd.
  1346 + size_t start_idx = 0;
  1347 + size_t skip = 1;
  1348 + p = std::find(range, range_end, ':');
1393 1349 if (*p == ':') {
1394 1350 if (strcmp(p, ":odd") == 0) {
1395 1351 skip = 2;
... ... @@ -1397,46 +1353,55 @@ QUtil::parse_numrange(char const* range, int max)
1397 1353 skip = 2;
1398 1354 start_idx = 1;
1399 1355 } else {
1400   - throw std::runtime_error("unexpected even/odd modifier");
  1356 + throw std::runtime_error("expected :even or :odd");
1401 1357 }
  1358 + range_end = p;
1402 1359 }
1403 1360  
1404   - p = nullptr;
1405   - for (size_t i = 0; i < work.size(); i += 2) {
1406   - int num = work.at(i);
1407   - // max == 0 means we don't know the max and are just testing for valid syntax.
1408   - if ((max > 0) && ((num < 1) || (num > max))) {
1409   - throw std::runtime_error("number " + QUtil::int_to_string(num) + " out of range");
  1361 + // Divide the range into groups
  1362 + p = range;
  1363 + char const* group_end;
  1364 + bool first = true;
  1365 + while (p != range_end) {
  1366 + group_end = std::find(p, range_end, ',');
  1367 + std::cmatch m;
  1368 + if (!std::regex_match(p, group_end, m, group_re)) {
  1369 + throw std::runtime_error("invalid range syntax");
  1370 + }
  1371 + auto is_exclude = m[1].matched;
  1372 + if (first && is_exclude) {
  1373 + throw std::runtime_error("first range group may not be an exclusion");
1410 1374 }
1411   - if (i == 0) {
1412   - result.push_back(work.at(i));
  1375 + first = false;
  1376 + auto first_num = parse_num(m[2].str());
  1377 + auto is_span = m[3].matched;
  1378 + int last_num;
  1379 + if (is_span) {
  1380 + last_num = parse_num(m[3].str());
  1381 + }
  1382 + if (is_exclude) {
  1383 + // XXX
1413 1384 } else {
1414   - int separator = work.at(i - 1);
1415   - if (separator == comma) {
1416   - result.push_back(num);
1417   - } else if (separator == dash) {
1418   - int lastnum = result.back();
1419   - if (num > lastnum) {
1420   - for (int j = lastnum + 1; j <= num; ++j) {
1421   - result.push_back(j);
1422   - }
1423   - } else {
1424   - for (int j = lastnum - 1; j >= num; --j) {
1425   - result.push_back(j);
1426   - }
1427   - }
1428   - } else {
1429   - throw std::logic_error("INTERNAL ERROR parsing numeric range");
  1385 + result.insert(result.end(), last_group.begin(), last_group.end());
  1386 + populate(last_group, first_num, is_span, last_num);
  1387 + }
  1388 + p = group_end;
  1389 + if (*p == ',') {
  1390 + ++p;
  1391 + if (p == range_end) {
  1392 + throw std::runtime_error("trailing comma");
1430 1393 }
1431 1394 }
1432 1395 }
1433   - if ((start_idx > 0) || (skip != 1)) {
1434   - auto t = result;
1435   - result.clear();
1436   - for (size_t i = start_idx; i < t.size(); i += skip) {
1437   - result.push_back(t.at(i));
1438   - }
  1396 + result.insert(result.end(), last_group.begin(), last_group.end());
  1397 + if (skip == 1) {
  1398 + return result;
1439 1399 }
  1400 + std::vector<int> filtered;
  1401 + for (auto i = start_idx; i < result.size(); i += skip) {
  1402 + filtered.emplace_back(result.at(i));
  1403 + }
  1404 + return filtered;
1440 1405 } catch (std::runtime_error const& e) {
1441 1406 std::string message;
1442 1407 if (p) {
... ... @@ -1447,7 +1412,6 @@ QUtil::parse_numrange(char const* range, int max)
1447 1412 }
1448 1413 throw std::runtime_error(message);
1449 1414 }
1450   - return result;
1451 1415 }
1452 1416  
1453 1417 enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
... ...
libtests/qtest/numrange.test
... ... @@ -9,37 +9,37 @@ my $td = new TestDriver('numrange');
9 9  
10 10 my @nrange_tests = (
11 11 [",5",
12   - "error at * in numeric range *,5: unexpected separator",
  12 + "error at * in numeric range *,5: invalid range syntax",
13 13 2],
14 14 ["4,,5",
15   - "error at * in numeric range 4,*,5: unexpected separator",
  15 + "error at * in numeric range 4,*,5: invalid range syntax",
16 16 2],
17 17 ["4,5,",
18   - "error at * in numeric range 4,5,*: number expected",
  18 + "error at * in numeric range 4,5,*: trailing comma",
19 19 2],
20 20 ["z1,",
21   - "error at * in numeric range z*1,: digit not expected",
  21 + "error at * in numeric range *z1,: invalid range syntax",
22 22 2],
23 23 ["1z,",
24   - "error at * in numeric range 1*z,: z not expected",
  24 + "error at * in numeric range *1z,: invalid range syntax",
25 25 2],
26 26 ["1-5?",
27   - "error at * in numeric range 1-5*?: unexpected character",
  27 + "error at * in numeric range *1-5?: invalid range syntax",
28 28 2],
29 29 ["1-30",
30   - "error in numeric range 1-30: number 30 out of range",
  30 + "error at * in numeric range *1-30: number 30 out of range",
31 31 2],
32 32 ["1-10,0,5",
33   - "error in numeric range 1-10,0,5: number 0 out of range",
  33 + "error at * in numeric range 1-10,*0,5: number 0 out of range",
34 34 2],
35 35 ["1-10,1234,5",
36   - "error in numeric range 1-10,1234,5: number 1234 out of range",
  36 + "error at * in numeric range 1-10,*1234,5: number 1234 out of range",
37 37 2],
38 38 ["1,r,3",
39   - "error in numeric range 1,r,3: number 16 out of range",
  39 + "error at * in numeric range 1,*r,3: invalid range syntax",
40 40 2],
41 41 ["1,r16,3",
42   - "error in numeric range 1,r16,3: number 0 out of range",
  42 + "error at * in numeric range 1,*r16,3: number 0 out of range",
43 43 2],
44 44 ["1,3,5-10,z-13,13,9,z,2,r2-r4",
45 45 "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
... ... @@ -50,16 +50,16 @@ my @nrange_tests = (
50 50 " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
51 51 0],
52 52 ["1-10:quack",
53   - "error at * in numeric range 1-10*:quack: unexpected even/odd modifier",
  53 + "error at * in numeric range 1-10*:quack: expected :even or :odd",
54 54 2],
55 55 ["1-10:",
56   - "error at * in numeric range 1-10*:: unexpected even/odd modifier",
  56 + "error at * in numeric range 1-10*:: expected :even or :odd",
57 57 2],
58 58 ["1-10,r:",
59   - "error at * in numeric range 1-10,r*:: unexpected even/odd modifier",
  59 + "error at * in numeric range 1-10,r*:: expected :even or :odd",
60 60 2],
61 61 ["1-10,:",
62   - "error at * in numeric range 1-10,*:: unexpected colon",
  62 + "error at * in numeric range 1-10,*:: expected :even or :odd",
63 63 2],
64 64 ["1-6,8-12:odd",
65 65 "numeric range 1-6,8-12:odd -> 1 3 5 8 10 12",
... ...