Commit 708ea4ef43c2f7d6a88f215f1b932c5118aceafb
1 parent
d9b8b0aa
Completely rewrite QUtil::parse_numrange
Showing
2 changed files
with
93 additions
and
129 deletions
libqpdf/QUtil.cc
| ... | ... | @@ -9,15 +9,12 @@ |
| 9 | 9 | #include <qpdf/QPDFSystemError.hh> |
| 10 | 10 | #include <qpdf/QTC.hh> |
| 11 | 11 | |
| 12 | -#include <cctype> | |
| 13 | 12 | #include <cerrno> |
| 14 | -#include <cstdio> | |
| 15 | 13 | #include <cstdlib> |
| 16 | 14 | #include <cstring> |
| 17 | 15 | #include <fcntl.h> |
| 18 | 16 | #include <fstream> |
| 19 | 17 | #include <iomanip> |
| 20 | -#include <locale> | |
| 21 | 18 | #include <map> |
| 22 | 19 | #include <memory> |
| 23 | 20 | #include <regex> |
| ... | ... | @@ -1303,93 +1300,52 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) |
| 1303 | 1300 | #endif |
| 1304 | 1301 | } |
| 1305 | 1302 | |
| 1306 | -static int | |
| 1307 | -maybe_from_end(int num, bool from_end, int max) | |
| 1308 | -{ | |
| 1309 | - if (from_end) { | |
| 1310 | - if (num > max) { | |
| 1311 | - num = 0; | |
| 1312 | - } else { | |
| 1313 | - num = max + 1 - num; | |
| 1314 | - } | |
| 1315 | - } | |
| 1316 | - return num; | |
| 1317 | -} | |
| 1318 | - | |
| 1319 | 1303 | std::vector<int> |
| 1320 | 1304 | QUtil::parse_numrange(char const* range, int max) |
| 1321 | 1305 | { |
| 1322 | - std::vector<int> result; | |
| 1323 | - char const* p = range; | |
| 1324 | - try { | |
| 1325 | - std::vector<int> work; | |
| 1326 | - static int const comma = -1; | |
| 1327 | - static int const dash = -2; | |
| 1328 | - size_t start_idx = 0; | |
| 1329 | - size_t skip = 1; | |
| 1306 | + static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); | |
| 1307 | + auto parse_num = [&max](std::string const& s) -> int { | |
| 1308 | + if (s == "z") { | |
| 1309 | + return max; | |
| 1310 | + } | |
| 1311 | + int num; | |
| 1312 | + if (s.at(0) == 'r') { | |
| 1313 | + num = max + 1 - string_to_int(s.substr(1).c_str()); | |
| 1314 | + } else { | |
| 1315 | + num = string_to_int(s.c_str()); | |
| 1316 | + } | |
| 1317 | + // max == 0 means we don't know the max and are just testing for valid syntax. | |
| 1318 | + if ((max > 0) && ((num < 1) || (num > max))) { | |
| 1319 | + throw std::runtime_error("number " + std::to_string(num) + " out of range"); | |
| 1320 | + } | |
| 1321 | + return num; | |
| 1322 | + }; | |
| 1330 | 1323 | |
| 1331 | - enum { st_top, st_in_number, st_after_number } state = st_top; | |
| 1332 | - bool last_separator_was_dash = false; | |
| 1333 | - int cur_number = 0; | |
| 1334 | - bool from_end = false; | |
| 1335 | - while (*p) { | |
| 1336 | - char ch = *p; | |
| 1337 | - if (isdigit(ch)) { | |
| 1338 | - if (!((state == st_top) || (state == st_in_number))) { | |
| 1339 | - throw std::runtime_error("digit not expected"); | |
| 1340 | - } | |
| 1341 | - state = st_in_number; | |
| 1342 | - cur_number *= 10; | |
| 1343 | - cur_number += (ch - '0'); | |
| 1344 | - } else if (ch == 'z') { | |
| 1345 | - // z represents max | |
| 1346 | - if (!(state == st_top)) { | |
| 1347 | - throw std::runtime_error("z not expected"); | |
| 1348 | - } | |
| 1349 | - state = st_after_number; | |
| 1350 | - cur_number = max; | |
| 1351 | - } else if (ch == 'r') { | |
| 1352 | - if (!(state == st_top)) { | |
| 1353 | - throw std::runtime_error("r not expected"); | |
| 1354 | - } | |
| 1355 | - state = st_in_number; | |
| 1356 | - from_end = true; | |
| 1357 | - } else if ((ch == ',') || (ch == '-')) { | |
| 1358 | - if (!((state == st_in_number) || (state == st_after_number))) { | |
| 1359 | - throw std::runtime_error("unexpected separator"); | |
| 1324 | + auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) { | |
| 1325 | + group.clear(); | |
| 1326 | + group.emplace_back(first_num); | |
| 1327 | + if (is_span) { | |
| 1328 | + if (first_num > last_num) { | |
| 1329 | + for (auto i = first_num - 1; i >= last_num; --i) { | |
| 1330 | + group.push_back(i); | |
| 1360 | 1331 | } |
| 1361 | - cur_number = maybe_from_end(cur_number, from_end, max); | |
| 1362 | - work.push_back(cur_number); | |
| 1363 | - cur_number = 0; | |
| 1364 | - from_end = false; | |
| 1365 | - if (ch == ',') { | |
| 1366 | - state = st_top; | |
| 1367 | - last_separator_was_dash = false; | |
| 1368 | - work.push_back(comma); | |
| 1369 | - } else if (ch == '-') { | |
| 1370 | - if (last_separator_was_dash) { | |
| 1371 | - throw std::runtime_error("unexpected dash"); | |
| 1372 | - } | |
| 1373 | - state = st_top; | |
| 1374 | - last_separator_was_dash = true; | |
| 1375 | - work.push_back(dash); | |
| 1376 | - } | |
| 1377 | - } else if (ch == ':') { | |
| 1378 | - if (!((state == st_in_number) || (state == st_after_number))) { | |
| 1379 | - throw std::runtime_error("unexpected colon"); | |
| 1380 | - } | |
| 1381 | - break; | |
| 1382 | 1332 | } else { |
| 1383 | - throw std::runtime_error("unexpected character"); | |
| 1333 | + for (auto i = first_num + 1; i <= last_num; ++i) { | |
| 1334 | + group.push_back(i); | |
| 1335 | + } | |
| 1384 | 1336 | } |
| 1385 | - ++p; | |
| 1386 | - } | |
| 1387 | - if ((state == st_in_number) || (state == st_after_number)) { | |
| 1388 | - cur_number = maybe_from_end(cur_number, from_end, max); | |
| 1389 | - work.push_back(cur_number); | |
| 1390 | - } else { | |
| 1391 | - throw std::runtime_error("number expected"); | |
| 1392 | 1337 | } |
| 1338 | + }; | |
| 1339 | + | |
| 1340 | + char const* p; | |
| 1341 | + try { | |
| 1342 | + char const* range_end = range + strlen(range); | |
| 1343 | + std::vector<int> result; | |
| 1344 | + std::vector<int> last_group; | |
| 1345 | + // See if range ends with :even or :odd. | |
| 1346 | + size_t start_idx = 0; | |
| 1347 | + size_t skip = 1; | |
| 1348 | + p = std::find(range, range_end, ':'); | |
| 1393 | 1349 | if (*p == ':') { |
| 1394 | 1350 | if (strcmp(p, ":odd") == 0) { |
| 1395 | 1351 | skip = 2; |
| ... | ... | @@ -1397,46 +1353,55 @@ QUtil::parse_numrange(char const* range, int max) |
| 1397 | 1353 | skip = 2; |
| 1398 | 1354 | start_idx = 1; |
| 1399 | 1355 | } else { |
| 1400 | - throw std::runtime_error("unexpected even/odd modifier"); | |
| 1356 | + throw std::runtime_error("expected :even or :odd"); | |
| 1401 | 1357 | } |
| 1358 | + range_end = p; | |
| 1402 | 1359 | } |
| 1403 | 1360 | |
| 1404 | - p = nullptr; | |
| 1405 | - for (size_t i = 0; i < work.size(); i += 2) { | |
| 1406 | - int num = work.at(i); | |
| 1407 | - // max == 0 means we don't know the max and are just testing for valid syntax. | |
| 1408 | - if ((max > 0) && ((num < 1) || (num > max))) { | |
| 1409 | - throw std::runtime_error("number " + QUtil::int_to_string(num) + " out of range"); | |
| 1361 | + // Divide the range into groups | |
| 1362 | + p = range; | |
| 1363 | + char const* group_end; | |
| 1364 | + bool first = true; | |
| 1365 | + while (p != range_end) { | |
| 1366 | + group_end = std::find(p, range_end, ','); | |
| 1367 | + std::cmatch m; | |
| 1368 | + if (!std::regex_match(p, group_end, m, group_re)) { | |
| 1369 | + throw std::runtime_error("invalid range syntax"); | |
| 1370 | + } | |
| 1371 | + auto is_exclude = m[1].matched; | |
| 1372 | + if (first && is_exclude) { | |
| 1373 | + throw std::runtime_error("first range group may not be an exclusion"); | |
| 1410 | 1374 | } |
| 1411 | - if (i == 0) { | |
| 1412 | - result.push_back(work.at(i)); | |
| 1375 | + first = false; | |
| 1376 | + auto first_num = parse_num(m[2].str()); | |
| 1377 | + auto is_span = m[3].matched; | |
| 1378 | + int last_num; | |
| 1379 | + if (is_span) { | |
| 1380 | + last_num = parse_num(m[3].str()); | |
| 1381 | + } | |
| 1382 | + if (is_exclude) { | |
| 1383 | + // XXX | |
| 1413 | 1384 | } else { |
| 1414 | - int separator = work.at(i - 1); | |
| 1415 | - if (separator == comma) { | |
| 1416 | - result.push_back(num); | |
| 1417 | - } else if (separator == dash) { | |
| 1418 | - int lastnum = result.back(); | |
| 1419 | - if (num > lastnum) { | |
| 1420 | - for (int j = lastnum + 1; j <= num; ++j) { | |
| 1421 | - result.push_back(j); | |
| 1422 | - } | |
| 1423 | - } else { | |
| 1424 | - for (int j = lastnum - 1; j >= num; --j) { | |
| 1425 | - result.push_back(j); | |
| 1426 | - } | |
| 1427 | - } | |
| 1428 | - } else { | |
| 1429 | - throw std::logic_error("INTERNAL ERROR parsing numeric range"); | |
| 1385 | + result.insert(result.end(), last_group.begin(), last_group.end()); | |
| 1386 | + populate(last_group, first_num, is_span, last_num); | |
| 1387 | + } | |
| 1388 | + p = group_end; | |
| 1389 | + if (*p == ',') { | |
| 1390 | + ++p; | |
| 1391 | + if (p == range_end) { | |
| 1392 | + throw std::runtime_error("trailing comma"); | |
| 1430 | 1393 | } |
| 1431 | 1394 | } |
| 1432 | 1395 | } |
| 1433 | - if ((start_idx > 0) || (skip != 1)) { | |
| 1434 | - auto t = result; | |
| 1435 | - result.clear(); | |
| 1436 | - for (size_t i = start_idx; i < t.size(); i += skip) { | |
| 1437 | - result.push_back(t.at(i)); | |
| 1438 | - } | |
| 1396 | + result.insert(result.end(), last_group.begin(), last_group.end()); | |
| 1397 | + if (skip == 1) { | |
| 1398 | + return result; | |
| 1439 | 1399 | } |
| 1400 | + std::vector<int> filtered; | |
| 1401 | + for (auto i = start_idx; i < result.size(); i += skip) { | |
| 1402 | + filtered.emplace_back(result.at(i)); | |
| 1403 | + } | |
| 1404 | + return filtered; | |
| 1440 | 1405 | } catch (std::runtime_error const& e) { |
| 1441 | 1406 | std::string message; |
| 1442 | 1407 | if (p) { |
| ... | ... | @@ -1447,7 +1412,6 @@ QUtil::parse_numrange(char const* range, int max) |
| 1447 | 1412 | } |
| 1448 | 1413 | throw std::runtime_error(message); |
| 1449 | 1414 | } |
| 1450 | - return result; | |
| 1451 | 1415 | } |
| 1452 | 1416 | |
| 1453 | 1417 | enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc }; | ... | ... |
libtests/qtest/numrange.test
| ... | ... | @@ -9,37 +9,37 @@ my $td = new TestDriver('numrange'); |
| 9 | 9 | |
| 10 | 10 | my @nrange_tests = ( |
| 11 | 11 | [",5", |
| 12 | - "error at * in numeric range *,5: unexpected separator", | |
| 12 | + "error at * in numeric range *,5: invalid range syntax", | |
| 13 | 13 | 2], |
| 14 | 14 | ["4,,5", |
| 15 | - "error at * in numeric range 4,*,5: unexpected separator", | |
| 15 | + "error at * in numeric range 4,*,5: invalid range syntax", | |
| 16 | 16 | 2], |
| 17 | 17 | ["4,5,", |
| 18 | - "error at * in numeric range 4,5,*: number expected", | |
| 18 | + "error at * in numeric range 4,5,*: trailing comma", | |
| 19 | 19 | 2], |
| 20 | 20 | ["z1,", |
| 21 | - "error at * in numeric range z*1,: digit not expected", | |
| 21 | + "error at * in numeric range *z1,: invalid range syntax", | |
| 22 | 22 | 2], |
| 23 | 23 | ["1z,", |
| 24 | - "error at * in numeric range 1*z,: z not expected", | |
| 24 | + "error at * in numeric range *1z,: invalid range syntax", | |
| 25 | 25 | 2], |
| 26 | 26 | ["1-5?", |
| 27 | - "error at * in numeric range 1-5*?: unexpected character", | |
| 27 | + "error at * in numeric range *1-5?: invalid range syntax", | |
| 28 | 28 | 2], |
| 29 | 29 | ["1-30", |
| 30 | - "error in numeric range 1-30: number 30 out of range", | |
| 30 | + "error at * in numeric range *1-30: number 30 out of range", | |
| 31 | 31 | 2], |
| 32 | 32 | ["1-10,0,5", |
| 33 | - "error in numeric range 1-10,0,5: number 0 out of range", | |
| 33 | + "error at * in numeric range 1-10,*0,5: number 0 out of range", | |
| 34 | 34 | 2], |
| 35 | 35 | ["1-10,1234,5", |
| 36 | - "error in numeric range 1-10,1234,5: number 1234 out of range", | |
| 36 | + "error at * in numeric range 1-10,*1234,5: number 1234 out of range", | |
| 37 | 37 | 2], |
| 38 | 38 | ["1,r,3", |
| 39 | - "error in numeric range 1,r,3: number 16 out of range", | |
| 39 | + "error at * in numeric range 1,*r,3: invalid range syntax", | |
| 40 | 40 | 2], |
| 41 | 41 | ["1,r16,3", |
| 42 | - "error in numeric range 1,r16,3: number 0 out of range", | |
| 42 | + "error at * in numeric range 1,*r16,3: number 0 out of range", | |
| 43 | 43 | 2], |
| 44 | 44 | ["1,3,5-10,z-13,13,9,z,2,r2-r4", |
| 45 | 45 | "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" . |
| ... | ... | @@ -50,16 +50,16 @@ my @nrange_tests = ( |
| 50 | 50 | " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1", |
| 51 | 51 | 0], |
| 52 | 52 | ["1-10:quack", |
| 53 | - "error at * in numeric range 1-10*:quack: unexpected even/odd modifier", | |
| 53 | + "error at * in numeric range 1-10*:quack: expected :even or :odd", | |
| 54 | 54 | 2], |
| 55 | 55 | ["1-10:", |
| 56 | - "error at * in numeric range 1-10*:: unexpected even/odd modifier", | |
| 56 | + "error at * in numeric range 1-10*:: expected :even or :odd", | |
| 57 | 57 | 2], |
| 58 | 58 | ["1-10,r:", |
| 59 | - "error at * in numeric range 1-10,r*:: unexpected even/odd modifier", | |
| 59 | + "error at * in numeric range 1-10,r*:: expected :even or :odd", | |
| 60 | 60 | 2], |
| 61 | 61 | ["1-10,:", |
| 62 | - "error at * in numeric range 1-10,*:: unexpected colon", | |
| 62 | + "error at * in numeric range 1-10,*:: expected :even or :odd", | |
| 63 | 63 | 2], |
| 64 | 64 | ["1-6,8-12:odd", |
| 65 | 65 | "numeric range 1-6,8-12:odd -> 1 3 5 8 10 12", | ... | ... |