Commit 5e9f8225982e9711b548c886fe6b477cdefd0fd4

Authored by Jarryd Beck
1 parent f931fd42

Improve integer parsing

Fixes #39. Closes #40. This is an overhaul of the way that integer
arguments are parsed. Instead of using std::istream, which allows,
for example, negative integers for unsigned types, we use our own
parser.

This allows us to do proper range checking depending on the type,
and to correctly check for negative values passed to unsigned types.

This also allows the handling of base 16 numbers.
include/cxxopts.hpp
1 /* 1 /*
2 2
3 -Copyright (c) 2014, 2015, 2016 Jarryd Beck 3 +Copyright (c) 2014, 2015, 2016, 2017 Jarryd Beck
4 4
5 Permission is hereby granted, free of charge, to any person obtaining a copy 5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal 6 of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@ THE SOFTWARE. @@ -31,6 +31,7 @@ THE SOFTWARE.
31 #endif 31 #endif
32 32
33 #include <cstring> 33 #include <cstring>
  34 +#include <cctype>
34 #include <exception> 35 #include <exception>
35 #include <iostream> 36 #include <iostream>
36 #include <map> 37 #include <map>
@@ -411,20 +412,170 @@ namespace cxxopts @@ -411,20 +412,170 @@ namespace cxxopts
411 412
412 namespace values 413 namespace values
413 { 414 {
  415 + std::basic_regex<char> integer_pattern
  416 + ("(-)?(0x)?([1-9a-zA-Z][0-9a-zA-Z]*)|(0)");
  417 +
  418 + namespace detail
  419 + {
  420 + template <typename T, bool B>
  421 + struct SignedCheck;
  422 +
  423 + template <typename T>
  424 + struct SignedCheck<T, true>
  425 + {
  426 + template <typename U>
  427 + void
  428 + operator()(bool negative, U u, const std::string& text)
  429 + {
  430 + if (negative)
  431 + {
  432 + if (u > U(-std::numeric_limits<T>::min()))
  433 + {
  434 + throw argument_incorrect_type(text);
  435 + }
  436 + }
  437 + else
  438 + {
  439 + if (u > std::numeric_limits<T>::max())
  440 + {
  441 + throw argument_incorrect_type(text);
  442 + }
  443 + }
  444 + }
  445 + };
  446 +
  447 + template <typename T>
  448 + struct SignedCheck<T, false>
  449 + {
  450 + template <typename U>
  451 + void
  452 + operator()(bool, U, const std::string&) {}
  453 + };
  454 +
  455 + template <typename T, typename U>
  456 + void
  457 + check_signed_range(bool negative, U value, const std::string& text)
  458 + {
  459 + SignedCheck<T, std::numeric_limits<T>::is_signed>()(negative, value, text);
  460 + }
  461 + }
  462 +
414 template <typename T> 463 template <typename T>
415 void 464 void
416 - parse_value(const std::string& text, T& value) 465 + integer_parser(const std::string& text, T& value)
417 { 466 {
418 - std::istringstream is(text);  
419 - if (!(is >> value)) 467 + std::smatch match;
  468 + std::regex_match(text, match, integer_pattern);
  469 +
  470 + if (match.length() == 0)
420 { 471 {
421 throw argument_incorrect_type(text); 472 throw argument_incorrect_type(text);
422 } 473 }
423 474
424 - if (is.rdbuf()->in_avail() != 0) 475 + if (match.length(4) > 0)
425 { 476 {
426 - throw argument_incorrect_type(text); 477 + value = 0;
  478 + return;
  479 + }
  480 +
  481 + using US = typename std::make_unsigned<T>::type;
  482 +
  483 + constexpr auto umax = std::numeric_limits<US>::max();
  484 + constexpr bool is_signed = std::numeric_limits<T>::is_signed;
  485 + const bool negative = match.length(1) > 0;
  486 + const auto base = match.length(2) > 0 ? 16 : 10;
  487 +
  488 + auto value_match = match[3];
  489 +
  490 + US result = 0;
  491 +
  492 + for (auto iter = value_match.first; iter != value_match.second; ++iter)
  493 + {
  494 + int digit = 0;
  495 +
  496 + if (*iter >= '0' && *iter <= '9')
  497 + {
  498 + digit = *iter - '0';
  499 + }
  500 + else if (*iter >= 'a' && *iter <= 'f')
  501 + {
  502 + digit = *iter - 'a' + 10;
  503 + }
  504 + else if (*iter >= 'A' && *iter <= 'F')
  505 + {
  506 + digit = *iter - 'A' + 10;
  507 + }
  508 +
  509 + if (umax - digit < result * base)
  510 + {
  511 + throw argument_incorrect_type(text);
  512 + }
  513 +
  514 + result = result * base + digit;
427 } 515 }
  516 +
  517 + detail::check_signed_range<T>(negative, result, text);
  518 +
  519 + if (negative)
  520 + {
  521 + if (!is_signed)
  522 + {
  523 + throw argument_incorrect_type(text);
  524 + }
  525 + value = -result;
  526 + }
  527 + else
  528 + {
  529 + value = result;
  530 + }
  531 + }
  532 +
  533 + void
  534 + parse_value(const std::string& text, uint8_t& value)
  535 + {
  536 + integer_parser(text, value);
  537 + }
  538 +
  539 + void
  540 + parse_value(const std::string& text, int8_t& value)
  541 + {
  542 + integer_parser(text, value);
  543 + }
  544 +
  545 + void
  546 + parse_value(const std::string& text, uint16_t& value)
  547 + {
  548 + integer_parser(text, value);
  549 + }
  550 +
  551 + void
  552 + parse_value(const std::string& text, int16_t& value)
  553 + {
  554 + integer_parser(text, value);
  555 + }
  556 +
  557 + void
  558 + parse_value(const std::string& text, uint32_t& value)
  559 + {
  560 + integer_parser(text, value);
  561 + }
  562 +
  563 + void
  564 + parse_value(const std::string& text, int32_t& value)
  565 + {
  566 + integer_parser(text, value);
  567 + }
  568 +
  569 + void
  570 + parse_value(const std::string& text, uint64_t& value)
  571 + {
  572 + integer_parser(text, value);
  573 + }
  574 +
  575 + void
  576 + parse_value(const std::string& text, int64_t& value)
  577 + {
  578 + integer_parser(text, value);
428 } 579 }
429 580
430 inline 581 inline
test/options.cpp
@@ -239,3 +239,100 @@ TEST_CASE(&quot;Empty with implicit value&quot;, &quot;[implicit]&quot;) @@ -239,3 +239,100 @@ TEST_CASE(&quot;Empty with implicit value&quot;, &quot;[implicit]&quot;)
239 REQUIRE(options.count("implicit") == 1); 239 REQUIRE(options.count("implicit") == 1);
240 REQUIRE(options["implicit"].as<std::string>() == ""); 240 REQUIRE(options["implicit"].as<std::string>() == "");
241 } 241 }
  242 +
  243 +TEST_CASE("Integers", "[options]")
  244 +{
  245 + cxxopts::Options options("parses_integers", "parses integers correctly");
  246 + options.add_options()
  247 + ("positional", "Integers", cxxopts::value<std::vector<int>>());
  248 +
  249 + Argv av({"ints", "--", "5", "6", "-6", "0", "0xab", "0xAf"});
  250 +
  251 + char** argv = av.argv();
  252 + auto argc = av.argc();
  253 +
  254 + options.parse_positional("positional");
  255 + options.parse(argc, argv);
  256 +
  257 + REQUIRE(options.count("positional") == 6);
  258 +
  259 + auto& positional = options["positional"].as<std::vector<int>>();
  260 + CHECK(positional[0] == 5);
  261 + CHECK(positional[1] == 6);
  262 + CHECK(positional[2] == -6);
  263 + CHECK(positional[3] == 0);
  264 + CHECK(positional[4] == 0xab);
  265 + CHECK(positional[5] == 0xaf);
  266 +}
  267 +
  268 +TEST_CASE("Unsigned integers", "[options]")
  269 +{
  270 + cxxopts::Options options("parses_unsigned", "detects unsigned errors");
  271 + options.add_options()
  272 + ("positional", "Integers", cxxopts::value<std::vector<unsigned int>>());
  273 +
  274 + Argv av({"ints", "--", "-2"});
  275 +
  276 + char** argv = av.argv();
  277 + auto argc = av.argc();
  278 +
  279 + options.parse_positional("positional");
  280 + CHECK_THROWS_AS(options.parse(argc, argv), cxxopts::argument_incorrect_type);
  281 +}
  282 +
  283 +TEST_CASE("Integer bounds", "[integer]")
  284 +{
  285 + cxxopts::Options options("integer_boundaries", "check min/max integer");
  286 + options.add_options()
  287 + ("positional", "Integers", cxxopts::value<std::vector<int8_t>>());
  288 +
  289 + SECTION("No overflow")
  290 + {
  291 + Argv av({"ints", "--", "127", "-128", "0x7f", "-0x80", "0x7e"});
  292 +
  293 + auto argv = av.argv();
  294 + auto argc = av.argc();
  295 +
  296 + options.parse_positional("positional");
  297 + options.parse(argc, argv);
  298 +
  299 + REQUIRE(options.count("positional") == 5);
  300 +
  301 + auto& positional = options["positional"].as<std::vector<int8_t>>();
  302 + CHECK(positional[0] == 127);
  303 + CHECK(positional[1] == -128);
  304 + CHECK(positional[2] == 0x7f);
  305 + CHECK(positional[3] == -0x80);
  306 + CHECK(positional[4] == 0x7e);
  307 + }
  308 +}
  309 +
  310 +TEST_CASE("Overflow on boundary", "[integer]")
  311 +{
  312 + using namespace cxxopts::values;
  313 +
  314 + int8_t si;
  315 + uint8_t ui;
  316 +
  317 + CHECK_THROWS_AS((integer_parser("128", si)), cxxopts::argument_incorrect_type);
  318 + CHECK_THROWS_AS((integer_parser("-129", si)), cxxopts::argument_incorrect_type);
  319 + CHECK_THROWS_AS((integer_parser("256", ui)), cxxopts::argument_incorrect_type);
  320 + CHECK_THROWS_AS((integer_parser("-0x81", si)), cxxopts::argument_incorrect_type);
  321 + CHECK_THROWS_AS((integer_parser("0x80", si)), cxxopts::argument_incorrect_type);
  322 + CHECK_THROWS_AS((integer_parser("0x100", ui)), cxxopts::argument_incorrect_type);
  323 +}
  324 +
  325 +TEST_CASE("Integer overflow", "[options]")
  326 +{
  327 + cxxopts::Options options("reject_overflow", "rejects overflowing integers");
  328 + options.add_options()
  329 + ("positional", "Integers", cxxopts::value<std::vector<int8_t>>());
  330 +
  331 + Argv av({"ints", "--", "128"});
  332 +
  333 + auto argv = av.argv();
  334 + auto argc = av.argc();
  335 +
  336 + options.parse_positional("positional");
  337 + CHECK_THROWS_AS(options.parse(argc, argv), cxxopts::argument_incorrect_type);
  338 +}