From d129a0e53f31bbbebd61b351d1c30e878db227ef Mon Sep 17 00:00:00 2001 From: barton26 Date: Mon, 8 Aug 2022 16:10:09 -0400 Subject: [PATCH] =?UTF-8?q?util:=20Replace=20use=20of=20locale=20dependent?= =?UTF-8?q?=20atoi(=E2=80=A6)=20with=20locale-independent=20std::from=5Fch?= =?UTF-8?q?ars(=E2=80=A6)=20(C++17)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test/script_tests.cpp | 2 +- src/test/util_tests.cpp | 71 +++++++++++++++++++++++++++++ src/util/strencodings.cpp | 14 ------ src/util/strencodings.h | 31 ++++++++++++- src/util/system.cpp | 16 +++---- test/lint/lint-locale-dependence.sh | 2 - 6 files changed, 108 insertions(+), 28 deletions(-) diff --git a/src/test/script_tests.cpp b/src/test/script_tests.cpp index 659cc3ca38..2a35324da8 100644 --- a/src/test/script_tests.cpp +++ b/src/test/script_tests.cpp @@ -59,7 +59,7 @@ CScript ParseScript(string s) (starts_with(w, "-") && all(string(w.begin()+1, w.end()), ::IsDigit))) { // Number - int64_t n = atoi64(w); + int64_t n = LocaleIndependentAtoi(w); result << n; } else if (starts_with(w, "0x") && IsHex(string(w.begin()+2, w.end()))) diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index 940bd3dacd..5a48ece1c0 100755 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -544,6 +544,77 @@ BOOST_AUTO_TEST_CASE(test_IsDigit) BOOST_CHECK_EQUAL(IsDigit(9), false); } +BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi) +{ + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("1234"), 1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("01234"), 1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1234"), -1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(" 1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("1 "), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("1a"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("1.1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("1.9"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("+01.9"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1"), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(" -1"), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1 "), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(" -1 "), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("+1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(" +1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(" +1 "), 1); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("+-1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-+1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("++1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("--1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi(""), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("aap"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0x1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-32482348723847471234"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("32482348723847471234"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-9223372036854775809"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-9223372036854775808"), -9'223'372'036'854'775'807LL - 1LL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("9223372036854775807"), 9'223'372'036'854'775'807); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("9223372036854775808"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("18446744073709551615"), 18'446'744'073'709'551'615ULL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("18446744073709551616"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-2147483649"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-2147483648"), -2'147'483'648LL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("2147483647"), 2'147'483'647); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("2147483648"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("4294967295"), 4'294'967'295U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("4294967296"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-32769"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-32768"), -32'768); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("32767"), 32'767); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("32768"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("65535"), 65'535U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("65536"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-129"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-128"), -128); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("127"), 127); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("128"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("255"), 255U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi("256"), 0U); +} + BOOST_AUTO_TEST_CASE(test_ParseInt32) { int32_t n; diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp index 67902c84e7..3524174f41 100644 --- a/src/util/strencodings.cpp +++ b/src/util/strencodings.cpp @@ -440,20 +440,6 @@ std::string FormatParagraph(const std::string& in, size_t width, size_t indent) return out.str(); } -int64_t atoi64(const std::string& str) -{ -#ifdef _MSC_VER - return _atoi64(str.c_str()); -#else - return strtoll(str.c_str(), nullptr, 10); -#endif -} - -int atoi(const std::string& str) -{ - return atoi(str.c_str()); -} - /** Upper bound for mantissa. * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. * Larger integers cannot consist of arbitrary combinations of 0-9: diff --git a/src/util/strencodings.h b/src/util/strencodings.h index 5df6616be9..6bfa482f8a 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -11,8 +11,10 @@ #include #include +#include #include +#include #include #include #include @@ -55,8 +57,33 @@ std::string EncodeBase32(const unsigned char* pch, size_t len); std::string EncodeBase32(const std::string& str); void SplitHostPort(std::string in, int &portOut, std::string &hostOut); -int64_t atoi64(const std::string& str); -int atoi(const std::string& str); + +// LocaleIndependentAtoi is provided for backwards compatibility reasons. +// +// New code should use the ParseInt64/ParseUInt64/ParseInt32/ParseUInt32 functions +// which provide parse error feedback. +// +// The goal of LocaleIndependentAtoi is to replicate the exact defined behaviour +// of atoi and atoi64 as they behave under the "C" locale. +template +T LocaleIndependentAtoi(const std::string& str) +{ + static_assert(std::is_integral::value); + T result; + // Emulate atoi(...) handling of white space and leading +/-. + std::string s = TrimString(str); + if (!s.empty() && s[0] == '+') { + if (s.length() >= 2 && s[1] == '-') { + return 0; + } + s = s.substr(1); + } + auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result); + if (error_condition != std::errc{}) { + return 0; + } + return result; +} /** * Tests if the given character is a decimal digit. diff --git a/src/util/system.cpp b/src/util/system.cpp index 40632f3f2d..60a0a4e513 100644 --- a/src/util/system.cpp +++ b/src/util/system.cpp @@ -54,16 +54,14 @@ ArgsManager gArgs; /** * Interpret a string argument as a boolean. * - * The definition of atoi() requires that non-numeric string values like "foo", - * return 0. This means that if a user unintentionally supplies a non-integer - * argument here, the return value is always false. This means that -foo=false - * does what the user probably expects, but -foo=true is well defined but does - * not do what they probably expected. + * The definition of LocaleIndependentAtoi() requires that non-numeric string values + * like "foo", return 0. This means that if a user unintentionally supplies a + * non-integer argument here, the return value is always false. This means that + * -foo=false does what the user probably expects, but -foo=true is well defined + * but does not do what they probably expected. * - * The return value of atoi() is undefined when given input not representable as - * an int. On most systems this means string value between "-2147483648" and - * "2147483647" are well defined (this method will return true). Setting - * -txindex=2147483648 on most systems, however, is probably undefined. + * The return value of LocaleIndependentAtoi(...) is zero when given input not + * representable as an int. * * For a more extensive discussion of this topic (and a wide range of opinions * on the Right Way to change this code), see PR12713. diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh index cd4b6e5be2..b17971235d 100755 --- a/test/lint/lint-locale-dependence.sh +++ b/test/lint/lint-locale-dependence.sh @@ -5,10 +5,8 @@ export LC_ALL=C KNOWN_VIOLATIONS=( - "src/util/strencodings.cpp:.*atoi" "src/util/strencodings.cpp:.*strtol" "src/util/strencodings.cpp:.*strtoul" - "src/util/strencodings.h:.*atoi" "src/logging.h:.*strftime" "src/gridcoin/backup.cpp:.*strftime" "src/rpc/protocol.cpp:.*strftime"