Skip to content

Commit

Permalink
Merge pull request #2592 from barton2526/locale-independent-parseint
Browse files Browse the repository at this point in the history
util: Make Parse{Int,UInt}{32,64} use locale independent std::from_chars(…) (C++17) instead of locale dependent strto{l,ll,ul,ull}
  • Loading branch information
jamescowens authored Nov 6, 2022
2 parents 0b7afb9 + 49027e5 commit 0294fa5
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 110 deletions.
8 changes: 4 additions & 4 deletions src/gridcoin/scraper/scraper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1289,7 +1289,7 @@ void ApplyCache(const std::string& key, T& result)
{
unsigned int out = 0;

if (!ParseUInt(entry.value, &out))
if (!ParseUInt32(entry.value, &out))
{
throw std::invalid_argument("Argument is not parseable as an unsigned int.");
}
Expand Down Expand Up @@ -3354,10 +3354,10 @@ bool LoadScraperFileManifest(const fs::path& file)
nhash.SetHex(vline[0].c_str());
LoadEntry.hash = nhash;

// We will use the ParseUInt from strencodings to avoid the locale specific stoi.
// We will use the ParseUInt32 from strencodings to avoid the locale specific stoi.
unsigned int parsed_current = 0;

if (!ParseUInt(vline[1], &parsed_current))
if (!ParseUInt32(vline[1], &parsed_current))
{
_log(logattribute::ERR, __func__, "The \"current\" field not parsed correctly for a manifest entry. Skipping.");
continue;
Expand All @@ -3380,7 +3380,7 @@ bool LoadScraperFileManifest(const fs::path& file)
// are to be maintained, such as team and host files.
unsigned int parsed_exclude = 0;

if (!ParseUInt(vline[5], &parsed_exclude))
if (!ParseUInt32(vline[5], &parsed_exclude))
{
// This shouldn't happen given the conditional above, but to be thorough...
_log(logattribute::ERR, __func__, "The \"excludefromcsmanifest\" field not parsed correctly for a manifest "
Expand Down
2 changes: 1 addition & 1 deletion src/gridcoin/superblock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ class LegacySuperblockParser
, m_averages(ExtractXML(packed, "<AVERAGES>", "</AVERAGES>"))
, m_zero_mags(0)
{
if (!ParseInt(ExtractXML(packed, "<ZERO>", "</ZERO>"), &m_zero_mags)) {
if (!ParseInt32(ExtractXML(packed, "<ZERO>", "</ZERO>"), &m_zero_mags)) {
error("%s: Failed to parse zero mag CPIDs.", __func__);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/gridcoin/upgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ bool Upgrade::CheckForLatestUpdate(std::string& client_message_out, bool ui_dial
{
int github_version = 0;

if (!ParseInt(GithubVersion[x], &github_version))
if (!ParseInt32(GithubVersion[x], &github_version))
{
throw std::invalid_argument("Failed to parse GitHub version from official GitHub project repo.");
}
Expand Down
8 changes: 4 additions & 4 deletions src/rpc/protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ int ReadHTTPHeaders(std::basic_istream<char>& stream, std::map<std::string, std:
std::string strValue = str.substr(nColon+1);
strValue = TrimString(strValue);
mapHeadersRet[strHeader] = strValue;
if (strHeader == "content-length" && !ParseInt(strValue, &nLen)) {
if (strHeader == "content-length" && !ParseInt32(strValue, &nLen)) {
throw std::invalid_argument("Unable to parse content-length value.");
}
}
Expand Down Expand Up @@ -159,7 +159,7 @@ bool ReadHTTPRequestLine(std::basic_istream<char>& stream, int &proto,
if (start_pos != std::string::npos && length - start_pos > 7) {
strProto = strProto.substr(start_pos + 7);

if (!ParseInt(strProto, &proto)) {
if (!ParseInt32(strProto, &proto)) {
return error("%s: Unable to parse protocol in HTTP string: %s", __func__, strProto);
}
}
Expand All @@ -182,13 +182,13 @@ int ReadHTTPStatus(std::basic_istream<char>& stream, int &proto)
if (start_pos != std::string::npos && str.length() - start_pos > 7) {
str = str.substr(start_pos + 7);

if (!ParseInt(str, &proto)) {
if (!ParseInt32(str, &proto)) {
error("%s: Unable to parse protocol in HTTP string: %s", __func__, str);
}
}

int status = 0;
if (!ParseInt(vWords[1], &status)) {
if (!ParseInt32(vWords[1], &status)) {
error("%s: Unable to parse status: %s", __func__, vWords[1]);
}

Expand Down
2 changes: 1 addition & 1 deletion src/test/gridcoin/superblock_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct Legacy

// Append zero magnitude researchers so the beacon count matches
int num_zero_mag = 0;
if (!ParseInt(ExtractXML(sBlock,"<ZERO>","</ZERO>"), &num_zero_mag)) {
if (!ParseInt32(ExtractXML(sBlock,"<ZERO>","</ZERO>"), &num_zero_mag)) {
error("%s: Unable to parse number of zero magnitude researchers from legary binary superblock data.",
__func__);
};
Expand Down
75 changes: 75 additions & 0 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,81 @@ BOOST_AUTO_TEST_CASE(test_ParseInt32)
BOOST_CHECK(!ParseInt32("32482348723847471234", nullptr));
}

BOOST_AUTO_TEST_CASE(test_ToIntegral)
{
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("1234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("0").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("01234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000001234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000001234").value(), -1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000000000").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000000000").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1234").value(), -1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1").value(), -1);

BOOST_CHECK(!ToIntegral<int32_t>(" 1"));
BOOST_CHECK(!ToIntegral<int32_t>("1 "));
BOOST_CHECK(!ToIntegral<int32_t>("1a"));
BOOST_CHECK(!ToIntegral<int32_t>("1.1"));
BOOST_CHECK(!ToIntegral<int32_t>("1.9"));
BOOST_CHECK(!ToIntegral<int32_t>("+01.9"));
BOOST_CHECK(!ToIntegral<int32_t>(" -1"));
BOOST_CHECK(!ToIntegral<int32_t>("-1 "));
BOOST_CHECK(!ToIntegral<int32_t>(" -1 "));
BOOST_CHECK(!ToIntegral<int32_t>("+1"));
BOOST_CHECK(!ToIntegral<int32_t>(" +1"));
BOOST_CHECK(!ToIntegral<int32_t>(" +1 "));
BOOST_CHECK(!ToIntegral<int32_t>("+-1"));
BOOST_CHECK(!ToIntegral<int32_t>("-+1"));
BOOST_CHECK(!ToIntegral<int32_t>("++1"));
BOOST_CHECK(!ToIntegral<int32_t>("--1"));
BOOST_CHECK(!ToIntegral<int32_t>(""));
BOOST_CHECK(!ToIntegral<int32_t>("aap"));
BOOST_CHECK(!ToIntegral<int32_t>("0x1"));
BOOST_CHECK(!ToIntegral<int32_t>("-32482348723847471234"));
BOOST_CHECK(!ToIntegral<int32_t>("32482348723847471234"));

BOOST_CHECK(!ToIntegral<int64_t>("-9223372036854775809"));
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("-9223372036854775808").value(), -9'223'372'036'854'775'807LL - 1LL);
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("9223372036854775807").value(), 9'223'372'036'854'775'807);
BOOST_CHECK(!ToIntegral<int64_t>("9223372036854775808"));

BOOST_CHECK(!ToIntegral<uint64_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("18446744073709551615").value(), 18'446'744'073'709'551'615ULL);
BOOST_CHECK(!ToIntegral<uint64_t>("18446744073709551616"));

BOOST_CHECK(!ToIntegral<int32_t>("-2147483649"));
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-2147483648").value(), -2'147'483'648LL);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("2147483647").value(), 2'147'483'647);
BOOST_CHECK(!ToIntegral<int32_t>("2147483648"));

BOOST_CHECK(!ToIntegral<uint32_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("4294967295").value(), 4'294'967'295U);
BOOST_CHECK(!ToIntegral<uint32_t>("4294967296"));

BOOST_CHECK(!ToIntegral<int16_t>("-32769"));
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("-32768").value(), -32'768);
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("32767").value(), 32'767);
BOOST_CHECK(!ToIntegral<int16_t>("32768"));

BOOST_CHECK(!ToIntegral<uint16_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("65535").value(), 65'535U);
BOOST_CHECK(!ToIntegral<uint16_t>("65536"));

BOOST_CHECK(!ToIntegral<int8_t>("-129"));
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("-128").value(), -128);
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("127").value(), 127);
BOOST_CHECK(!ToIntegral<int8_t>("128"));

BOOST_CHECK(!ToIntegral<uint8_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("255").value(), 255U);
BOOST_CHECK(!ToIntegral<uint8_t>("256"));
}

BOOST_AUTO_TEST_CASE(test_ParseInt64)
{
int64_t n;
Expand Down
121 changes: 34 additions & 87 deletions src/util/strencodings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <errno.h>
#include <limits>
#include <optional>

static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

Expand Down Expand Up @@ -276,115 +275,63 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid)
return std::string((const char*)vchRet.data(), vchRet.size());
}

[[nodiscard]] static bool ParsePrechecks(const std::string& str)
[[nodiscard]] static bool ParsePrechecks(const std::string&);

namespace {
template <typename T>
bool ParseIntegral(const std::string& str, T* out)
{
if (str.empty()) // No empty string allowed
static_assert(std::is_integral<T>::value);
if (!ParsePrechecks(str)) {
return false;
if (str.size() >= 1 && (IsSpace(str[0]) || IsSpace(str[str.size()-1]))) // No padding allowed
}
// Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
// handling leading +/- for backwards compatibility.
if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
return false;
if (!ValidAsCString(str)) // No embedded NUL characters allowed
}
const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
if (!opt_int) {
return false;
}
if (out != nullptr) {
*out = *opt_int;
}
return true;
}
}; // namespace

bool ParseInt(const std::string& str, int *out)
[[nodiscard]] static bool ParsePrechecks(const std::string& str)
{
if (!ParsePrechecks(str))
if (str.empty()) // No empty string allowed
return false;
char *endp = nullptr;
errno = 0; // strtol will not set errno if valid
long int n = strtol(str.c_str(), &endp, 10);
if(out) *out = (int)n;
// Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *int*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n >= std::numeric_limits<int>::min() &&
n <= std::numeric_limits<int>::max();
}

bool ParseInt32(const std::string& str, int32_t *out)
{
if (!ParsePrechecks(str))
if (str.size() >= 1 && (IsSpace(str[0]) || IsSpace(str[str.size()-1]))) // No padding allowed
return false;
if (!ValidAsCString(str)) // No embedded NUL characters allowed
return false;
char *endp = nullptr;
errno = 0; // strtol will not set errno if valid
long int n = strtol(str.c_str(), &endp, 10);
if(out) *out = (int32_t)n;
// Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n >= std::numeric_limits<int32_t>::min() &&
n <= std::numeric_limits<int32_t>::max();
return true;
}

bool ParseInt64(const std::string& str, int64_t *out)
bool ParseInt32(const std::string& str, int32_t* out)
{
if (!ParsePrechecks(str))
return false;
char *endp = nullptr;
errno = 0; // strtoll will not set errno if valid
long long int n = strtoll(str.c_str(), &endp, 10);
if(out) *out = (int64_t)n;
// Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *int64_t*.
return endp && *endp == 0 && !errno &&
n >= std::numeric_limits<int64_t>::min() &&
n <= std::numeric_limits<int64_t>::max();
return ParseIntegral<int32_t>(str, out);
}

bool ParseUInt(const std::string& str, unsigned int *out)
bool ParseInt64(const std::string& str, int64_t* out)
{
if (!ParsePrechecks(str))
return false;
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
return false;
char *endp = nullptr;
errno = 0; // strtoul will not set errno if valid
unsigned long int n = strtoul(str.c_str(), &endp, 10);
if(out) *out = (unsigned int)n;
// Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *unsigned int*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<unsigned int>::max();
return ParseIntegral<int64_t>(str, out);
}

bool ParseUInt32(const std::string& str, uint32_t *out)
bool ParseUInt32(const std::string& str, uint32_t* out)
{
if (!ParsePrechecks(str))
return false;
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
return false;
char *endp = nullptr;
errno = 0; // strtoul will not set errno if valid
unsigned long int n = strtoul(str.c_str(), &endp, 10);
if(out) *out = (uint32_t)n;
// Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<uint32_t>::max();
return ParseIntegral<uint32_t>(str, out);
}

bool ParseUInt64(const std::string& str, uint64_t *out)
bool ParseUInt64(const std::string& str, uint64_t* out)
{
if (!ParsePrechecks(str))
return false;
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range
return false;
char *endp = nullptr;
errno = 0; // strtoull will not set errno if valid
unsigned long long int n = strtoull(str.c_str(), &endp, 10);
if(out) *out = (uint64_t)n;
// Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *uint64_t*.
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<uint64_t>::max();
return ParseIntegral<uint64_t>(str, out);
}


bool ParseDouble(const std::string& str, double *out)
{
if (!ParsePrechecks(str))
Expand Down
26 changes: 19 additions & 7 deletions src/util/strencodings.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <charconv>
#include <cstdint>
#include <iterator>
#include <optional>
#include <string>
#include <vector>

Expand Down Expand Up @@ -110,6 +111,24 @@ constexpr inline bool IsSpace(char c) noexcept {
return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}

/**
* Convert string to integral type T.
*
* @returns std::nullopt if the entire string could not be parsed, or if the
* parsed value is not in the range representable by the type T.
*/
template <typename T>
std::optional<T> ToIntegral(const std::string& str)
{
static_assert(std::is_integral<T>::value);
T result;
const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result);
if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) {
return std::nullopt;
}
return {result};
}

/**
* Convert string to signed integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
Expand All @@ -131,13 +150,6 @@ constexpr inline bool IsSpace(char c) noexcept {
*/
[[nodiscard]] bool ParseInt64(const std::string& str, int64_t *out);

/**
* Convert decimal string to unsigned integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseUInt(const std::string& str, unsigned int *out);

/**
* Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
Expand Down
4 changes: 2 additions & 2 deletions src/util/system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ static bool InterpretBool(const std::string& strValue)
if (strValue.empty())
return true;

// Maintaining the behavior as described above, but replacing the atoi with ParseInt.
// Maintaining the behavior as described above, but replacing the atoi with ParseInt32.
int value = 0;
if (!ParseInt(strValue, &value)) {
if (!ParseInt32(strValue, &value)) {
// Do nothing. The value will remain at zero if not parseable. This is to prevent
// a warning on [[nodiscard]]
}
Expand Down
Loading

0 comments on commit 0294fa5

Please sign in to comment.