From 3cf7376bcfdd17473a02db1c8715890a2d3a5b6b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 26 Aug 2022 10:18:26 +0200 Subject: [PATCH] Merge pull request #40620 from zvonand/zvonand-b58 Base58 fix handling leading 0 / '1' --- src/Common/base58.h | 45 +++++++++++++++---- src/Functions/FunctionBase58Conversion.h | 4 +- .../0_stateless/02337_base58.reference | 3 ++ tests/queries/0_stateless/02337_base58.sql | 5 ++- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/Common/base58.h b/src/Common/base58.h index 3d4b55a1fba8..bc3c3c7aee88 100644 --- a/src/Common/base58.h +++ b/src/Common/base58.h @@ -5,12 +5,22 @@ namespace DB { -inline size_t encodeBase58(const char8_t * src, char8_t * dst) +inline size_t encodeBase58(const char8_t * src, size_t srclen, char8_t * dst) { const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + size_t processed = 0; + size_t zeros = 0; + for (;*src == '\0' && processed < srclen-1; ++src) + { + ++processed; + ++zeros; + *dst++ = '1'; + } + size_t idx = 0; - for (; *src; ++src) + + while (processed < srclen-1) { unsigned int carry = static_cast(*src); for (size_t j = 0; j < idx; ++j) @@ -24,6 +34,8 @@ inline size_t encodeBase58(const char8_t * src, char8_t * dst) dst[idx++] = static_cast(carry % 58); carry /= 58; } + ++src; + ++processed; } size_t c_idx = idx >> 1; @@ -37,23 +49,38 @@ inline size_t encodeBase58(const char8_t * src, char8_t * dst) { dst[c_idx] = base58_encoding_alphabet[static_cast(dst[c_idx])]; } + dst[idx] = '\0'; - return idx + 1; + return zeros + idx + 1; } -inline size_t decodeBase58(const char8_t * src, char8_t * dst) +inline size_t decodeBase58(const char8_t * src, size_t srclen, char8_t * dst) { const signed char uint_max = UINT_MAX; const signed char map_digits[128] = {uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, - uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 0, 1, 2, 3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20, 21, uint_max, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, uint_max, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max}; + uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 0, 1, 2, + 3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, + 9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20, + 21, uint_max, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, uint_max, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max}; + + size_t processed = 0; + size_t zeros = 0; + for (;*src == '1' && processed < srclen-1; ++src) + { + ++processed; + ++zeros; + *dst++ = '\0'; + } size_t idx = 0; - for (; *src; ++src) + while (processed < srclen-1) { unsigned int carry = map_digits[*src]; if (unlikely(carry == UINT_MAX)) @@ -71,6 +98,8 @@ inline size_t decodeBase58(const char8_t * src, char8_t * dst) dst[idx++] = static_cast(carry & 0xff); carry >>= 8; } + ++src; + ++processed; } size_t c_idx = idx >> 1; @@ -81,7 +110,7 @@ inline size_t decodeBase58(const char8_t * src, char8_t * dst) dst[idx - (i + 1)] = s; } dst[idx] = '\0'; - return idx + 1; + return zeros + idx + 1; } } diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index 82e2a2caac04..bc166f2c9f5a 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -48,7 +48,7 @@ struct Base58Encode for (size_t row = 0; row < input_rows_count; ++row) { size_t srclen = src_offsets[row] - src_offset_prev; - auto encoded_size = encodeBase58(src, dst_pos); + auto encoded_size = encodeBase58(src, srclen, dst_pos); src += srclen; dst_pos += encoded_size; @@ -90,7 +90,7 @@ struct Base58Decode { size_t srclen = src_offsets[row] - src_offset_prev; - auto decoded_size = decodeBase58(src, dst_pos); + auto decoded_size = decodeBase58(src, srclen, dst_pos); if (!decoded_size) throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS); diff --git a/tests/queries/0_stateless/02337_base58.reference b/tests/queries/0_stateless/02337_base58.reference index bc666044388f..20b9124c1502 100644 --- a/tests/queries/0_stateless/02337_base58.reference +++ b/tests/queries/0_stateless/02337_base58.reference @@ -21,3 +21,6 @@ foo foob fooba foobar + +1 +1 diff --git a/tests/queries/0_stateless/02337_base58.sql b/tests/queries/0_stateless/02337_base58.sql index 9c9379a2854f..42b032c7601b 100644 --- a/tests/queries/0_stateless/02337_base58.sql +++ b/tests/queries/0_stateless/02337_base58.sql @@ -9,4 +9,7 @@ SELECT base58Decode('Hold my beer...'); -- { serverError 36 } SELECT base58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); -SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val); +SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ', '']) val); + +SELECT base58Encode(base58Decode('1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix')) == '1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix'; +select base58Encode('\x00\x0b\xe3\xe1\xeb\xa1\x7a\x47\x3f\x89\xb0\xf7\xe8\xe2\x49\x40\xf2\x0a\xeb\x8e\xbc\xa7\x1a\x88\xfd\xe9\x5d\x4b\x83\xb7\x1a\x09') == '1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix';