Skip to content

Commit

Permalink
Merge pull request ClickHouse#40620 from zvonand/zvonand-b58
Browse files: browse the repository at this point in the history
Base58: fix handling of leading zero bytes ('\0') when encoding and leading '1' characters when decoding
  • Loading branch information
rschu1ze authored and Enmk committed Feb 7, 2023
1 parent 2e2219f commit 3cf7376
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 11 deletions.
45 changes: 37 additions & 8 deletions src/Common/base58.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,22 @@
namespace DB
{

inline size_t encodeBase58(const char8_t * src, char8_t * dst)
inline size_t encodeBase58(const char8_t * src, size_t srclen, char8_t * dst)
{
const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

size_t processed = 0;
size_t zeros = 0;
for (;*src == '\0' && processed < srclen-1; ++src)
{
++processed;
++zeros;
*dst++ = '1';
}

size_t idx = 0;
for (; *src; ++src)

while (processed < srclen-1)
{
unsigned int carry = static_cast<unsigned char>(*src);
for (size_t j = 0; j < idx; ++j)
Expand All @@ -24,6 +34,8 @@ inline size_t encodeBase58(const char8_t * src, char8_t * dst)
dst[idx++] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
++src;
++processed;
}

size_t c_idx = idx >> 1;
Expand All @@ -37,23 +49,38 @@ inline size_t encodeBase58(const char8_t * src, char8_t * dst)
{
dst[c_idx] = base58_encoding_alphabet[static_cast<unsigned char>(dst[c_idx])];
}

dst[idx] = '\0';
return idx + 1;
return zeros + idx + 1;
}

inline size_t decodeBase58(const char8_t * src, char8_t * dst)
inline size_t decodeBase58(const char8_t * src, size_t srclen, char8_t * dst)
{
const signed char uint_max = UINT_MAX;
const signed char map_digits[128]
= {uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 0, 1, 2, 3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20, 21, uint_max, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, uint_max, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max};
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 0, 1, 2,
3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20,
21, uint_max, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, uint_max, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max};

size_t processed = 0;
size_t zeros = 0;
for (;*src == '1' && processed < srclen-1; ++src)
{
++processed;
++zeros;
*dst++ = '\0';
}

size_t idx = 0;

for (; *src; ++src)
while (processed < srclen-1)
{
unsigned int carry = map_digits[*src];
if (unlikely(carry == UINT_MAX))
Expand All @@ -71,6 +98,8 @@ inline size_t decodeBase58(const char8_t * src, char8_t * dst)
dst[idx++] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
++src;
++processed;
}

size_t c_idx = idx >> 1;
Expand All @@ -81,7 +110,7 @@ inline size_t decodeBase58(const char8_t * src, char8_t * dst)
dst[idx - (i + 1)] = s;
}
dst[idx] = '\0';
return idx + 1;
return zeros + idx + 1;
}

}
4 changes: 2 additions & 2 deletions src/Functions/FunctionBase58Conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct Base58Encode
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev;
auto encoded_size = encodeBase58(src, dst_pos);
auto encoded_size = encodeBase58(src, srclen, dst_pos);

src += srclen;
dst_pos += encoded_size;
Expand Down Expand Up @@ -90,7 +90,7 @@ struct Base58Decode
{
size_t srclen = src_offsets[row] - src_offset_prev;

auto decoded_size = decodeBase58(src, dst_pos);
auto decoded_size = decodeBase58(src, srclen, dst_pos);
if (!decoded_size)
throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS);

Expand Down
3 changes: 3 additions & 0 deletions tests/queries/0_stateless/02337_base58.reference
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ foo
foob
fooba
foobar

1
1
5 changes: 4 additions & 1 deletion tests/queries/0_stateless/02337_base58.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ SELECT base58Decode('Hold my beer...'); -- { serverError 36 }
SELECT base58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));

SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val);
SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val);
SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ', '']) val);

SELECT base58Encode(base58Decode('1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix')) == '1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix';
select base58Encode('\x00\x0b\xe3\xe1\xeb\xa1\x7a\x47\x3f\x89\xb0\xf7\xe8\xe2\x49\x40\xf2\x0a\xeb\x8e\xbc\xa7\x1a\x88\xfd\xe9\x5d\x4b\x83\xb7\x1a\x09') == '1BWutmTvYPwDtmw9abTkS4Ssr8no61spGAvW1X6NDix';

0 comments on commit 3cf7376

Please sign in to comment.