From a087ba608b89ae327980fabf7947da9f5334db46 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Mon, 25 Nov 2024 16:20:10 +0100 Subject: [PATCH] Begin unifying baseXX encodings. b64 / hex data strings can now be used with \` as well. --- lib/std/encoding/encoding.c3 | 7 +++ lib/std/encoding/hex.c3 | 76 ++++++++++++++------------------ releasenotes.md | 1 + src/compiler/lexer.c | 8 ++-- test/unit/stdlib/encoding/hex.c3 | 16 ++++--- 5 files changed, 56 insertions(+), 52 deletions(-) create mode 100644 lib/std/encoding/encoding.c3 diff --git a/lib/std/encoding/encoding.c3 b/lib/std/encoding/encoding.c3 new file mode 100644 index 000000000..b81b4e945 --- /dev/null +++ b/lib/std/encoding/encoding.c3 @@ -0,0 +1,7 @@ +module std::encoding; + +fault DecodingFailure +{ + INVALID_CHARACTER, + INVALID_PADDING, +} \ No newline at end of file diff --git a/lib/std/encoding/hex.c3 b/lib/std/encoding/hex.c3 index 1108d8bed..c34c2567f 100644 --- a/lib/std/encoding/hex.c3 +++ b/lib/std/encoding/hex.c3 @@ -1,10 +1,11 @@ module std::encoding::hex; +import std::encoding @norecurse; // The implementation is based on https://www.rfc-editor.org/rfc/rfc4648 -fn String! encode_buffer(char[] code, char[] buffer) +fn String encode_buffer(char[] code, char[] buffer) { - return (String)buffer[:encode_bytes(code, buffer)!]; + return (String)buffer[:encode_bytes(code, buffer)]; } fn char[]! decode_buffer(char[] code, char[] buffer) @@ -12,10 +13,10 @@ fn char[]! decode_buffer(char[] code, char[] buffer) return buffer[:decode_bytes(code, buffer)!]; } -fn String! encode(char[] code, Allocator allocator) +fn String encode(char[] code, Allocator allocator) { char[] data = allocator::alloc_array(allocator, char, encode_len(code.len)); - return (String)data[:encode_bytes(code, data)!]; + return (String)data[:encode_bytes(code, data)]; } fn char[]! decode(char[] code, Allocator allocator) @@ -24,16 +25,11 @@ fn char[]! decode(char[] code, Allocator allocator) return data[:decode_bytes(code, data)!]; } -fn String! encode_new(char[] code) @inline => encode(code, allocator::heap()); -fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp()); +fn String encode_new(char[] code) @inline => encode(code, allocator::heap()); +fn String encode_temp(char[] code) @inline => encode(code, allocator::temp()); fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap()); fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp()); -fault Errors -{ - INVALID_CHARACTER, -} - <* Calculate the size of the encoded data. @param n "Size of the input to be encoded." @@ -48,10 +44,11 @@ fn usz encode_len(usz n) => n * 2; @return "The encoded size." @require dst.len >= encode_len(src.len) "Destination array is not large enough" *> -fn usz! encode_bytes(char[] src, char[] dst) +fn usz encode_bytes(char[] src, char[] dst) { usz j = 0; - foreach (v : src) { + foreach (v : src) + { dst[j] = HEXALPHABET[v >> 4]; dst[j + 1] = HEXALPHABET[v & 0x0f]; j = j + 2; @@ -64,7 +61,7 @@ fn usz! encode_bytes(char[] src, char[] dst) @param n "Size of the input to be decoded." @return "The size of the input once decoded." *> -fn usz decode_len(usz n) => n / 2; +macro usz decode_len(usz n) => n / 2; <* Decodes src into bytes. Returns the actual number of bytes written to dst. @@ -76,21 +73,16 @@ fn usz decode_len(usz n) => n / 2; @param dst "The decoded input." @require src.len % 2 == 0 "src is not of even length" @require dst.len >= decode_len(src.len) "Destination array is not large enough" - @return! Errors.INVALID_CHARACTER + @return! DecodingFailure.INVALID_CHARACTER *> fn usz! decode_bytes(char[] src, char[] dst) { - usz i, j; - char a, b; - for (j = 1; j < src.len; j += 2) { - a = HEXREVERSE[src[j-1]]; - b = HEXREVERSE[src[j]]; - if (a > 0x0f) { - return Errors.INVALID_CHARACTER?; - } - if (b > 0x0f) { - return Errors.INVALID_CHARACTER?; - } + usz i; + for (usz j = 1; j < src.len; j += 2) + { + char a = HEXREVERSE[src[j - 1]]; + char b = HEXREVERSE[src[j]]; + if (a > 0x0f || b > 0x0f) return DecodingFailure.INVALID_CHARACTER?; dst[i] = (a << 4) | b; i++; } @@ -99,19 +91,19 @@ fn usz! decode_bytes(char[] src, char[] dst) const char[*] HEXALPHABET @private = "0123456789abcdef"; const char[*] HEXREVERSE @private = -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"00010203040506070809ffffffffffff" -x"ff0a0b0c0d0e0fffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ff0a0b0c0d0e0fffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff" -x"ffffffffffffffffffffffffffffffff"; +x`ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + 00010203040506070809ffffffffffff + ff0a0b0c0d0e0fffffffffffffffffff + ffffffffffffffffffffffffffffffff + ff0a0b0c0d0e0fffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff`; diff --git a/releasenotes.md b/releasenotes.md index 8edd32004..ff9afedfe 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -23,6 +23,7 @@ - SimpleHeapAllocator bug when splitting blocks allowed memory overrun. - Not possible to alias or take reference for extension methods on non-user defined types. #1637 - Prevent methods from using names of properties or fields. #1638 +- b64 / hex data strings can now be used with \` as well. ### Stdlib changes - Add `io::MultiReader`, `io::MultiWriter`, and `io::TeeReader` structs. diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 42ad93a06..3fb0e36fe 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -1033,7 +1033,7 @@ static inline bool scan_raw_string(Lexer *lexer) static inline bool scan_hex_array(Lexer *lexer) { char start_char = peek(lexer); - next(lexer); // Step past ' or " + next(lexer); // Step past ' or " ` char c; uint64_t len = 0; while (1) @@ -1081,7 +1081,7 @@ static inline bool scan_base64(Lexer *lexer) next(lexer); // Step past 6 next(lexer); // Step past 4 char start_char = peek(lexer); - next(lexer); // Step past ' or " + next(lexer); // Step past ' or " or ` char c; unsigned end_len = 0; uint64_t len = 0; @@ -1348,13 +1348,13 @@ static bool lexer_scan_token_inner(Lexer *lexer) if (match(lexer, '=')) return new_token(lexer, TOKEN_MINUS_ASSIGN, "-="); return new_token(lexer, TOKEN_MINUS, "-"); case 'x': - if ((peek(lexer) == '"' || peek(lexer) == '\'')) + if ((peek(lexer) == '"' || peek(lexer) == '\'' || peek(lexer) == '`')) { return scan_hex_array(lexer); } goto IDENT; case 'b': - if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"')) + if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"' || lexer->current[2] == '`')) { return scan_base64(lexer); } diff --git a/test/unit/stdlib/encoding/hex.c3 b/test/unit/stdlib/encoding/hex.c3 index f12114261..0be1fe40c 100644 --- a/test/unit/stdlib/encoding/hex.c3 +++ b/test/unit/stdlib/encoding/hex.c3 @@ -17,24 +17,28 @@ TestCase[] tests = { {{0xe3, 0xa1}, "E3A1"}, }; -fn void! encode() { +fn void! encode() +{ usz n; char[64] buf; - foreach (t : tests) { - n = hex::encode_bytes(t.dec, buf[..])!; + foreach (t : tests) + { + n = hex::encode_bytes(t.dec, buf[..]); String want = ((String)t.enc).temp_ascii_to_lower(); assert(want == buf[:n], "encode failed: got: %s, want: %s", buf[:n], want); @pool() { - assert(want == hex::encode_temp(t.dec)!); + assert(want == hex::encode_temp(t.dec)); }; } } -fn void! decode() { +fn void! decode() +{ usz n; char[64] buf; - foreach (t : tests) { + foreach (t : tests) + { n = hex::decode_bytes(t.enc, buf[..])!; assert(t.dec == buf[:n], "decode failed: got: %s, want: %s", buf[:n], t.dec); @pool()