From b0c0fd7dc838d4d59ce719ae8456a36338ce8d39 Mon Sep 17 00:00:00 2001
From: Koni Marti
Date: Sun, 24 Nov 2024 17:55:14 +0100
Subject: [PATCH] encoding: implement hex encoding (base16)

Implement hex encoding and decoding (base16) according to RFC 4648. Add
unit tests.

Link: https://www.rfc-editor.org/rfc/rfc4648
---
 lib/std/encoding/hex.c3          | 146 +++++++++++++++++++++++++++++++
 test/unit/stdlib/encoding/hex.c3 |  57 ++++++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 lib/std/encoding/hex.c3
 create mode 100644 test/unit/stdlib/encoding/hex.c3

diff --git a/lib/std/encoding/hex.c3 b/lib/std/encoding/hex.c3
new file mode 100644
index 000000000..1108d8bed
--- /dev/null
+++ b/lib/std/encoding/hex.c3
@@ -0,0 +1,146 @@
+module std::encoding::hex;
+
+// The implementation is based on https://www.rfc-editor.org/rfc/rfc4648
+
+<*
+ Encode code as hex digits into a caller-provided buffer.
+ @param code "The input to be encoded."
+ @param buffer "Storage for the result, at least encode_len(code.len) bytes."
+ @return "The encoded text as a view into buffer."
+*>
+fn String! encode_buffer(char[] code, char[] buffer)
+{
+	return (String)buffer[:encode_bytes(code, buffer)!];
+}
+
+<*
+ Decode hex digits into a caller-provided buffer.
+ @param code "The hex digits to be decoded."
+ @param buffer "Storage for the result, at least decode_len(code.len) bytes."
+ @return "The decoded bytes as a view into buffer."
+ @return! Errors.INVALID_CHARACTER
+*>
+fn char[]! decode_buffer(char[] code, char[] buffer)
+{
+	return buffer[:decode_bytes(code, buffer)!];
+}
+
+<*
+ Encode code as hex digits into memory taken from allocator.
+ @param code "The input to be encoded."
+ @param allocator "The allocator used for the result."
+ @return "The encoded text, allocated with allocator."
+*>
+fn String! encode(char[] code, Allocator allocator)
+{
+	char[] data = allocator::alloc_array(allocator, char, encode_len(code.len));
+	return (String)data[:encode_bytes(code, data)!];
+}
+
+<*
+ Decode hex digits into memory taken from allocator.
+ @param code "The hex digits to be decoded."
+ @param allocator "The allocator used for the result."
+ @return "The decoded bytes, allocated with allocator."
+ @return! Errors.INVALID_CHARACTER
+*>
+fn char[]! decode(char[] code, Allocator allocator)
+{
+	char[] data = allocator::alloc_array(allocator, char, decode_len(code.len));
+	// Hand the buffer back if decoding fails, otherwise it would leak.
+	defer catch allocator::free(allocator, data.ptr);
+	return data[:decode_bytes(code, data)!];
+}
+
+// Convenience wrappers that allocate on the heap (_new) or with the temp allocator (_temp).
+fn String! encode_new(char[] code) @inline => encode(code, allocator::heap());
+fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp());
+fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap());
+fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp());
+
+// Faults returned by the decoding functions.
+fault Errors
+{
+	INVALID_CHARACTER,
+}
+
+<*
+ Calculate the size of the encoded data.
+ @param n "Size of the input to be encoded."
+ @return "The size of the input once encoded."
+*>
+fn usz encode_len(usz n) => n * 2;
+
+<*
+ Encode the content of src into dst, which must be properly sized.
+ @param src "The input to be encoded."
+ @param dst "The encoded input."
+ @return "The encoded size."
+ @require dst.len >= encode_len(src.len) "Destination array is not large enough"
+*>
+fn usz! encode_bytes(char[] src, char[] dst)
+{
+	usz j = 0;
+	foreach (v : src)
+	{
+		// Each input byte becomes two digits, high nibble first.
+		dst[j] = HEXALPHABET[v >> 4];
+		dst[j + 1] = HEXALPHABET[v & 0x0f];
+		j += 2;
+	}
+	return j;
+}
+
+<*
+ Calculate the size of the decoded data.
+ @param n "Size of the input to be decoded."
+ @return "The size of the input once decoded."
+*>
+fn usz decode_len(usz n) => n / 2;
+
+<*
+ Decodes src into bytes. Returns the actual number of bytes written to dst.
+
+ Expects that src only contains hexadecimal characters and that src has even
+ length.
+
+ @param src "The input to be decoded."
+ @param dst "The decoded input."
+ @require src.len % 2 == 0 "src is not of even length"
+ @require dst.len >= decode_len(src.len) "Destination array is not large enough"
+ @return! Errors.INVALID_CHARACTER
+*>
+fn usz! decode_bytes(char[] src, char[] dst)
+{
+	usz i;
+	for (usz j = 1; j < src.len; j += 2)
+	{
+		char a = HEXREVERSE[src[j - 1]];
+		char b = HEXREVERSE[src[j]];
+		// HEXREVERSE holds 0xff for every byte that is not a hex digit.
+		if (a > 0x0f || b > 0x0f) return Errors.INVALID_CHARACTER?;
+		dst[i] = (a << 4) | b;
+		i++;
+	}
+	return i;
+}
+
+const char[*] HEXALPHABET @private = "0123456789abcdef";
+// Maps a byte to its hex digit value; upper and lower case letters are both accepted.
+const char[*] HEXREVERSE @private =
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"00010203040506070809ffffffffffff"
+x"ff0a0b0c0d0e0fffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ff0a0b0c0d0e0fffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff";
diff --git a/test/unit/stdlib/encoding/hex.c3 b/test/unit/stdlib/encoding/hex.c3
new file mode 100644
index 000000000..f12114261
--- /dev/null
+++ b/test/unit/stdlib/encoding/hex.c3
@@ -0,0 +1,57 @@
+module encoding::hex @test;
+import std::encoding::hex;
+
+struct TestCase
+{
+	char[] dec;
+	char[] enc;
+}
+
+// Pairs of raw bytes (dec) and their hex form (enc); decoding accepts either case.
+TestCase[] tests = {
+	{"", ""},
+	{{'g'}, "67"},
+	{{0,1,2,3,4,5,6,7}, "0001020304050607"},
+	{{8,9,10,11,12,13,14,15}, "08090a0b0c0d0e0f"},
+	{{0xf0, 0xf1, 0xf2, 0xf3}, "f0f1f2f3"},
+	{{0xe3, 0xa1}, "e3a1"},
+	{{0xe3, 0xa1}, "E3A1"},
+};
+
+fn void! encode() {
+	usz n;
+	char[64] buf;
+	foreach (t : tests) {
+		n = hex::encode_bytes(t.dec, buf[..])!;
+		// The encoder always emits lower case digits.
+		String want = ((String)t.enc).temp_ascii_to_lower();
+		assert(want == buf[:n], "encode failed: got: %s, want: %s", buf[:n], want);
+		@pool()
+		{
+			assert(want == hex::encode_temp(t.dec)!);
+		};
+	}
+}
+
+fn void! decode() {
+	usz n;
+	char[64] buf;
+	foreach (t : tests) {
+		n = hex::decode_bytes(t.enc, buf[..])!;
+		assert(t.dec == buf[:n], "decode failed: got: %s, want: %s", buf[:n], t.dec);
+		@pool()
+		{
+			assert(t.dec == hex::decode_temp(t.enc)!);
+		};
+	}
+}
+
+fn void decode_invalid()
+{
+	char[8] buf;
+	// A non-hex character in either nibble position must be rejected.
+	assert(@catch(hex::decode_bytes("0g", buf[..])) == hex::Errors.INVALID_CHARACTER);
+	assert(@catch(hex::decode_bytes("g0", buf[..])) == hex::Errors.INVALID_CHARACTER);
+	// The allocating variant releases its buffer and reports the same fault.
+	assert(@catch(hex::decode_new("zz")) == hex::Errors.INVALID_CHARACTER);
+}