encoding: implement hex encoding (base16)

Implement hex encoding and decoding (base16) according to RFC 4648. Add unit tests. Link: https://www.rfc-editor.org/rfc/rfc4648
c3lang · Nov 25, 2024 · b0c0fd7 · b0c0fd7
1 parent c273f26
commit b0c0fd7
Show file tree

Hide file tree

Showing 2 changed files with 162 additions and 0 deletions.
diff --git a/lib/std/encoding/hex.c3 b/lib/std/encoding/hex.c3
@@ -0,0 +1,117 @@
+module std::encoding::hex;
+
+// The implementation is based on https://www.rfc-editor.org/rfc/rfc4648
+
+<*
+ Encode code as lowercase hex into a caller supplied buffer.
+
+ The buffer is handed to encode_bytes, which requires room for
+ encode_len(code.len) characters.
+
+ @param code "The bytes to encode."
+ @param buffer "Storage receiving the hex characters."
+ @return "The encoded prefix of buffer, viewed as a String."
+*>
+fn String! encode_buffer(char[] code, char[] buffer)
+{
+	usz written = encode_bytes(code, buffer)!;
+	return (String)buffer[:written];
+}
+
+<*
+ Decode the hex text in code into a caller supplied buffer.
+
+ The buffer is handed to decode_bytes, which requires an even length input
+ and room for decode_len(code.len) bytes.
+
+ @param code "The hex characters to decode."
+ @param buffer "Storage receiving the decoded bytes."
+ @return "The decoded prefix of buffer."
+ @return! Errors.INVALID_CHARACTER
+*>
+fn char[]! decode_buffer(char[] code, char[] buffer)
+{
+	usz written = decode_bytes(code, buffer)!;
+	return buffer[:written];
+}
+
+<*
+ Encode code as lowercase hex into storage obtained from allocator.
+
+ @param code "The bytes to encode."
+ @param allocator "The allocator providing the result storage."
+ @return "The hex string, encode_len(code.len) characters long."
+*>
+fn String! encode(char[] code, Allocator allocator)
+{
+	usz capacity = encode_len(code.len);
+	char[] data = allocator::alloc_array(allocator, char, capacity);
+	usz written = encode_bytes(code, data)!;
+	return (String)data[:written];
+}
+
+<*
+ Decode the hex text in code into storage obtained from allocator.
+
+ If decoding fails, the freshly allocated storage is released again before
+ the fault is propagated, so a failed call does not leak.
+
+ @param code "The hex characters to decode; decode_bytes requires even length."
+ @param allocator "The allocator providing the result storage."
+ @return "The decoded bytes."
+ @return! Errors.INVALID_CHARACTER
+*>
+fn char[]! decode(char[] code, Allocator allocator)
+{
+	char[] data = allocator::alloc_array(allocator, char, decode_len(code.len));
+	// decode_bytes can fail after the allocation; free the buffer on that path only.
+	defer catch allocator::free(allocator, data.ptr);
+	return data[:decode_bytes(code, data)!];
+}
+
+// Encode to hex using the allocator returned by allocator::heap().
+fn String! encode_new(char[] code) @inline
+{
+	return encode(code, allocator::heap());
+}
+// Encode to hex using the allocator returned by allocator::temp().
+fn String! encode_temp(char[] code) @inline
+{
+	return encode(code, allocator::temp());
+}
+// Decode from hex using the allocator returned by allocator::heap().
+fn char[]! decode_new(char[] code) @inline
+{
+	return decode(code, allocator::heap());
+}
+// Decode from hex using the allocator returned by allocator::temp().
+fn char[]! decode_temp(char[] code) @inline
+{
+	return decode(code, allocator::temp());
+}
+
+// Faults produced by the hex decoding functions in this module.
+fault Errors
+{
+	// An input byte is not a hexadecimal digit; returned by decode_bytes.
+	INVALID_CHARACTER,
+}
+
+<*
+ Number of hex characters produced when encoding n bytes.
+ @param n "Byte count of the input to be encoded."
+ @return "Twice n, since every byte becomes two hex digits."
+*>
+fn usz encode_len(usz n)
+{
+	return 2 * n;
+}
+
+<*
+ Write the lowercase hex representation of src into dst.
+
+ Each input byte yields two output characters: the high nibble first,
+ then the low nibble.
+
+ @param src "The input to be encoded."
+ @param dst "The encoded input."
+ @return "The encoded size."
+ @require dst.len >= encode_len(src.len) "Destination array is not large enough"
+*>
+fn usz! encode_bytes(char[] src, char[] dst)
+{
+	foreach (idx, octet : src)
+	{
+		// Byte idx occupies output positions 2 * idx and 2 * idx + 1.
+		usz out = idx * 2;
+		dst[out] = HEXALPHABET[octet >> 4];
+		dst[out + 1] = HEXALPHABET[octet & 0x0f];
+	}
+	return src.len * 2;
+}
+
+<*
+ Number of bytes produced when decoding n hex characters.
+ @param n "Character count of the input to be decoded."
+ @return "Half of n, rounded down, since two hex digits form one byte."
+*>
+fn usz decode_len(usz n)
+{
+	return n / 2;
+}
+
+<*
+ Decode the hex characters in src into dst and report how many bytes were
+ written.
+
+ Characters are consumed in pairs: the first supplies the high nibble and the
+ second the low nibble. Both upper and lower case digits are accepted. Decoding
+ stops at the first pair containing a non-hexadecimal character.
+
+ @param src "The input to be decoded."
+ @param dst "The decoded input."
+ @require src.len % 2 == 0 "src is not of even length"
+ @require dst.len >= decode_len(src.len) "Destination array is not large enough"
+ @return "The number of bytes written to dst."
+ @return! Errors.INVALID_CHARACTER
+*>
+fn usz! decode_bytes(char[] src, char[] dst)
+{
+	usz pairs = src.len / 2;
+	for (usz k = 0; k < pairs; k++)
+	{
+		char hi = HEXREVERSE[src[2 * k]];
+		char lo = HEXREVERSE[src[2 * k + 1]];
+		// The table holds 0xff for every byte that is not a hex digit.
+		if (hi > 0x0f || lo > 0x0f) return Errors.INVALID_CHARACTER?;
+		dst[k] = (hi << 4) | lo;
+	}
+	return pairs;
+}
+
+// Digits used by encode_bytes, indexed by nibble value; output is lowercase.
+const char[*] HEXALPHABET @private = "0123456789abcdef";
+// Reverse lookup used by decode_bytes: 256 entries indexed by input byte.
+// '0'-'9' (0x30-0x39) map to 0-9, and both 'A'-'F' (0x41-0x46) and
+// 'a'-'f' (0x61-0x66) map to 10-15. Every other byte maps to 0xff, which
+// decode_bytes treats as invalid because it is greater than 0x0f.
+const char[*] HEXREVERSE @private =
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"00010203040506070809ffffffffffff"
+x"ff0a0b0c0d0e0fffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ff0a0b0c0d0e0fffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff"
+x"ffffffffffffffffffffffffffffffff";
diff --git a/test/unit/stdlib/encoding/hex.c3 b/test/unit/stdlib/encoding/hex.c3
@@ -0,0 +1,45 @@
+module encoding::hex @test;
+import std::encoding::hex;
+
+// One test vector pairing raw bytes with their hex representation.
+struct TestCase
+{
+    // The decoded (raw) bytes.
+    char[] dec;
+    // The hex text for dec; may use upper or lower case digits.
+    char[] enc;
+}
+
+// Vectors shared by the encode and decode tests below.
+TestCase[] tests = {
+	// Empty input.
+	{"", ""},
+	{{'g'}, "67"},
+	{{0,1,2,3,4,5,6,7}, "0001020304050607"},
+	{{8,9,10,11,12,13,14,15}, "08090a0b0c0d0e0f"},
+	{{0xf0, 0xf1, 0xf2, 0xf3}, "f0f1f2f3"},
+	{{0xe3, 0xa1}, "e3a1"},
+	// Uppercase digits: decoded as-is; the encode test lowercases enc before comparing.
+	{{0xe3, 0xa1}, "E3A1"},
+};
+
+// Encodes every vector with encode_bytes and encode_temp and compares the
+// output against the lowercased expected hex text.
+fn void! encode()
+{
+	char[64] buf;
+	foreach (tc : tests)
+	{
+		usz n = hex::encode_bytes(tc.dec, buf[..])!;
+		// Expected text is lowercased because some vectors spell enc in upper case.
+		String want = ((String)tc.enc).temp_ascii_to_lower();
+		assert(want == buf[:n], "encode failed: got: %s, want: %s", buf[:n], want);
+		@pool()
+		{
+			String via_temp = hex::encode_temp(tc.dec)!;
+			assert(want == via_temp);
+		};
+	}
+}
+
+// Decodes every vector with decode_bytes and decode_temp and compares the
+// output against the expected raw bytes.
+fn void! decode()
+{
+	char[64] buf;
+	foreach (tc : tests)
+	{
+		usz n = hex::decode_bytes(tc.enc, buf[..])!;
+		assert(tc.dec == buf[:n], "decode failed: got: %s, want: %s", buf[:n], tc.dec);
+		@pool()
+		{
+			char[] via_temp = hex::decode_temp(tc.enc)!;
+			assert(tc.dec == via_temp);
+		};
+	}
+}