From a087ba608b89ae327980fabf7947da9f5334db46 Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Mon, 25 Nov 2024 16:20:10 +0100
Subject: [PATCH] Begin unifying baseXX encodings. b64 / hex data strings can
 now be used with \` as well.

---
 lib/std/encoding/encoding.c3     |  7 +++
 lib/std/encoding/hex.c3          | 76 ++++++++++++++------------------
 releasenotes.md                  |  1 +
 src/compiler/lexer.c             |  8 ++--
 test/unit/stdlib/encoding/hex.c3 | 16 ++++---
 5 files changed, 56 insertions(+), 52 deletions(-)
 create mode 100644 lib/std/encoding/encoding.c3

diff --git a/lib/std/encoding/encoding.c3 b/lib/std/encoding/encoding.c3
new file mode 100644
index 000000000..b81b4e945
--- /dev/null
+++ b/lib/std/encoding/encoding.c3
@@ -0,0 +1,7 @@
+module std::encoding;
+
+fault DecodingFailure
+{
+	INVALID_CHARACTER,
+	INVALID_PADDING,
+}
\ No newline at end of file
diff --git a/lib/std/encoding/hex.c3 b/lib/std/encoding/hex.c3
index 1108d8bed..c34c2567f 100644
--- a/lib/std/encoding/hex.c3
+++ b/lib/std/encoding/hex.c3
@@ -1,10 +1,11 @@
 module std::encoding::hex;
+import std::encoding @norecurse;
 
 // The implementation is based on https://www.rfc-editor.org/rfc/rfc4648
 
-fn String! encode_buffer(char[] code, char[] buffer)
+fn String encode_buffer(char[] code, char[] buffer)
 {
-	return (String)buffer[:encode_bytes(code, buffer)!];
+	return (String)buffer[:encode_bytes(code, buffer)];
 }
 
 fn char[]! decode_buffer(char[] code, char[] buffer)
@@ -12,10 +13,10 @@ fn char[]! decode_buffer(char[] code, char[] buffer)
 	return buffer[:decode_bytes(code, buffer)!];
 }
 
-fn String! encode(char[] code, Allocator allocator)
+fn String encode(char[] code, Allocator allocator)
 {
 	char[] data = allocator::alloc_array(allocator, char, encode_len(code.len));
-	return (String)data[:encode_bytes(code, data)!];
+	return (String)data[:encode_bytes(code, data)];
 }
 
 fn char[]! decode(char[] code, Allocator allocator)
@@ -24,16 +25,11 @@ fn char[]! decode(char[] code, Allocator allocator)
 	return data[:decode_bytes(code, data)!];
 }
 
-fn String! encode_new(char[] code) @inline => encode(code, allocator::heap());
-fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp());
+fn String encode_new(char[] code) @inline => encode(code, allocator::heap());
+fn String encode_temp(char[] code) @inline => encode(code, allocator::temp());
 fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap());
 fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp());
 
-fault Errors
-{
-	INVALID_CHARACTER,
-}
-
 <*
  Calculate the size of the encoded data.
  @param n "Size of the input to be encoded."
@@ -48,10 +44,11 @@ fn usz encode_len(usz n) => n * 2;
  @return "The encoded size."
  @require dst.len >= encode_len(src.len) "Destination array is not large enough"
 *>
-fn usz! encode_bytes(char[] src, char[] dst)
+fn usz encode_bytes(char[] src, char[] dst)
 {
 	usz j = 0;
-	foreach (v : src) {
+	foreach (v : src)
+	{
 		dst[j] = HEXALPHABET[v >> 4];
 		dst[j + 1] = HEXALPHABET[v & 0x0f];
 		j = j + 2;
@@ -64,7 +61,7 @@ fn usz! encode_bytes(char[] src, char[] dst)
  @param n "Size of the input to be decoded."
  @return "The size of the input once decoded."
 *>
-fn usz decode_len(usz n) => n / 2;
+macro usz decode_len(usz n) => n / 2;
 
 <*
  Decodes src into bytes. Returns the actual number of bytes written to dst.
@@ -76,21 +73,16 @@ fn usz decode_len(usz n) => n / 2;
  @param dst "The decoded input."
  @require src.len % 2 == 0 "src is not of even length"
  @require dst.len >= decode_len(src.len) "Destination array is not large enough"
- @return! Errors.INVALID_CHARACTER
+ @return! DecodingFailure.INVALID_CHARACTER
 *>
 fn usz! decode_bytes(char[] src, char[] dst)
 {
-	usz i, j;
-	char a, b;
-	for (j = 1; j < src.len; j += 2) {
-		a = HEXREVERSE[src[j-1]];
-		b = HEXREVERSE[src[j]];
-		if (a > 0x0f) {
-			return Errors.INVALID_CHARACTER?;
-		}
-		if (b > 0x0f) {
-			return Errors.INVALID_CHARACTER?;
-		}
+	usz i;
+	for (usz j = 1; j < src.len; j += 2)
+	{
+		char a = HEXREVERSE[src[j - 1]];
+		char b = HEXREVERSE[src[j]];
+		if (a > 0x0f || b > 0x0f) return DecodingFailure.INVALID_CHARACTER?;
 		dst[i] = (a << 4) | b;
 		i++;
 	}
@@ -99,19 +91,19 @@ fn usz! decode_bytes(char[] src, char[] dst)
 
 const char[*] HEXALPHABET @private = "0123456789abcdef";
 const char[*] HEXREVERSE @private =
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"00010203040506070809ffffffffffff"
-x"ff0a0b0c0d0e0fffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ff0a0b0c0d0e0fffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff"
-x"ffffffffffffffffffffffffffffffff";
+x`ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  00010203040506070809ffffffffffff
+  ff0a0b0c0d0e0fffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ff0a0b0c0d0e0fffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff
+  ffffffffffffffffffffffffffffffff`;
diff --git a/releasenotes.md b/releasenotes.md
index 8edd32004..ff9afedfe 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -23,6 +23,7 @@
 - SimpleHeapAllocator bug when splitting blocks allowed memory overrun.
 - Not possible to alias or take reference for extension methods on non-user defined types. #1637
 - Prevent methods from using names of properties or fields. #1638
+- b64 / hex data strings can now be used with \` as well.
 
 ### Stdlib changes
 - Add `io::MultiReader`, `io::MultiWriter`, and `io::TeeReader` structs.
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
index 42ad93a06..3fb0e36fe 100644
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -1033,7 +1033,7 @@ static inline bool scan_raw_string(Lexer *lexer)
 static inline bool scan_hex_array(Lexer *lexer)
 {
 	char start_char = peek(lexer);
-	next(lexer); // Step past ' or "
+	next(lexer); // Step past ' or " or `
 	char c;
 	uint64_t len = 0;
 	while (1)
@@ -1081,7 +1081,7 @@ static inline bool scan_base64(Lexer *lexer)
 	next(lexer); // Step past 6
 	next(lexer); // Step past 4
 	char start_char = peek(lexer);
-	next(lexer); // Step past ' or "
+	next(lexer); // Step past ' or " or `
 	char c;
 	unsigned end_len = 0;
 	uint64_t len = 0;
@@ -1348,13 +1348,13 @@ static bool lexer_scan_token_inner(Lexer *lexer)
 			if (match(lexer, '=')) return new_token(lexer, TOKEN_MINUS_ASSIGN, "-=");
 			return new_token(lexer, TOKEN_MINUS, "-");
 		case 'x':
-			if ((peek(lexer) == '"' || peek(lexer) == '\''))
+			if ((peek(lexer) == '"' || peek(lexer) == '\'' || peek(lexer) == '`'))
 			{
 				return scan_hex_array(lexer);
 			}
 			goto IDENT;
 		case 'b':
-			if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"'))
+			if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"' || lexer->current[2] == '`'))
 			{
 				return scan_base64(lexer);
 			}
diff --git a/test/unit/stdlib/encoding/hex.c3 b/test/unit/stdlib/encoding/hex.c3
index f12114261..0be1fe40c 100644
--- a/test/unit/stdlib/encoding/hex.c3
+++ b/test/unit/stdlib/encoding/hex.c3
@@ -17,24 +17,28 @@ TestCase[] tests = {
 	{{0xe3, 0xa1}, "E3A1"},
 };
 
-fn void! encode() {
+fn void! encode()
+{
 	usz n;
 	char[64] buf;
-	foreach (t : tests) {
-		n = hex::encode_bytes(t.dec, buf[..])!;
+	foreach (t : tests)
+	{
+		n = hex::encode_bytes(t.dec, buf[..]);
 		String want = ((String)t.enc).temp_ascii_to_lower();
 		assert(want == buf[:n], "encode failed: got: %s, want: %s", buf[:n], want);
 		@pool()
 		{
-			assert(want == hex::encode_temp(t.dec)!);
+			assert(want == hex::encode_temp(t.dec));
 		};
 	}
 }
 
-fn void! decode() {
+fn void! decode()
+{
 	usz n;
 	char[64] buf;
-	foreach (t : tests) {
+	foreach (t : tests)
+	{
 		n = hex::decode_bytes(t.enc, buf[..])!;
 		assert(t.dec == buf[:n], "decode failed: got: %s, want: %s", buf[:n], t.dec);
 		@pool()