Skip to content

Commit

Permalink
Begin unifying baseXX encodings. b64 / hex data strings can now be used with \` as well.
Browse files Browse the repository at this point in the history
  • Loading branch information
lerno committed Nov 25, 2024
1 parent 9112d63 commit a087ba6
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 52 deletions.
7 changes: 7 additions & 0 deletions lib/std/encoding/encoding.c3
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module std::encoding;

fault DecodingFailure
{
INVALID_CHARACTER,
INVALID_PADDING,
}
76 changes: 34 additions & 42 deletions lib/std/encoding/hex.c3
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
module std::encoding::hex;
import std::encoding @norecurse;

// The implementation is based on https://www.rfc-editor.org/rfc/rfc4648

fn String! encode_buffer(char[] code, char[] buffer)
fn String encode_buffer(char[] code, char[] buffer)
{
return (String)buffer[:encode_bytes(code, buffer)!];
return (String)buffer[:encode_bytes(code, buffer)];
}

fn char[]! decode_buffer(char[] code, char[] buffer)
{
return buffer[:decode_bytes(code, buffer)!];
}

fn String! encode(char[] code, Allocator allocator)
fn String encode(char[] code, Allocator allocator)
{
char[] data = allocator::alloc_array(allocator, char, encode_len(code.len));
return (String)data[:encode_bytes(code, data)!];
return (String)data[:encode_bytes(code, data)];
}

fn char[]! decode(char[] code, Allocator allocator)
Expand All @@ -24,16 +25,11 @@ fn char[]! decode(char[] code, Allocator allocator)
return data[:decode_bytes(code, data)!];
}

fn String! encode_new(char[] code) @inline => encode(code, allocator::heap());
fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp());
fn String encode_new(char[] code) @inline => encode(code, allocator::heap());
fn String encode_temp(char[] code) @inline => encode(code, allocator::temp());
fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap());
fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp());

fault Errors
{
INVALID_CHARACTER,
}

<*
Calculate the size of the encoded data.
@param n "Size of the input to be encoded."
Expand All @@ -48,10 +44,11 @@ fn usz encode_len(usz n) => n * 2;
@return "The encoded size."
@require dst.len >= encode_len(src.len) "Destination array is not large enough"
*>
fn usz! encode_bytes(char[] src, char[] dst)
fn usz encode_bytes(char[] src, char[] dst)
{
usz j = 0;
foreach (v : src) {
foreach (v : src)
{
dst[j] = HEXALPHABET[v >> 4];
dst[j + 1] = HEXALPHABET[v & 0x0f];
j = j + 2;
Expand All @@ -64,7 +61,7 @@ fn usz! encode_bytes(char[] src, char[] dst)
@param n "Size of the input to be decoded."
@return "The size of the input once decoded."
*>
fn usz decode_len(usz n) => n / 2;
macro usz decode_len(usz n) => n / 2;

<*
Decodes src into bytes. Returns the actual number of bytes written to dst.
Expand All @@ -76,21 +73,16 @@ fn usz decode_len(usz n) => n / 2;
@param dst "The decoded input."
@require src.len % 2 == 0 "src is not of even length"
@require dst.len >= decode_len(src.len) "Destination array is not large enough"
@return! Errors.INVALID_CHARACTER
@return! DecodingFailure.INVALID_CHARACTER
*>
fn usz! decode_bytes(char[] src, char[] dst)
{
usz i, j;
char a, b;
for (j = 1; j < src.len; j += 2) {
a = HEXREVERSE[src[j-1]];
b = HEXREVERSE[src[j]];
if (a > 0x0f) {
return Errors.INVALID_CHARACTER?;
}
if (b > 0x0f) {
return Errors.INVALID_CHARACTER?;
}
usz i;
for (usz j = 1; j < src.len; j += 2)
{
char a = HEXREVERSE[src[j - 1]];
char b = HEXREVERSE[src[j]];
if (a > 0x0f || b > 0x0f) return DecodingFailure.INVALID_CHARACTER?;
dst[i] = (a << 4) | b;
i++;
}
Expand All @@ -99,19 +91,19 @@ fn usz! decode_bytes(char[] src, char[] dst)

const char[*] HEXALPHABET @private = "0123456789abcdef";
const char[*] HEXREVERSE @private =
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"00010203040506070809ffffffffffff"
x"ff0a0b0c0d0e0fffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ff0a0b0c0d0e0fffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff"
x"ffffffffffffffffffffffffffffffff";
x`ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
00010203040506070809ffffffffffff
ff0a0b0c0d0e0fffffffffffffffffff
ffffffffffffffffffffffffffffffff
ff0a0b0c0d0e0fffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff`;
1 change: 1 addition & 0 deletions releasenotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- SimpleHeapAllocator bug when splitting blocks allowed memory overrun.
- Not possible to alias or take reference for extension methods on non-user defined types. #1637
- Prevent methods from using names of properties or fields. #1638
- b64 / hex data strings can now be used with \` as well.

### Stdlib changes
- Add `io::MultiReader`, `io::MultiWriter`, and `io::TeeReader` structs.
Expand Down
8 changes: 4 additions & 4 deletions src/compiler/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,7 @@ static inline bool scan_raw_string(Lexer *lexer)
static inline bool scan_hex_array(Lexer *lexer)
{
char start_char = peek(lexer);
next(lexer); // Step past ' or "
next(lexer); // Step past ' or " or `
char c;
uint64_t len = 0;
while (1)
Expand Down Expand Up @@ -1081,7 +1081,7 @@ static inline bool scan_base64(Lexer *lexer)
next(lexer); // Step past 6
next(lexer); // Step past 4
char start_char = peek(lexer);
next(lexer); // Step past ' or "
next(lexer); // Step past ' or " or `
char c;
unsigned end_len = 0;
uint64_t len = 0;
Expand Down Expand Up @@ -1348,13 +1348,13 @@ static bool lexer_scan_token_inner(Lexer *lexer)
if (match(lexer, '=')) return new_token(lexer, TOKEN_MINUS_ASSIGN, "-=");
return new_token(lexer, TOKEN_MINUS, "-");
case 'x':
if ((peek(lexer) == '"' || peek(lexer) == '\''))
if ((peek(lexer) == '"' || peek(lexer) == '\'' || peek(lexer) == '`'))
{
return scan_hex_array(lexer);
}
goto IDENT;
case 'b':
if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"'))
if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"' || lexer->current[2] == '`'))
{
return scan_base64(lexer);
}
Expand Down
16 changes: 10 additions & 6 deletions test/unit/stdlib/encoding/hex.c3
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,28 @@ TestCase[] tests = {
{{0xe3, 0xa1}, "E3A1"},
};

fn void! encode() {
fn void! encode()
{
usz n;
char[64] buf;
foreach (t : tests) {
n = hex::encode_bytes(t.dec, buf[..])!;
foreach (t : tests)
{
n = hex::encode_bytes(t.dec, buf[..]);
String want = ((String)t.enc).temp_ascii_to_lower();
assert(want == buf[:n], "encode failed: got: %s, want: %s", buf[:n], want);
@pool()
{
assert(want == hex::encode_temp(t.dec)!);
assert(want == hex::encode_temp(t.dec));
};
}
}

fn void! decode() {
fn void! decode()
{
usz n;
char[64] buf;
foreach (t : tests) {
foreach (t : tests)
{
n = hex::decode_bytes(t.enc, buf[..])!;
assert(t.dec == buf[:n], "decode failed: got: %s, want: %s", buf[:n], t.dec);
@pool()
Expand Down

0 comments on commit a087ba6

Please sign in to comment.