Skip to content

Commit

Permalink
encoding: implement RFC4648 base32 encoding (#1596)
Browse files Browse the repository at this point in the history
Implement base32 encoding and decoding according to RFC 4648 with the
standard and extended hex alphabets. Add unit tests.

Link: https://www.rfc-editor.org/rfc/rfc4648
Signed-off-by: Koni Marti <koni.marti@gmail.com>
---------

Signed-off-by: Koni Marti <koni.marti@gmail.com>
  • Loading branch information
konimarti authored Nov 4, 2024
1 parent 6258cba commit 8154e27
Show file tree
Hide file tree
Showing 3 changed files with 411 additions and 0 deletions.
310 changes: 310 additions & 0 deletions lib/std/encoding/base32.c3
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
module std::encoding::base32;

// This module implements base32 encoding according to RFC 4648
// (https://www.rfc-editor.org/rfc/rfc4648)

distinct Alphabet = inline char[32];

// Standard base32 Alphabet
const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

// Extended Hex Alphabet
const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";

const uint MASK @private = 0b11111;
const char INVALID @private = 0xff;

const int STD_PADDING = '=';
const int NO_PADDING = -1;

fault Base32Error
{
DUPLICATE_IN_ALPHABET,
PADDING_IN_ALPHABET,
INVALID_CHARACTER_IN_ALPHABET,
DESTINATION_TOO_SMALL,
INVALID_PADDING,
CORRUPT_INPUT
}

struct Base32Encoder
{
Alphabet alphabet;
int padding;
}

<*
@param encoder "The 32-character alphabet for encoding."
@param padding "Set to a negative value to disable padding."
@require padding < 256
*>
fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING)
{
encoder.validate(padding)!;
*self = { .alphabet = encoder, .padding = padding };
}

<*
Calculate the length in bytes of the encoded data.
@param n "Length in bytes on input."
@return "Length in bytes of the encoded data."
*>
fn usz Base32Encoder.encode_len(&self, usz n)
{
// A character is encoded into 8 x 5-bit blocks.
if (self.padding >= 0)
{
// with padding
return (n + 4) / 5 * 8;
}
else
{
// no padding
usz trailing = n % 5;
return n / 5 * 8 + (trailing * 8 + 4) / 5;
}
}

<*
Encode the content of src into dst, which must be properly sized.
@param [in] src "The input to be encoded."
@param [inout] dst "The encoded input."
@return "The encoded size."
@return! Base32Error.DESTINATION_TOO_SMALL
*>
fn usz! Base32Encoder.encode(&self, char[] src, char[] dst)
{
if (src.len == 0) return 0;

usz n = (src.len / 5) * 5;
usz dn = self.encode_len(src.len);
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;

uint msb, lsb;
for (usz i = 0; i < n; i += 5)
{
// to fit 40 bits we need two 32-bit uints
msb = (uint)src[i] << 24 | (uint)src[i+1] << 16
| (uint)src[i+2] << 8 | (uint)src[i+3];
lsb = msb << 8 | (uint)src[i+4];

// now slice them into 5-bit chunks and translate to the
// alphabet.
dst[0] = self.alphabet[(msb >> 27) & MASK];
dst[1] = self.alphabet[(msb >> 22) & MASK];
dst[2] = self.alphabet[(msb >> 17) & MASK];
dst[3] = self.alphabet[(msb >> 12) & MASK];
dst[4] = self.alphabet[(msb >> 7) & MASK];
dst[5] = self.alphabet[(msb >> 2) & MASK];
dst[6] = self.alphabet[(lsb >> 5) & MASK];
dst[7] = self.alphabet[lsb & MASK];

dst = dst[8..];
}

usz trailing = src.len - n;
if (trailing == 0) return dn;

msb = 0;
switch (trailing)
{
case 4:
msb |= (uint)src[n+3];
lsb = msb << 8;
dst[6] = self.alphabet[(lsb >> 5) & MASK];
dst[5] = self.alphabet[(msb >> 2) & MASK];
nextcase 3;
case 3:
msb |= (uint)src[n+2] << 8;
dst[4] = self.alphabet[(msb >> 7) & MASK];
nextcase 2;
case 2:
msb |= (uint)src[n+1] << 16;
dst[3] = self.alphabet[(msb >> 12) & MASK];
dst[2] = self.alphabet[(msb >> 17) & MASK];
nextcase 1;
case 1:
msb |= (uint)src[n] << 24;
dst[1] = self.alphabet[(msb >> 22) & MASK];
dst[0] = self.alphabet[(msb >> 27) & MASK];
}

// add the padding
if (self.padding >= 0)
{
char pad = (char)self.padding;
for (usz i = (trailing * 8 / 5) + 1; i < 8; i++)
{
dst[i] = pad;
}
}

return dn;
}

struct Base32Decoder
{
Alphabet alphabet;
int padding;
char[256] reverse;
}

<*
@param decoder "The alphabet used for decoding."
@param padding "Set to a negative value to disable padding."
@require padding < 256
*>
fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding = STD_PADDING)
{
decoder.validate(padding)!;
*self = { .alphabet = decoder, .padding = padding };

self.reverse[..] = INVALID;
foreach (char i, c : decoder)
{
self.reverse[c] = i;
}
}

<*
Calculate the length in bytes of the decoded data.
@param n "Length in bytes of input."
@return "Length in bytes of the decoded data."
*>
fn usz Base32Decoder.decode_len(&self, usz n)
{
if (self.padding >= 0)
{
// with padding
return (n / 8) * 5;
}
else
{
// no padding
usz trailing = n % 8;
return n / 8 * 5 + (trailing * 5 ) / 8;
}
}

<*
Decode the content of src into dst, which must be properly sized.
@param src "The input to be decoded."
@param dst "The decoded input."
@return "The decoded size."
@return! Base32Error.DESTINATION_TOO_SMALL, Base32Error.CORRUPT_INPUT
*>
fn usz! Base32Decoder.decode(&self, char[] src, char[] dst)
{
if (src.len == 0) return 0;
usz dn = self.decode_len(src.len);
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;

usz j, n;
char[8] buf;
while (src.len > 0 && dst.len > 0)
{

// load 8 bytes into buffer
for (j = 0; j < 8; j++)
{
if (src.len == 0)
{
if (self.padding >= 0)
{
return Base32Error.CORRUPT_INPUT?;
}
break;
}
if (src[0] == (char)self.padding)
{
break;
}
buf[j] = self.reverse[src[0]];
if (buf[j] == INVALID)
{
return Base32Error.CORRUPT_INPUT?;
}
src = src[1..];
}

// extract 5-bytes from the buffer which contains 8 x 5 bit chunks
switch (j)
{
case 8:
// |66677777| dst[4]
// | 77777| buf[7]
// |666 | buf[6] << 5
dst[4] = buf[7] | buf[6] << 5;
n++;
nextcase 7;
case 7:
// |45555566| dst[3]
// | 66| buf[6] >> 3
// | 55555 | buf[5] << 2
// |4 | buf[4] << 7
dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7;
n++;
nextcase 5;
case 5:
// |33334444| dst[2]
// | 4444| buf[4] >> 1
// |3333 | buf[3] << 4
dst[2] = buf[4] >> 1 | buf[3] << 4;
n++;
nextcase 4;
case 4:
// |11222223| dst[1]
// | 3| buf[3] >> 4
// | 22222 | buf[2] << 1
// |11 | buf[1] << 6
dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6;
n++;
nextcase 2;
case 2:
// |00000111| dst[0]
// | 111| buf[1] >> 2
// |00000 | buf[0] << 3
dst[0] = buf[1] >> 2 | buf[0] << 3;
n++;
default:
return Base32Error.CORRUPT_INPUT?;
}

if (dst.len < 5) break;
dst = dst[5..];
}

return n;
}


// Validate the 32-character alphabet to make sure that no character occurs
// twice and that the padding is not present in the alphabet.
fn void! Alphabet.validate(&self, int padding)
{
bool[256] checked;
foreach (c : self)
{
if (checked[c])
{
return Base32Error.DUPLICATE_IN_ALPHABET?;
}
checked[c] = true;
if (c == '\r' || c == '\n')
{
return Base32Error.INVALID_CHARACTER_IN_ALPHABET?;
}
}
if (padding >= 0)
{
char pad = (char)padding;
if (pad == '\r' || pad == '\n')
{
return Base32Error.INVALID_PADDING?;
}
if (checked[pad])
{
return Base32Error.PADDING_IN_ALPHABET?;
}
}
}
1 change: 1 addition & 0 deletions releasenotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
- Add read/write to stream with big endian ints.
- Move accidently hidden "wrap_bytes".
- Added CBool #1530.
- Added encoding/base32 module.

## 0.6.3 Change list

Expand Down
Loading

0 comments on commit 8154e27

Please sign in to comment.