Skip to content

Commit

Permalink
start merging base32 and zbase32 - wip
Browse files Browse the repository at this point in the history
  • Loading branch information
jbesraa committed May 12, 2023
1 parent 5da926a commit ea1b296
Showing 1 changed file with 256 additions and 66 deletions.
322 changes: 256 additions & 66 deletions lightning/src/util/base32.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
// Source: https://crates.io/crates/base32 v0.4.0
// License: MIT or Apache-2.0
// Copyright (c) 2015 The base32 Developers
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// (reference https://github.com/andreasots/base32/blob/master/LICENSE-MIT)
// This is a modification of base32 encoding to support the zbase32 alphabet.
// The original piece of software can be found at https://crates.io/crates/base32 (v0.4.0).
// The original portions of this software are Copyright (c) 2015 The base32 Developers

/* This file is licensed under either of
* Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
* MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
* at your option.
*/

use crate::prelude::*;

Expand All @@ -30,19 +18,29 @@ pub enum Alphabet {
/// Whether to use padding.
padding: bool
},
ZBase32
}

/// RFC4648 base32 encoding with padding.
const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

/// Encode a byte slice into a base32 string.
pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
let (alphabet, padding) = match alphabet {
Alphabet::RFC4648 { padding } => (RFC4648_ALPHABET, padding),
};
/// Inverse zbase32 lookup table for decoding.
///
/// Covers the ASCII range `'0'..='Z'` (43 entries); the entry for a character `c` lives at
/// index `c - b'0'`, with letters stored under their uppercase form. `-1` marks a character
/// that is not part of the zbase32 alphabet.
const ZBASE_INV_ALPHABET: [i8; 43] = [
	// '0'..='9' ('0' and '2' are not zbase32 characters)
	-1, 18, -1, 25, 26, 27, 30, 29, 7, 31,
	// ':'..='@' (punctuation, never valid)
	-1, -1, -1, -1, -1, -1, -1,
	// 'A'..='M' ('L' is not a zbase32 character)
	24, 1, 12, 3, 8, 5, 6, 28, 21, 9, 10, -1, 11,
	// 'N'..='Z' ('V' is not a zbase32 character)
	2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
];

/// The zbase32 alphabet, ordered so that the byte at index `i` is the character standing for
/// the 5-bit value `i`.
const ZBASE_ALPHABET: &[u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";

/// Inverse RFC4648 lookup table for decoding.
///
/// Covers the ASCII range `'0'..='Z'` (43 entries); the entry for a character `c` lives at
/// index `c - b'0'`. `-1` marks a character that is not part of the alphabet.
const RFC4648_INV_ALPHABET: [i8; 43] = [
	// '0'..='9' (only '2'..='7' belong to the alphabet)
	-1, -1, 26, 27, 28, 29, 30, 31, -1, -1,
	// ':'..='@' (the padding character '=' maps to 0, the rest are invalid)
	-1, -1, -1, 0, -1, -1, -1,
	// 'A'..='M'
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
	// 'N'..='Z'
	13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
];

let mut ret = Vec::with_capacity((data.len() + 3) / 4 * 5);
/// The RFC4648 base32 alphabet (`A`-`Z` followed by `2`-`7`); the byte at index `i` is the
/// character standing for the 5-bit value `i`.
const RFC4648_ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

fn map_alphabet(mut ret: Vec<u8>, data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
for chunk in data.chunks(5) {
let buf = {
let mut buf = [0u8; 5];
Expand All @@ -61,6 +59,12 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
ret.push(alphabet[(buf[4] & 0x1F) as usize]);
}

ret
}


fn rfc4648_encode(data: &[u8], padding: bool) -> String {
let mut ret = map_alphabet(Vec::with_capacity((data.len() + 3) / 4 * 5), data, RFC4648_ALPHABET);
if data.len() % 5 != 0 {
let len = ret.len();
let num_extra = 8 - (data.len() % 5 * 8 + 4) / 5;
Expand All @@ -72,50 +76,163 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
ret.truncate(len - num_extra);
}
}
String::from_utf8(ret).unwrap()

}

fn zbase32_encode(data: &[u8])-> String {
let mut ret = Vec::with_capacity((data.len() + 4) / 5 * 8);
ret = map_alphabet(ret, data, RFC4648_ALPHABET);
ret.truncate((data.len() * 8 + 4) / 5);

// Check that our capacity calculation doesn't under-shoot in fuzzing
#[cfg(fuzzing)]
assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);

String::from_utf8(ret).unwrap()
}

/// Inverse RFC4648 lookup table for decoding.
const RFC4648_INV_ALPHABET: [i8; 43] = [
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
];
/// Encode a byte slice into a base32 string using the requested [`Alphabet`].
///
/// `Alphabet::RFC4648` is handled by `rfc4648_encode`, which receives the variant's `padding`
/// flag; `Alphabet::ZBase32` is handled by `zbase32_encode`.
pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
	match alphabet {
		Alphabet::ZBase32 => zbase32_encode(data),
		Alphabet::RFC4648 { padding } => rfc4648_encode(data, padding),
	}
}


/// Decode a base32 string into a byte vector.
pub fn decode(alphabet: Alphabet, data: &str) -> Option<Vec<u8>> {
let data = data.as_bytes();
let alphabet = match alphabet {
Alphabet::RFC4648 { .. } => RFC4648_INV_ALPHABET,
};
let mut unpadded_data_length = data.len();
data.iter().rev().take(6).for_each(|&c| {
if c != b'=' {
return;
}
unpadded_data_length -= 1;
});
let output_length = unpadded_data_length * 5 / 8;
let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
for chunk in data.chunks(8) {
let buf = {
let mut buf = [0u8; 8];
for (i, &c) in chunk.iter().enumerate() {
match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
Some(&-1) | None => return None,
Some(&value) => buf[i] = value as u8,
pub fn decode(alphabet: Alphabet, data: &str) -> Result<Vec<u8>, ()> {
match alphabet {
Alphabet::RFC4648 { .. } => {
let alphabet = RFC4648_INV_ALPHABET;
let data = data.as_bytes();
let mut unpadded_data_length = data.len();
data.iter().rev().take(6).for_each(|&c| {
if c != b'=' {
return;
}
unpadded_data_length -= 1;
});
let output_length = unpadded_data_length * 5 / 8;
let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
for chunk in data.chunks(8) {
let buf = {
let mut buf = [0u8; 8];
for (i, &c) in chunk.iter().enumerate() {
match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
Some(&-1) | None => return Err(()),
Some(&value) => buf[i] = value as u8,
};
}
buf
};
ret.push((buf[0] << 3) | (buf[1] >> 2));
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
ret.push((buf[3] << 4) | (buf[4] >> 1));
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
ret.push((buf[6] << 5) | buf[7]);
}
buf
};
ret.push((buf[0] << 3) | (buf[1] >> 2));
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
ret.push((buf[3] << 4) | (buf[4] >> 1));
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
ret.push((buf[6] << 5) | buf[7]);
ret.truncate(output_length);
Ok(ret)

},
Alphabet::ZBase32 => {
if !data.is_ascii() {
return Err(());
}
let alphabet = ZBASE_INV_ALPHABET;

let data = data.as_bytes();
let output_length = data.len() * 5 / 8;
if data.len() > (output_length * 8 + 4) / 5 {
// If the string has more charachters than are required to alphabet_encode the number of bytes
// decodable, treat the string as invalid.
return Err(());
}

let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);

for chunk in data.chunks(8) {
let buf = {
let mut buf = [0u8; 8];
for (i, &c) in chunk.iter().enumerate() {
match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
Some(&-1) | None => return Err(()),
Some(&value) => buf[i] = value as u8,
};
}
buf
};
ret.push((buf[0] << 3) | (buf[1] >> 2));
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
ret.push((buf[3] << 4) | (buf[4] >> 1));
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
ret.push((buf[6] << 5) | buf[7]);
}
for c in ret.drain(output_length..) {
if c != 0 {
// If the original string had any bits set at positions outside of the encoded data,
// treat the string as invalid.
return Err(());
}
}

// Check that our capacity calculation doesn't under-shoot in fuzzing
#[cfg(fuzzing)]
assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);

Ok(ret)
},
}
}

#[cfg(test)]
mod tests {
	use super::*;

	/// zbase32 test vectors as `(encoded string, raw bytes)` pairs.
	const TEST_DATA: &[(&str, &[u8])] = &[
		("", &[]),
		("yy", &[0x00]),
		("oy", &[0x80]),
		("tqrey", &[0x8b, 0x88, 0x80]),
		("6n9hq", &[0xf0, 0xbf, 0xc7]),
		("4t7ye", &[0xd4, 0x7a, 0x04]),
		("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
		("ybndrfg8ejkmcpqxot1uwisza345h769", &[0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6,
		                                       0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7, 0xc6,
		                                       0x75, 0xbe, 0x77, 0xdf])
	];

	/// Every vector's raw bytes must encode to its zbase32 string.
	#[test]
	fn test_encode() {
		for &(zbase32, data) in TEST_DATA {
			assert_eq!(encode(Alphabet::ZBase32, data), zbase32);
		}
	}

	/// Every vector's zbase32 string must decode back to its raw bytes.
	#[test]
	fn test_decode() {
		for &(zbase32, data) in TEST_DATA {
			assert_eq!(decode(Alphabet::ZBase32, zbase32).unwrap(), data);
		}
	}

	/// Strings containing characters outside the zbase32 alphabet must be rejected.
	#[test]
	fn test_decode_wrong() {
		const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];

		for &data in WRONG_DATA {
			assert!(decode(Alphabet::ZBase32, data).is_err(), "Data shouldn't be decodable");
		}
	}
}

#[cfg(test)]
Expand Down Expand Up @@ -203,11 +320,84 @@ mod test {

/// A character outside the RFC4648 alphabet must make padded decoding fail.
#[test]
fn invalid_chars_rfc4648() {
	// `decode` returns a `Result`, so the failure is checked with `is_err` rather than
	// by comparing against `None`.
	assert!(decode(RFC4648 { padding: true }, ",").is_err());
}

/// A character outside the RFC4648 alphabet must make unpadded decoding fail.
#[test]
fn invalid_chars_unpadded_rfc4648() {
	// `decode` returns a `Result`, so the failure is checked with `is_err` rather than
	// by comparing against `None`.
	assert!(decode(RFC4648 { padding: false }, ",").is_err());
}
}

// Decodes a zbase32 string to the original bytes, failing if the string was not encoded by a
// proper zbase32 encoder.
// pub fn alphabet_decode(data: &str) -> Result<Vec<u8>, ()> {
// if !data.is_ascii() {
// return Err(());
// }

// let data = data.as_bytes();
// let output_length = data.len() * 5 / 8;
// if data.len() > (output_length * 8 + 4) / 5 {
// // If the string has more charachters than are required to alphabet_encode the number of bytes
// // decodable, treat the string as invalid.
// return Err(());
// }

// let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);

// for chunk in data.chunks(8) {
// let buf = {
// let mut buf = [0u8; 8];
// for (i, &c) in chunk.iter().enumerate() {
// match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
// Some(&-1) | None => return Err(()),
// Some(&value) => buf[i] = value as u8,
// };
// }
// buf
// };
// ret.push((buf[0] << 3) | (buf[1] >> 2));
// ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
// ret.push((buf[3] << 4) | (buf[4] >> 1));
// ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
// ret.push((buf[6] << 5) | buf[7]);
// }
// for c in ret.drain(output_length..) {
// if c != 0 {
// // If the original string had any bits set at positions outside of the encoded data,
// // treat the string as invalid.
// return Err(());
// }
// }

// // Check that our capacity calculation doesn't under-shoot in fuzzing
// #[cfg(fuzzing)]
// assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);

// Ok(ret)
// }
//
//
//
// fn inv_map_alphabet(mut ret: Vec<u8>, data: &str, alphabet: &'static [u8]) -> Result<Vec<u8>, ()> {
// let data = data.as_bytes();
// for chunk in data.chunks(8) {
// let buf = {
// let mut buf = [0u8; 8];
// for (i, &c) in chunk.iter().enumerate() {
// match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
// Some(&-1) | None => return Err(()),
// Some(&value) => buf[i] = value as u8,
// };
// }
// buf
// };
// ret.push((buf[0] << 3) | (buf[1] >> 2));
// ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
// ret.push((buf[3] << 4) | (buf[4] >> 1));
// ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
// ret.push((buf[6] << 5) | buf[7]);
// }
// Ok(ret)
// }

0 comments on commit ea1b296

Please sign in to comment.