Skip to content

Commit

Permalink
Enforce maximum string length
Browse files Browse the repository at this point in the history
BIP-173 states that a bech32 string must not exceed 90 characters, but
that limit comes from the BIP specification; the bech32/bech32m checksum
algorithms can actually cover more characters than that.

To keep the `segwit` code BIP compliant while keeping the library
general, add an associated `CODE_LENGTH` const to the `Checksum` trait.
Then enforce the 90 character limit in the `segwit` module and types, and
enforce the `Ck::CODE_LENGTH` limit in the general API and modules.

FTR in `bech32 v0.9.0` no lengths were enforced.
  • Loading branch information
tcharding committed Oct 26, 2023
1 parent f5bd09b commit 6ca06b1
Show file tree
Hide file tree
Showing 6 changed files with 363 additions and 41 deletions.
180 changes: 163 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
//! The original description in [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki)
//! has more details. See also [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki).
//!
//! # Deviation from spec
//!
//! We do not enforce the 90 character limit specified by [BIP-173]; instead we enforce the code
//! length for the respective checksum algorithm (see [`Checksum::CODE_LENGTH`]). We do, however,
//! enforce the 90 character limit within the `segwit` modules and types.
//!
//! # Examples
//!
//! ## Encoding
Expand Down Expand Up @@ -100,6 +106,7 @@
//! impl Checksum for Codex32 {
//! type MidstateRepr = u128;
//! const CHECKSUM_LENGTH: usize = 13;
//! const CODE_LENGTH: usize = 93;
//! // Copied from BIP-93
//! const GENERATOR_SH: [u128; 5] = [
//! 0x19dc500ce73fde210,
Expand All @@ -113,6 +120,8 @@
//!
//! # }
//! ```
//!
//! [`Checksum::CODE_LENGTH`]: crate::primitives::checksum::Checksum::CODE_LENGTH
#![cfg_attr(all(not(feature = "std"), not(test)), no_std)]
// Experimental features we need.
Expand Down Expand Up @@ -142,14 +151,12 @@ pub mod segwit;
use alloc::{string::String, vec::Vec};
use core::fmt;

#[cfg(feature = "alloc")]
use crate::error::write_err;
#[cfg(doc)]
use crate::primitives::decode::CheckedHrpstring;
use crate::primitives::decode::CodeLengthError;
#[cfg(feature = "alloc")]
use crate::primitives::decode::UncheckedHrpstringError;
#[cfg(feature = "alloc")]
use crate::primitives::decode::{ChecksumError, UncheckedHrpstring};
use crate::primitives::decode::{ChecksumError, UncheckedHrpstring, UncheckedHrpstringError};

#[rustfmt::skip] // Keep public re-exports separate.
#[doc(inline)]
Expand Down Expand Up @@ -216,7 +223,7 @@ pub fn decode(s: &str) -> Result<(Hrp, Vec<u8>), DecodeError> {
/// `Ck` algorithm (`NoChecksum` to exclude checksum altogether).
#[cfg(feature = "alloc")]
#[inline]
pub fn encode<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, fmt::Error> {
pub fn encode<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, EncodeError> {
encode_lower::<Ck>(hrp, data)
}

Expand All @@ -226,7 +233,7 @@ pub fn encode<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, fmt::Error>
/// `Ck` algorithm (`NoChecksum` to exclude checksum altogether).
#[cfg(feature = "alloc")]
#[inline]
pub fn encode_lower<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, fmt::Error> {
pub fn encode_lower<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, EncodeError> {
let mut buf = String::new();
encode_lower_to_fmt::<Ck, String>(&mut buf, hrp, data)?;
Ok(buf)
Expand All @@ -238,7 +245,7 @@ pub fn encode_lower<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, fmt::
/// `Ck` algorithm (`NoChecksum` to exclude checksum altogether).
#[cfg(feature = "alloc")]
#[inline]
pub fn encode_upper<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, fmt::Error> {
pub fn encode_upper<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<String, EncodeError> {
let mut buf = String::new();
encode_upper_to_fmt::<Ck, String>(&mut buf, hrp, data)?;
Ok(buf)
Expand All @@ -253,7 +260,7 @@ pub fn encode_to_fmt<Ck: Checksum, W: fmt::Write>(
fmt: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), fmt::Error> {
) -> Result<(), EncodeError> {
encode_lower_to_fmt::<Ck, W>(fmt, hrp, data)
}

Expand All @@ -266,7 +273,9 @@ pub fn encode_lower_to_fmt<Ck: Checksum, W: fmt::Write>(
fmt: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), fmt::Error> {
) -> Result<(), EncodeError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();
for c in chars {
Expand All @@ -284,7 +293,9 @@ pub fn encode_upper_to_fmt<Ck: Checksum, W: fmt::Write>(
fmt: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), fmt::Error> {
) -> Result<(), EncodeError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();
for c in chars {
Expand All @@ -303,7 +314,7 @@ pub fn encode_to_writer<Ck: Checksum, W: std::io::Write>(
w: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), std::io::Error> {
) -> Result<(), EncodeIoError> {
encode_lower_to_writer::<Ck, W>(w, hrp, data)
}

Expand All @@ -317,7 +328,9 @@ pub fn encode_lower_to_writer<Ck: Checksum, W: std::io::Write>(
w: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), std::io::Error> {
) -> Result<(), EncodeIoError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();
for c in chars {
Expand All @@ -336,7 +349,9 @@ pub fn encode_upper_to_writer<Ck: Checksum, W: std::io::Write>(
w: &mut W,
hrp: Hrp,
data: &[u8],
) -> Result<(), std::io::Error> {
) -> Result<(), EncodeIoError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();
for c in chars {
Expand All @@ -345,10 +360,23 @@ pub fn encode_upper_to_writer<Ck: Checksum, W: std::io::Write>(
Ok(())
}

/// Returns the length of the bech32 string after encoding `hrp` and `data` (incl. checksum).
pub fn encoded_length<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> usize {
/// Checks that encoding `hrp` and `data` creates a code that does not exceed the code length for `Ck`.
///
/// The length of the code is how long a coded message can be (including the checksum!) for the code
/// to retain its error-correcting properties.
///
/// # Returns
///
/// `Ok(encoded_string_length)` if the encoded length is less than or equal to `Ck::CODE_LENGTH`
/// otherwise a [`CodeLengthError`] containing the encoded length and the maximum allowed.
pub fn encoded_length<Ck: Checksum>(hrp: Hrp, data: &[u8]) -> Result<usize, CodeLengthError> {
let iter = data.iter().copied().bytes_to_fes();
hrp.len() + 1 + iter.len() + Ck::CHECKSUM_LENGTH // +1 for separator
let len = hrp.len() + 1 + iter.len() + Ck::CHECKSUM_LENGTH; // +1 for separator
if len > Ck::CODE_LENGTH {
Err(CodeLengthError { encoded_length: len, code_length: Ck::CODE_LENGTH })
} else {
Ok(len)
}
}

/// An error while decoding a bech32 string.
Expand Down Expand Up @@ -392,6 +420,96 @@ impl From<UncheckedHrpstringError> for DecodeError {
fn from(e: UncheckedHrpstringError) -> Self { Self::Parse(e) }
}

/// An error while encoding a bech32 string to a formatter or `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum EncodeError {
    /// The encoded string (HRP, separator, data and checksum) would be longer than the
    /// maximum length allowed for the checksum algorithm.
    TooLong(CodeLengthError),
    /// Writing the encoded characters to the formatter failed.
    Fmt(fmt::Error),
}

impl fmt::Display for EncodeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use EncodeError::*;

match *self {
TooLong(ref e) => write_err!(f, "encode error"; e),
Fmt(ref e) => write_err!(f, "encode to formatter failed"; e),
}
}
}

#[cfg(feature = "std")]
impl std::error::Error for EncodeError {
    /// Returns the wrapped error; every variant carries one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::TooLong(inner) => Some(inner),
            Self::Fmt(inner) => Some(inner),
        }
    }
}

impl From<CodeLengthError> for EncodeError {
#[inline]
fn from(e: CodeLengthError) -> Self { Self::TooLong(e) }
}

impl From<fmt::Error> for EncodeError {
#[inline]
fn from(e: fmt::Error) -> Self { Self::Fmt(e) }
}

/// An error while encoding a bech32 string to a `std::io::Write` writer.
#[cfg(feature = "std")]
#[derive(Debug)]
#[non_exhaustive]
pub enum EncodeIoError {
    /// The encoded string (HRP, separator, data and checksum) would be longer than the
    /// maximum length allowed for the checksum algorithm.
    TooLong(CodeLengthError),
    /// Writing the encoded characters to the writer failed.
    Write(std::io::Error),
}

#[cfg(feature = "std")]
impl fmt::Display for EncodeIoError {
    /// Writes a short description of the failure via `write_err!`, handing it the wrapped
    /// error as the source.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::TooLong(source) => write_err!(f, "encode error"; source),
            Self::Write(source) => write_err!(f, "encode to writer failed"; source),
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for EncodeIoError {
    /// Returns the wrapped error; every variant carries one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::TooLong(inner) => Some(inner),
            Self::Write(inner) => Some(inner),
        }
    }
}

#[cfg(feature = "std")]
impl From<CodeLengthError> for EncodeIoError {
    /// Wraps a length violation so that `?` can be used on length checks.
    #[inline]
    fn from(err: CodeLengthError) -> Self { EncodeIoError::TooLong(err) }
}

#[cfg(feature = "std")]
impl From<std::io::Error> for EncodeIoError {
    /// Wraps an I/O failure so that `?` can be used on writer calls.
    #[inline]
    fn from(err: std::io::Error) -> Self { EncodeIoError::Write(err) }
}

#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
Expand Down Expand Up @@ -489,8 +607,36 @@ mod tests {

let encoded = encode::<Bech32m>(hrp, &data).expect("valid data");
let want = encoded.len();
let got = encoded_length::<Bech32m>(hrp, &data);
let got = encoded_length::<Bech32m>(hrp, &data).expect("encoded length");

assert_eq!(got, want);
}

#[test]
fn can_encode_maximum_length_string() {
    // With a 4 character HRP this payload encodes to exactly 1023 characters, the longest
    // string this test expects `Bech32m` encoding to accept.
    let hrp = Hrp::parse_unchecked("abcd");
    let payload = [0_u8; 632];

    let encoded = encode::<Bech32m>(hrp, &payload).expect("valid data");

    assert_eq!(encoded.len(), 1023);
}

#[test]
fn can_not_encode_string_too_long() {
    // Same payload as the maximum-length test but with a 5 character HRP, which pushes the
    // encoded string one character past the limit.
    let data = [0_u8; 632];
    let hrp = Hrp::parse_unchecked("abcde");

    match encode::<Bech32m>(hrp, &data) {
        Ok(_) => panic!("false positive"),
        Err(EncodeError::TooLong(CodeLengthError { encoded_length, code_length })) => {
            assert_eq!(encoded_length, 1024);
            // Also pin the reported maximum so a wrong limit in the error is caught.
            assert_eq!(code_length, 1023);
        }
        // Any other error means encoding failed for the wrong reason.
        Err(_) => panic!("false negative"),
    }
}

#[test]
fn can_decode_segwit_too_long_string() {
// The general `decode` function must accept this string even though it is too long for segwit.
// A 91 character long string, greater than the segwit enforced maximum of 90.
let s = "abcd1qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrw9z3s";
assert!(decode(s).is_ok());
}
}
9 changes: 7 additions & 2 deletions src/primitives/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@ pub trait Checksum {
/// be pretty efficient no matter what.
type MidstateRepr: PackedFe32;

/// The length of the code.
///
/// The length of the code is how long a coded message can be (including the
/// checksum!) for the code to retain its error-correcting properties.
const CODE_LENGTH: usize;

/// The number of characters in the checksum.
///
/// Alternately, the degree of the generator polynomial. This is **not** the same
/// as the "length of the code", which is the maximum number of characters that
/// the checksum can usefully cover.
/// as `Self::CODE_LENGTH`.
const CHECKSUM_LENGTH: usize;

/// The coefficients of the generator polynomial, except the leading monic term,
Expand Down
Loading

0 comments on commit 6ca06b1

Please sign in to comment.