Skip to content

Commit

Permalink
Shrunk TokenKind from 16 bytes to 2 by replacing `IntegerWithBase(u…
Browse files Browse the repository at this point in the history
…size)` with `IntegerWithBase(u8)` (the maximum base is 16, so the value can never exceed 255)

Removed Box from base-n digit predicate function by switching to function pointers
  • Loading branch information
rben01 authored and sharkdp committed Oct 2, 2024
1 parent aa564a3 commit d9e0e2c
Showing 1 changed file with 22 additions and 14 deletions.
36 changes: 22 additions & 14 deletions numbat/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ pub enum TokenizerErrorKind {
ExpectedDigit { character: Option<char> },

#[error("Expected base-{base} digit")]
ExpectedDigitInBase {
base: usize,
character: Option<char>,
},
ExpectedDigitInBase { base: u8, character: Option<char> },

#[error("Unterminated string")]
UnterminatedString,
Expand Down Expand Up @@ -125,7 +122,7 @@ pub enum TokenKind {

// Variable-length tokens
Number,
IntegerWithBase(usize),
IntegerWithBase(u8),
Identifier,

// A normal string without interpolation: `"hello world"`
Expand Down Expand Up @@ -378,6 +375,18 @@ impl Tokenizer {
}

fn scan_single_token<'a>(&mut self, input: &'a str) -> Result<Option<Token<'a>>> {
/// Returns `true` when `c` is an ASCII hexadecimal digit:
/// `0`-`9`, `a`-`f`, or `A`-`F`.
fn is_ascii_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}

/// Returns `true` when `c` is an ASCII octal digit (`0` through `7`).
fn is_ascii_octal_digit(c: char) -> bool {
    matches!(c, '0'..='7')
}

/// Returns `true` when `c` is a binary digit, i.e. exactly `0` or `1`.
fn is_ascii_binary_digit(c: char) -> bool {
    matches!(c, '0' | '1')
}

static KEYWORDS: OnceLock<HashMap<&'static str, TokenKind>> = OnceLock::new();
let keywords = KEYWORDS.get_or_init(|| {
let mut m = HashMap::new();
Expand Down Expand Up @@ -463,18 +472,17 @@ impl Tokenizer {
.map(|c| c == 'x' || c == 'o' || c == 'b')
.unwrap_or(false) =>
{
let (base, is_digit_in_base): (_, Box<dyn Fn(char) -> bool>) =
match self.peek(input).unwrap() {
'x' => (16, Box::new(|c| c.is_ascii_hexdigit())),
'o' => (8, Box::new(|c| ('0'..='7').contains(&c))),
'b' => (2, Box::new(|c| c == '0' || c == '1')),
_ => unreachable!(),
};
let (base, is_digit_in_base) = match self.peek(input).unwrap() {
'x' => (16, is_ascii_hex_digit as fn(char) -> bool),
'o' => (8, is_ascii_octal_digit as _),
'b' => (2, is_ascii_binary_digit as _),
_ => unreachable!(),
};

self.advance(input); // skip over the x/o/b

// If the first character is not a digits, that's an error.
if !self.peek(input).map(&is_digit_in_base).unwrap_or(false) {
// If the first character is not a digit, that's an error.
if !self.peek(input).map(is_digit_in_base).unwrap_or(false) {
return tokenizer_error(
self.current,
TokenizerErrorKind::ExpectedDigitInBase {
Expand Down

0 comments on commit d9e0e2c

Please sign in to comment.