Skip to content

Commit

Permalink
perf(lexer): dedupe numeric separator check (#3283)
Browse files Browse the repository at this point in the history
## What This PR Does

Updates numeric literal token lexing to record when separator characters
(`_`) are found in a new `Token` flag. This then gets passed to
`parse_int` and `parse_float`, removing the need for a second `_` check
in those two functions.

When run locally, I see no change to lexer benchmarks and minor
improvements to codegen benchmarks. For some reason, semantic and source
map benches seem to be doing slightly worse.

Note that I attempted to implement this with `bitflags!` (making
`escaped` and `is_on_newline` flags as well) and this caused performance
degradation. My best guess is that it turned reads on these flags from a
`mov` to a `mov` + a binary and.

---------

Co-authored-by: Boshen <boshenc@gmail.com>
  • Loading branch information
DonIsaac and Boshen authored May 15, 2024
1 parent dad47a5 commit 508dae6
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 10 deletions.
8 changes: 6 additions & 2 deletions crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ impl<'a> ParserImpl<'a> {
let token = self.cur_token();
let src = self.cur_src();
let value = match token.kind {
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => parse_int(src, token.kind),
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => parse_float(src),
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => {
parse_int(src, token.kind, token.has_separator())
}
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => {
parse_float(src, token.has_separator())
}
_ => unreachable!(),
}
.map_err(|err| diagnostics::invalid_number(err, token.span()))?;
Expand Down
40 changes: 33 additions & 7 deletions crates/oxc_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,35 @@ use std::borrow::Cow;

use super::kind::Kind;

// the string passed in has `_` removed from the lexer
pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
pub fn parse_int(s: &str, kind: Kind, has_sep: bool) -> Result<f64, &'static str> {
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
let s = s.as_ref();
debug_assert!(!s.contains('_'));

// SAFETY: we just checked that `s` has no `_` characters
unsafe { parse_int_without_underscores_unchecked(s, kind) }
}

pub fn parse_float(s: &str, has_sep: bool) -> Result<f64, &'static str> {
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
debug_assert!(!s.contains('_'));

// SAFETY: we just checked that `s` has no `_` characters
unsafe { parse_float_without_underscores_unchecked(&s) }
}

/// # Safety
///
/// This function assumes that all `_` characters have been stripped from `s`.
/// Violating this assumption does _not_ cause UB. However, this function is
/// marked as unsafe to ensure consumers are aware of the assumption.
unsafe fn parse_int_without_underscores_unchecked(
s: &str,
kind: Kind,
) -> Result<f64, &'static str> {
if kind == Kind::Decimal {
return parse_float(s);
return parse_float_without_underscores_unchecked(s);
}
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
let s = s.as_ref();
match kind {
Kind::Binary => Ok(parse_binary(&s[2..])),
Kind::Octal => {
Expand All @@ -28,8 +50,12 @@ pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
}
}

pub fn parse_float(s: &str) -> Result<f64, &'static str> {
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
/// # Safety
///
/// This function assumes that all `_` characters have been stripped from `s`.
/// Violating this assumption does _not_ cause UB. However, this function is
/// marked as unsafe to ensure consumers are aware of the assumption.
unsafe fn parse_float_without_underscores_unchecked(s: &str) -> Result<f64, &'static str> {
s.parse::<f64>().map_err(|_| "invalid float")
}

Expand Down
10 changes: 10 additions & 0 deletions crates/oxc_parser/src/lexer/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ impl<'a> Lexer<'a> {
match c {
'_' => {
self.consume_char();
// NOTE: it looks invalid numeric tokens are still parsed.
// This seems to be a waste. It also requires us to put this
// call here instead of after we ensure the next character
// is a number character
self.token.set_has_separator();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.consume_char();
} else {
Expand Down Expand Up @@ -134,6 +139,11 @@ impl<'a> Lexer<'a> {
match c {
'_' => {
self.consume_char();
// NOTE: it looks invalid numeric tokens are still parsed.
// This seems to be a waste. It also requires us to put this
// call here instead of after we ensure the next character
// is an ASCII digit
self.token.set_has_separator();
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.consume_char();
} else {
Expand Down
19 changes: 18 additions & 1 deletion crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@ pub struct Token {
/// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
pub escaped: bool,

/// True if for numeric literal tokens that contain separator characters (`_`).
///
/// Numeric literals are defined in Section 12.9.3 of the ECMAScript
/// standard and include [`Kind::Decimal`], [`Kind::Binary`],
/// [`Kind::Octal`], [`Kind::Hex`], etc.
has_separator: bool,

// Padding to fill to 16 bytes.
// This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword
// and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword.
_padding1: u8,
_padding2: u32,
}

Expand All @@ -50,4 +56,15 @@ impl Token {
pub fn escaped(&self) -> bool {
self.escaped
}

#[inline]
pub fn has_separator(&self) -> bool {
debug_assert!(!self.has_separator || self.kind.is_number());
self.has_separator
}

pub(crate) fn set_has_separator(&mut self) {
debug_assert!(!self.has_separator || self.kind.is_number() || self.kind == Kind::default());
self.has_separator = true;
}
}

0 comments on commit 508dae6

Please sign in to comment.