Skip to content

Commit

Permalink
perf(parser): lex identifiers as bytes not chars
Browse files: browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Feb 9, 2024
1 parent 593a602 commit 070d3c4
Show file tree
Hide file tree
Showing 10 changed files with 841 additions and 66 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ regex = { version = "1.10.3" }
rustc-hash = { version = "1.1.0", default-features = false, features = ["std"] }
ryu-js = { version = "1.0.0" }
ropey = { version = "1.6.1" }
seq-macro = { version = "0.3.5" }
serde = { version = "1.0.196" }
serde_json = { version = "1.0.113" }
syn = { version = "=1.0.109" }
Expand Down
1 change: 1 addition & 0 deletions crates/oxc_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ assert-unchecked = { workspace = true }
bitflags = { workspace = true }
rustc-hash = { workspace = true }
num-bigint = { workspace = true }
seq-macro = { workspace = true }

[dev-dependencies]
oxc_ast = { workspace = true, features = ["serde"] }
Expand Down
20 changes: 10 additions & 10 deletions crates/oxc_parser/src/lexer/byte_handlers.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{AutoCow, Kind, Lexer, LexerContext};
use super::{Kind, Lexer, LexerContext};
use crate::diagnostics;

#[allow(clippy::unnecessary_safety_comment)]
Expand Down Expand Up @@ -137,7 +137,10 @@ macro_rules! ascii_byte_handler {
/// (`a`-`z`, `A`-`Z`, `$` or `_`).
///
/// Macro calls `Lexer::identifier_name_handler` to get the text of the identifier,
/// and slices off first character.
/// minus its first character.
///
/// `Lexer::identifier_name_handler` is an unsafe function, but if the byte being consumed is ASCII,
/// its safety requirements are met.
///
/// # SAFETY
/// Only use this macro to define byte handlers for ASCII characters.
Expand All @@ -156,7 +159,8 @@ macro_rules! ascii_byte_handler {
/// const L_G: ByteHandler = {
/// #[allow(non_snake_case)]
/// fn L_G(lexer: &mut Lexer) -> Kind {
/// let id_without_first_char = &lexer.identifier_name_handler()[1..];
/// // SAFETY: This macro is only used for ASCII characters
/// let id_without_first_char = unsafe { lexer.identifier_name_handler() };
/// match id_without_first_char {
/// "et" => Kind::Get,
/// "lobal" => Kind::Global,
Expand All @@ -169,7 +173,8 @@ macro_rules! ascii_byte_handler {
macro_rules! ascii_identifier_handler {
($id:ident($str:ident) $body:expr) => {
byte_handler!($id(lexer) {
let $str = &lexer.identifier_name_handler()[1..];
// SAFETY: This macro is only used for ASCII characters
let $str = unsafe { lexer.identifier_name_handler() };
$body
});
};
Expand Down Expand Up @@ -439,12 +444,7 @@ ascii_byte_handler!(BTO(lexer) {

// \
ascii_byte_handler!(ESC(lexer) {
let mut builder = AutoCow::new(lexer);
lexer.consume_char();
builder.force_allocation_without_current_ascii_char(lexer);
lexer.identifier_unicode_escape_sequence(&mut builder, true);
let text = lexer.identifier_name(builder);
Kind::match_keyword(text)
lexer.identifier_backslash_handler()
});

// ]
Expand Down
Loading

0 comments on commit 070d3c4

Please sign in to comment.