diff --git a/Cargo.lock b/Cargo.lock index c242a4aab67..c98c6faf4cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,9 +28,21 @@ dependencies = [ "ryu-js", "serde", "serde_json", + "string-interner", "unicode-normalization", ] +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -493,6 +505,9 @@ name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1233,6 +1248,17 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a" +[[package]] +name = "string-interner" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e2531d8525b29b514d25e275a43581320d587b86db302b9a7e464bac579648" +dependencies = [ + "cfg-if", + "hashbrown", + "serde", +] + [[package]] name = "strsim" version = "0.8.0" diff --git a/boa/Cargo.toml b/boa/Cargo.toml index c990a3d3706..7e943f2cdf2 100644 --- a/boa/Cargo.toml +++ b/boa/Cargo.toml @@ -40,6 +40,7 @@ once_cell = "1.9.0" # Optional Dependencies measureme = { version = "10.0.0", optional = true } +string-interner = "0.14.0" [target.wasm32-unknown-unknown.dependencies] getrandom = { version = "0.2.3", features = ["js"] } diff --git a/boa/src/context.rs b/boa/src/context.rs index df9658f4f6a..a85dd18b68c 100644 --- a/boa/src/context.rs +++ b/boa/src/context.rs @@ -11,7 +11,7 @@ use crate::{ realm::Realm, syntax::Parser, vm::{FinallyReturn, Vm}, - BoaProfiler, JsResult, JsString, JsValue, + BoaProfiler, Interner, JsResult, JsString, JsValue, }; #[cfg(feature = "console")] @@ -366,6 +366,9 @@ pub struct Context { /// realm holds both the global object and the environment pub(crate) realm: Realm, + /// String interner + interner: Interner, + /// console object state. #[cfg(feature = "console")] console: Console, @@ -393,6 +396,7 @@ impl Default for Context { let realm = Realm::create(); let mut context = Self { realm, + interner: Interner::new(), #[cfg(feature = "console")] console: Console::default(), iterator_prototypes: IteratorPrototypes::default(), @@ -434,6 +438,16 @@ impl Context { Default::default() } + /// Gets the interner of the context. + pub fn interner(&self) -> &Interner { + &self.interner + } + + /// Gets the interner of the context mutably. + pub fn interner_mut(&mut self) -> &mut Interner { + &mut self.interner + } + /// A helper function for getting an immutable reference to the `console` object. #[cfg(feature = "console")] pub(crate) fn console(&self) -> &Console { @@ -875,7 +889,7 @@ impl Context { let src_bytes: &[u8] = src.as_ref(); let parsing_result = Parser::new(src_bytes, false) - .parse_all() + .parse_all(&mut self.interner) .map_err(|e| e.to_string()); let statement_list = match parsing_result { diff --git a/boa/src/lib.rs b/boa/src/lib.rs index e4e1ca7194b..b3fba4374d1 100644 --- a/boa/src/lib.rs +++ b/boa/src/lib.rs @@ -64,9 +64,12 @@ pub mod prelude { pub use crate::{object::JsObject, Context, JsBigInt, JsResult, JsString, JsValue}; } -use std::result::Result as StdResult; - pub(crate) use crate::profiler::BoaProfiler; +use std::{num::NonZeroUsize, result::Result as StdResult}; +use string_interner::{backend::BufferBackend, StringInterner, Symbol}; + +#[cfg(feature = "deser")] +use serde::{Deserialize, Serialize}; // Export things to root level #[doc(inline)] @@ -83,14 +86,40 @@ use crate::syntax::{ #[must_use] pub type JsResult = StdResult; +/// The string interner for Boa. +pub type Interner = StringInterner>; + +/// The string symbol type for Boa. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "deser", serde(transparent))] +pub struct Sym { + value: NonZeroUsize, +} + +impl Symbol for Sym { + #[inline] + fn try_from_usize(index: usize) -> Option { + NonZeroUsize::new(index.wrapping_add(1)).map(|value| Self { value }) + } + + #[inline] + fn to_usize(self) -> usize { + self.value.get() - 1 + } +} + /// Parses the given source code. /// /// It will return either the statement list AST node for the code, or a parsing error if something /// goes wrong. #[inline] -pub fn parse>(src: T, strict_mode: bool) -> StdResult { - let src_bytes: &[u8] = src.as_ref(); - Parser::new(src_bytes, strict_mode).parse_all() +pub fn parse>( + src: T, + strict_mode: bool, + interner: &mut Interner, +) -> StdResult { + Parser::new(src.as_ref(), strict_mode).parse_all(interner) } /// Execute the code using an existing Context diff --git a/boa/src/syntax/ast/node/mod.rs b/boa/src/syntax/ast/node/mod.rs index dfe1e890582..c450a7f76cd 100644 --- a/boa/src/syntax/ast/node/mod.rs +++ b/boa/src/syntax/ast/node/mod.rs @@ -667,6 +667,8 @@ unsafe impl Trace for PropertyName { /// level. #[cfg(test)] fn test_formatting(source: &'static str) { + use crate::Interner; + // Remove preceding newline. let source = &source[1..]; @@ -680,7 +682,8 @@ fn test_formatting(source: &'static str) { .map(|l| &l[characters_to_remove..]) // Remove preceding whitespace from each line .collect::>() .join("\n"); - let result = format!("{}", crate::parse(&scenario, false).unwrap()); + let mut interner = Interner::new(); + let result = format!("{}", crate::parse(&scenario, false, &mut interner).unwrap()); if scenario != result { eprint!("========= Expected:\n{}", scenario); eprint!("========= Got:\n{}", result); diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs index 24c94c1903a..8ebe66b43b6 100644 --- a/boa/src/syntax/lexer/comment.rs +++ b/boa/src/syntax/lexer/comment.rs @@ -7,6 +7,7 @@ use crate::{ ast::{Position, Span}, lexer::{Token, TokenKind}, }, + Interner, }; use core::convert::TryFrom; use std::io::Read; @@ -24,7 +25,12 @@ use std::io::Read; pub(super) struct SingleLineComment; impl Tokenizer for SingleLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + interner: &mut Interner, + ) -> Result where R: Read, { @@ -59,7 +65,12 @@ impl Tokenizer for SingleLineComment { pub(super) struct MultiLineComment; impl Tokenizer for MultiLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + _interner: &mut Interner, + ) -> Result where R: Read, { @@ -103,7 +114,12 @@ impl Tokenizer for MultiLineComment { pub(super) struct HashbangComment; impl Tokenizer for HashbangComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + _interner: &mut Interner, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs index 5fef7bf7d13..48cf2d8e462 100644 --- a/boa/src/syntax/lexer/identifier.rs +++ b/boa/src/syntax/lexer/identifier.rs @@ -7,6 +7,7 @@ use crate::{ ast::{Keyword, Position, Span}, lexer::{StringLiteral, Token, TokenKind}, }, + Interner, }; use boa_unicode::UnicodeProperties; use core::convert::TryFrom; @@ -80,7 +81,12 @@ impl Identifier { } impl Tokenizer for Identifier { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + interner: &mut Interner, + ) -> Result where R: Read, { @@ -123,7 +129,7 @@ impl Tokenizer for Identifier { start_pos, )); } - TokenKind::identifier(identifier_name.into_boxed_str()) + TokenKind::identifier(interner.get_or_intern(identifier_name)) }; Ok(Token::new(token_kind, Span::new(start_pos, cursor.pos()))) diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index e2ed8227e65..496bd041a33 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -40,8 +40,11 @@ use self::{ string::StringLiteral, template::TemplateLiteral, }; -use crate::syntax::ast::{Punctuator, Span}; pub use crate::{profiler::BoaProfiler, syntax::ast::Position}; +use crate::{ + syntax::ast::{Punctuator, Span}, + Interner, +}; use core::convert::TryFrom; pub use error::Error; use std::io::Read; @@ -49,7 +52,12 @@ pub use token::{Token, TokenKind}; trait Tokenizer { /// Lexes the next token. - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + interner: &mut Interner, + ) -> Result where R: Read; } @@ -120,7 +128,11 @@ impl Lexer { // that means it could be multiple different tokens depending on the input token. // // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar - pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result + pub(crate) fn lex_slash_token( + &mut self, + start: Position, + interner: &mut Interner, + ) -> Result where R: Read, { @@ -130,11 +142,11 @@ impl Lexer { match c { b'/' => { self.cursor.next_byte()?.expect("/ token vanished"); // Consume the '/' - SingleLineComment.lex(&mut self.cursor, start) + SingleLineComment.lex(&mut self.cursor, start, interner) } b'*' => { self.cursor.next_byte()?.expect("* token vanished"); // Consume the '*' - MultiLineComment.lex(&mut self.cursor, start) + MultiLineComment.lex(&mut self.cursor, start, interner) } ch => { match self.get_goal() { @@ -157,7 +169,7 @@ impl Lexer { } InputElement::RegExp => { // Can be a regular expression. - RegexLiteral.lex(&mut self.cursor, start) + RegexLiteral.lex(&mut self.cursor, start, interner) } } } @@ -173,7 +185,7 @@ impl Lexer { /// Retrieves the next token from the lexer. // We intentionally don't implement Iterator trait as Result