Removed RefCells for dispatcher.
We now explicitly pass that state around. This will reduce the number of
`Mutex`es in a `Send` version of the rewriter.
orium committed Aug 22, 2024
1 parent 9a62d7b commit ee4b343
Showing 13 changed files with 370 additions and 270 deletions.
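Before the diff itself, here is a minimal, self-contained sketch of the pattern the commit description refers to: state that was previously reachable through `Rc<RefCell<...>>` and borrowed ad hoc inside lexer actions is instead carried in a `ParserContext` value that every action receives as `&mut`. The type and field names below only mirror the diff (`ParserContext`, `TreeBuilderSimulator`); their contents are simplified stand-ins, not the real lol-html definitions.

```rust
use std::cell::RefCell;
use std::rc::Rc;

struct TreeBuilderSimulator {
    current_ns: &'static str,
}

// Before: shared state behind Rc<RefCell<...>>; every action borrows at runtime.
// In a `Send` version of the rewriter this interior mutability would have to
// become a `Mutex`.
struct LexerBefore {
    tree_builder_simulator: Rc<RefCell<TreeBuilderSimulator>>,
}

impl LexerBefore {
    fn emit_tag(&mut self) -> &'static str {
        self.tree_builder_simulator.borrow().current_ns
    }
}

// After: the same state lives in a context struct that the caller threads
// through every action explicitly, so plain `&mut` access replaces the
// runtime-checked borrow.
struct ParserContext {
    tree_builder_simulator: TreeBuilderSimulator,
}

struct LexerAfter;

impl LexerAfter {
    fn emit_tag(&mut self, context: &mut ParserContext) -> &'static str {
        context.tree_builder_simulator.current_ns
    }
}

fn main() {
    let mut before = LexerBefore {
        tree_builder_simulator: Rc::new(RefCell::new(TreeBuilderSimulator {
            current_ns: "html",
        })),
    };
    println!("before: {}", before.emit_tag());

    let mut context = ParserContext {
        tree_builder_simulator: TreeBuilderSimulator { current_ns: "html" },
    };
    let mut after = LexerAfter;
    println!("after: {}", after.emit_tag(&mut context));
}
```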
115 changes: 77 additions & 38 deletions src/parser/lexer/actions.rs
@@ -17,17 +17,23 @@ macro_rules! get_token_part_range {
}

impl<S: LexemeSink> StateMachineActions for Lexer<S> {
type Context = ParserContext<S>;

impl_common_sm_actions!();

#[inline]
fn emit_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_eof(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
let lexeme = self.create_lexeme_with_raw_exclusive(input, Some(Eof));

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_text(&mut self, input: &[u8]) -> ActionResult {
fn emit_text(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
if self.pos() > self.lexeme_start {
// NOTE: unlike any other tokens (except EOF), text tokens don't have
// any lexical symbols that determine their bounds. Therefore,
@@ -37,29 +43,33 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
let lexeme =
self.create_lexeme_with_raw_exclusive(input, Some(Text(self.last_text_type)));

self.emit_lexeme(&lexeme)?;
self.emit_lexeme(context, &lexeme)?;
}

Ok(())
}

#[inline]
fn emit_current_token(&mut self, input: &[u8]) -> ActionResult {
fn emit_current_token(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let token = self.current_non_tag_content_token.take();
let lexeme = self.create_lexeme_with_raw_inclusive(input, token);

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_tag(&mut self, input: &[u8]) -> ActionResult {
fn emit_tag(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
let token = self
.current_tag_token
.take()
.expect("Tag token should exist at this point");

let feedback = self
.try_get_tree_builder_feedback(&token)
.try_get_tree_builder_feedback(context, &token)
.map_err(ActionError::from)?;

let mut lexeme = self.create_lexeme_with_raw_inclusive(input, token);
@@ -69,7 +79,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
self.set_last_text_type(TextType::Data);

if let Some(feedback) = feedback {
self.handle_tree_builder_feedback(feedback, &lexeme);
self.handle_tree_builder_feedback(context, feedback, &lexeme);
}

if let StartTag {
@@ -79,11 +89,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
} = lexeme.token_outline
{
self.last_start_tag_name_hash = name_hash;
*ns = self.tree_builder_simulator.borrow().current_ns();
*ns = context.tree_builder_simulator.current_ns();
}

match self
.emit_tag_lexeme(&lexeme)
.emit_tag_lexeme(context, &lexeme)
.map_err(ActionError::RewritingError)?
{
ParserDirective::Lex => Ok(()),
@@ -96,32 +106,44 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn emit_current_token_and_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_current_token_and_eof(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let token = self.current_non_tag_content_token.take();
let lexeme = self.create_lexeme_with_raw_exclusive(input, token);

self.emit_lexeme(&lexeme)?;
self.emit_eof(input)
self.emit_lexeme(context, &lexeme)?;
self.emit_eof(context, input)
}

#[inline]
fn emit_raw_without_token(&mut self, input: &[u8]) -> ActionResult {
fn emit_raw_without_token(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let lexeme = self.create_lexeme_with_raw_inclusive(input, None);

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_raw_without_token_and_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_raw_without_token_and_eof(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
// NOTE: since we are at EOF we use exclusive range for token's raw.
let lexeme = self.create_lexeme_with_raw_exclusive(input, None);

self.emit_lexeme(&lexeme)?;
self.emit_eof(input)
self.emit_lexeme(context, &lexeme)?;
self.emit_eof(context, input)
}

#[inline]
fn create_start_tag(&mut self, _input: &[u8]) {
fn create_start_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.attr_buffer.borrow_mut().clear();

self.current_tag_token = Some(StartTag {
@@ -134,15 +156,15 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn create_end_tag(&mut self, _input: &[u8]) {
fn create_end_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_tag_token = Some(EndTag {
name: Range::default(),
name_hash: LocalNameHash::new(),
});
}

#[inline]
fn create_doctype(&mut self, _input: &[u8]) {
fn create_doctype(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_non_tag_content_token = Some(Doctype {
name: None,
public_id: None,
@@ -152,31 +174,36 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn create_comment(&mut self, _input: &[u8]) {
fn create_comment(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_non_tag_content_token = Some(Comment(Range::default()));
}

#[inline]
fn start_token_part(&mut self, _input: &[u8]) {
fn start_token_part(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.token_part_start = self.pos();
}

#[inline]
fn mark_comment_text_end(&mut self, _input: &[u8]) {
fn mark_comment_text_end(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
*text = get_token_part_range!(self);
}
}

#[inline]
fn shift_comment_text_end_by(&mut self, _input: &[u8], offset: usize) {
fn shift_comment_text_end_by(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
offset: usize,
) {
if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
text.end += offset;
}
}

#[inline]
fn set_force_quirks(&mut self, _input: &[u8]) {
fn set_force_quirks(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Doctype {
ref mut force_quirks,
..
@@ -187,14 +214,18 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_doctype_name(&mut self, _input: &[u8]) {
fn finish_doctype_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Doctype { ref mut name, .. }) = self.current_non_tag_content_token {
*name = Some(get_token_part_range!(self));
}
}

#[inline]
fn finish_doctype_public_id(&mut self, _input: &[u8]) {
fn finish_doctype_public_id(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) {
if let Some(Doctype {
ref mut public_id, ..
}) = self.current_non_tag_content_token
@@ -204,7 +235,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_doctype_system_id(&mut self, _input: &[u8]) {
fn finish_doctype_system_id(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) {
if let Some(Doctype {
ref mut system_id, ..
}) = self.current_non_tag_content_token
@@ -214,7 +249,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_tag_name(&mut self, _input: &[u8]) -> ActionResult {
fn finish_tag_name(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) -> ActionResult {
match self.current_tag_token {
Some(StartTag { ref mut name, .. }) | Some(EndTag { ref mut name, .. }) => {
*name = get_token_part_range!(self)
@@ -226,7 +265,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn update_tag_name_hash(&mut self, input: &[u8]) {
fn update_tag_name_hash(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
if let Some(ch) = input.get(self.pos()).copied() {
match self.current_tag_token {
Some(StartTag {
@@ -241,7 +280,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn mark_as_self_closing(&mut self, _input: &[u8]) {
fn mark_as_self_closing(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(StartTag {
ref mut self_closing,
..
@@ -252,17 +291,17 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn start_attr(&mut self, input: &[u8]) {
fn start_attr(&mut self, context: &mut ParserContext<S>, input: &[u8]) {
// NOTE: create attribute only if we are parsing a start tag
if let Some(StartTag { .. }) = self.current_tag_token {
self.current_attr = Some(AttributeOutline::default());

self.start_token_part(input);
self.start_token_part(context, input);
}
}

#[inline]
fn finish_attr_name(&mut self, _input: &[u8]) {
fn finish_attr_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(AttributeOutline {
ref mut name,
ref mut raw_range,
@@ -275,7 +314,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_attr_value(&mut self, input: &[u8]) {
fn finish_attr_value(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
if let Some(AttributeOutline {
ref mut value,
ref mut raw_range,
@@ -293,7 +332,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_attr(&mut self, _input: &[u8]) {
fn finish_attr(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(attr) = self.current_attr.take() {
self.attr_buffer.borrow_mut().push(attr);
}