Removed RefCells for dispatcher.
We now explicitly pass that state around. This will reduce the number of
`Mutex`es in a `Send` version of the rewriter.
orium committed Aug 22, 2024
1 parent 9a62d7b commit ee4b343
Showing 13 changed files with 370 additions and 270 deletions.
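Before the diff itself, here is a minimal, self-contained sketch of the pattern the commit description refers to: state that was previously reachable through `Rc<RefCell<...>>` and borrowed ad hoc inside lexer actions is instead carried in a `ParserContext` value that every action receives as `&mut`. The type and field names below only mirror the diff (`ParserContext`, `TreeBuilderSimulator`); their contents are simplified stand-ins, not the real lol-html definitions.

```rust
use std::cell::RefCell;
use std::rc::Rc;

struct TreeBuilderSimulator {
    current_ns: &'static str,
}

// Before: shared state behind Rc<RefCell<...>>; every action borrows at runtime.
// In a `Send` version of the rewriter this interior mutability would have to
// become a `Mutex`.
struct LexerBefore {
    tree_builder_simulator: Rc<RefCell<TreeBuilderSimulator>>,
}

impl LexerBefore {
    fn emit_tag(&mut self) -> &'static str {
        self.tree_builder_simulator.borrow().current_ns
    }
}

// After: the same state lives in a context struct that the caller threads
// through every action explicitly, so plain `&mut` access replaces the
// runtime-checked borrow.
struct ParserContext {
    tree_builder_simulator: TreeBuilderSimulator,
}

struct LexerAfter;

impl LexerAfter {
    fn emit_tag(&mut self, context: &mut ParserContext) -> &'static str {
        context.tree_builder_simulator.current_ns
    }
}

fn main() {
    let mut before = LexerBefore {
        tree_builder_simulator: Rc::new(RefCell::new(TreeBuilderSimulator {
            current_ns: "html",
        })),
    };
    println!("before: {}", before.emit_tag());

    let mut context = ParserContext {
        tree_builder_simulator: TreeBuilderSimulator { current_ns: "html" },
    };
    let mut after = LexerAfter;
    println!("after: {}", after.emit_tag(&mut context));
}
```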
115 changes: 77 additions & 38 deletions src/parser/lexer/actions.rs
@@ -17,17 +17,23 @@ macro_rules! get_token_part_range {
}

impl<S: LexemeSink> StateMachineActions for Lexer<S> {
type Context = ParserContext<S>;

impl_common_sm_actions!();

#[inline]
fn emit_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_eof(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
let lexeme = self.create_lexeme_with_raw_exclusive(input, Some(Eof));

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_text(&mut self, input: &[u8]) -> ActionResult {
fn emit_text(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
if self.pos() > self.lexeme_start {
// NOTE: unlike any other tokens (except EOF), text tokens don't have
// any lexical symbols that determine their bounds. Therefore,
@@ -37,29 +43,33 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
let lexeme =
self.create_lexeme_with_raw_exclusive(input, Some(Text(self.last_text_type)));

self.emit_lexeme(&lexeme)?;
self.emit_lexeme(context, &lexeme)?;
}

Ok(())
}

#[inline]
fn emit_current_token(&mut self, input: &[u8]) -> ActionResult {
fn emit_current_token(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let token = self.current_non_tag_content_token.take();
let lexeme = self.create_lexeme_with_raw_inclusive(input, token);

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_tag(&mut self, input: &[u8]) -> ActionResult {
fn emit_tag(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
let token = self
.current_tag_token
.take()
.expect("Tag token should exist at this point");

let feedback = self
.try_get_tree_builder_feedback(&token)
.try_get_tree_builder_feedback(context, &token)
.map_err(ActionError::from)?;

let mut lexeme = self.create_lexeme_with_raw_inclusive(input, token);
@@ -69,7 +79,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
self.set_last_text_type(TextType::Data);

if let Some(feedback) = feedback {
self.handle_tree_builder_feedback(feedback, &lexeme);
self.handle_tree_builder_feedback(context, feedback, &lexeme);
}

if let StartTag {
@@ -79,11 +89,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
} = lexeme.token_outline
{
self.last_start_tag_name_hash = name_hash;
*ns = self.tree_builder_simulator.borrow().current_ns();
*ns = context.tree_builder_simulator.current_ns();
}

match self
.emit_tag_lexeme(&lexeme)
.emit_tag_lexeme(context, &lexeme)
.map_err(ActionError::RewritingError)?
{
ParserDirective::Lex => Ok(()),
@@ -96,32 +106,44 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn emit_current_token_and_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_current_token_and_eof(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let token = self.current_non_tag_content_token.take();
let lexeme = self.create_lexeme_with_raw_exclusive(input, token);

self.emit_lexeme(&lexeme)?;
self.emit_eof(input)
self.emit_lexeme(context, &lexeme)?;
self.emit_eof(context, input)
}

#[inline]
fn emit_raw_without_token(&mut self, input: &[u8]) -> ActionResult {
fn emit_raw_without_token(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
let lexeme = self.create_lexeme_with_raw_inclusive(input, None);

self.emit_lexeme(&lexeme)
self.emit_lexeme(context, &lexeme)
}

#[inline]
fn emit_raw_without_token_and_eof(&mut self, input: &[u8]) -> ActionResult {
fn emit_raw_without_token_and_eof(
&mut self,
context: &mut ParserContext<S>,
input: &[u8],
) -> ActionResult {
// NOTE: since we are at EOF we use exclusive range for token's raw.
let lexeme = self.create_lexeme_with_raw_exclusive(input, None);

self.emit_lexeme(&lexeme)?;
self.emit_eof(input)
self.emit_lexeme(context, &lexeme)?;
self.emit_eof(context, input)
}

#[inline]
fn create_start_tag(&mut self, _input: &[u8]) {
fn create_start_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.attr_buffer.borrow_mut().clear();

self.current_tag_token = Some(StartTag {
@@ -134,15 +156,15 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn create_end_tag(&mut self, _input: &[u8]) {
fn create_end_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_tag_token = Some(EndTag {
name: Range::default(),
name_hash: LocalNameHash::new(),
});
}

#[inline]
fn create_doctype(&mut self, _input: &[u8]) {
fn create_doctype(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_non_tag_content_token = Some(Doctype {
name: None,
public_id: None,
@@ -152,31 +174,36 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn create_comment(&mut self, _input: &[u8]) {
fn create_comment(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.current_non_tag_content_token = Some(Comment(Range::default()));
}

#[inline]
fn start_token_part(&mut self, _input: &[u8]) {
fn start_token_part(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
self.token_part_start = self.pos();
}

#[inline]
fn mark_comment_text_end(&mut self, _input: &[u8]) {
fn mark_comment_text_end(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
*text = get_token_part_range!(self);
}
}

#[inline]
fn shift_comment_text_end_by(&mut self, _input: &[u8], offset: usize) {
fn shift_comment_text_end_by(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
offset: usize,
) {
if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
text.end += offset;
}
}

#[inline]
fn set_force_quirks(&mut self, _input: &[u8]) {
fn set_force_quirks(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Doctype {
ref mut force_quirks,
..
@@ -187,14 +214,18 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_doctype_name(&mut self, _input: &[u8]) {
fn finish_doctype_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(Doctype { ref mut name, .. }) = self.current_non_tag_content_token {
*name = Some(get_token_part_range!(self));
}
}

#[inline]
fn finish_doctype_public_id(&mut self, _input: &[u8]) {
fn finish_doctype_public_id(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) {
if let Some(Doctype {
ref mut public_id, ..
}) = self.current_non_tag_content_token
@@ -204,7 +235,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_doctype_system_id(&mut self, _input: &[u8]) {
fn finish_doctype_system_id(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) {
if let Some(Doctype {
ref mut system_id, ..
}) = self.current_non_tag_content_token
@@ -214,7 +249,11 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_tag_name(&mut self, _input: &[u8]) -> ActionResult {
fn finish_tag_name(
&mut self,
_context: &mut ParserContext<S>,
_input: &[u8],
) -> ActionResult {
match self.current_tag_token {
Some(StartTag { ref mut name, .. }) | Some(EndTag { ref mut name, .. }) => {
*name = get_token_part_range!(self)
@@ -226,7 +265,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn update_tag_name_hash(&mut self, input: &[u8]) {
fn update_tag_name_hash(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
if let Some(ch) = input.get(self.pos()).copied() {
match self.current_tag_token {
Some(StartTag {
@@ -241,7 +280,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn mark_as_self_closing(&mut self, _input: &[u8]) {
fn mark_as_self_closing(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(StartTag {
ref mut self_closing,
..
@@ -252,17 +291,17 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn start_attr(&mut self, input: &[u8]) {
fn start_attr(&mut self, context: &mut ParserContext<S>, input: &[u8]) {
// NOTE: create attribute only if we are parsing a start tag
if let Some(StartTag { .. }) = self.current_tag_token {
self.current_attr = Some(AttributeOutline::default());

self.start_token_part(input);
self.start_token_part(context, input);
}
}

#[inline]
fn finish_attr_name(&mut self, _input: &[u8]) {
fn finish_attr_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(AttributeOutline {
ref mut name,
ref mut raw_range,
@@ -275,7 +314,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_attr_value(&mut self, input: &[u8]) {
fn finish_attr_value(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
if let Some(AttributeOutline {
ref mut value,
ref mut raw_range,
@@ -293,7 +332,7 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
}

#[inline]
fn finish_attr(&mut self, _input: &[u8]) {
fn finish_attr(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
if let Some(attr) = self.current_attr.take() {
self.attr_buffer.borrow_mut().push(attr);
}