From 61d33f9943732f7ad428517d3bf5a9c67ec31be6 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Tue, 6 Aug 2024 17:50:03 +0100 Subject: [PATCH] WIP Make `HtmlRewriter` `Send + Sync`. --- src/base/encoding.rs | 11 ++--- src/memory/arena.rs | 34 ++++++------- src/memory/limited_vec.rs | 39 ++++++++------- src/memory/limiter.rs | 11 ++--- src/parser/lexer/actions.rs | 8 +-- src/parser/lexer/lexeme/token_outline.rs | 2 +- src/parser/lexer/mod.rs | 20 +++----- src/parser/mod.rs | 42 ++++++++-------- src/parser/tag_scanner/mod.rs | 14 ++---- src/parser/tree_builder_simulator/mod.rs | 13 +++-- src/rewritable_units/document_end.rs | 2 +- src/rewritable_units/element.rs | 2 +- src/rewritable_units/tokens/attributes.rs | 5 +- .../tokens/capturer/to_token.rs | 4 +- src/rewritable_units/tokens/comment.rs | 2 +- src/rewritable_units/tokens/doctype.rs | 2 +- src/rewritable_units/tokens/text_chunk.rs | 2 +- src/rewriter/mod.rs | 32 +++++++++--- src/rewriter/rewrite_controller.rs | 29 ++++++----- src/rewriter/settings.rs | 49 +++++++++++++------ src/selectors_vm/attribute_matcher.rs | 3 +- src/selectors_vm/compiler.rs | 8 +-- src/selectors_vm/mod.rs | 16 +++--- src/transform_stream/dispatcher.rs | 8 +-- src/transform_stream/mod.rs | 19 ++++--- 25 files changed, 211 insertions(+), 166 deletions(-) diff --git a/src/base/encoding.rs b/src/base/encoding.rs index 34878f0f..ee39306d 100644 --- a/src/base/encoding.rs +++ b/src/base/encoding.rs @@ -1,8 +1,7 @@ use crate::rewriter::AsciiCompatibleEncoding; use encoding_rs::Encoding; -use std::cell::Cell; use std::ops::Deref; -use std::rc::Rc; +use std::sync::{Arc, Mutex}; /// A charset encoding that can be shared and modified. /// @@ -11,22 +10,22 @@ use std::rc::Rc; /// [crate::Settings::adjust_charset_on_meta_tag]). #[derive(Clone)] pub struct SharedEncoding { - encoding: Rc>, + encoding: Arc>, } impl SharedEncoding { pub fn new(encoding: AsciiCompatibleEncoding) -> SharedEncoding { SharedEncoding { - encoding: Rc::new(Cell::new(encoding)), + encoding: Arc::new(Mutex::new(encoding)), } } pub fn get(&self) -> &'static Encoding { - self.encoding.get().into() + (*self.encoding.lock().unwrap().deref()).into() } pub fn set(&self, encoding: AsciiCompatibleEncoding) { - self.encoding.set(encoding); + *self.encoding.lock().unwrap() = encoding; } } diff --git a/src/memory/arena.rs b/src/memory/arena.rs index 1cb32f2f..524cbd42 100644 --- a/src/memory/arena.rs +++ b/src/memory/arena.rs @@ -10,7 +10,7 @@ pub struct Arena { impl Arena { pub fn new(limiter: SharedMemoryLimiter, preallocated_size: usize) -> Self { - limiter.borrow_mut().preallocate(preallocated_size); + limiter.lock().unwrap().preallocate(preallocated_size); Arena { limiter, @@ -27,7 +27,7 @@ impl Arena { // NOTE: approximate usage, as `Vec::reserve_exact` doesn't // give guarantees about exact capacity value :). - self.limiter.borrow_mut().increase_usage(additional)?; + self.limiter.lock().unwrap().increase_usage(additional)?; // NOTE: with wicely choosen preallocated size this branch should be // executed quite rarely. 
We can't afford to use double capacity @@ -60,24 +60,24 @@ impl Arena { mod tests { use super::super::limiter::MemoryLimiter; use super::*; - use std::rc::Rc; + use std::sync::Arc; #[test] fn append() { let limiter = MemoryLimiter::new_shared(10); - let mut arena = Arena::new(Rc::clone(&limiter), 2); + let mut arena = Arena::new(Arc::clone(&limiter), 2); arena.append(&[1, 2]).unwrap(); assert_eq!(arena.bytes(), &[1, 2]); - assert_eq!(limiter.borrow().current_usage(), 2); + assert_eq!(limiter.lock().unwrap().current_usage(), 2); arena.append(&[3, 4]).unwrap(); assert_eq!(arena.bytes(), &[1, 2, 3, 4]); - assert_eq!(limiter.borrow().current_usage(), 4); + assert_eq!(limiter.lock().unwrap().current_usage(), 4); arena.append(&[5, 6, 7, 8, 9, 10]).unwrap(); assert_eq!(arena.bytes(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - assert_eq!(limiter.borrow().current_usage(), 10); + assert_eq!(limiter.lock().unwrap().current_usage(), 10); let err = arena.append(&[11]).unwrap_err(); @@ -87,23 +87,23 @@ mod tests { #[test] fn init_with() { let limiter = MemoryLimiter::new_shared(5); - let mut arena = Arena::new(Rc::clone(&limiter), 0); + let mut arena = Arena::new(Arc::clone(&limiter), 0); arena.init_with(&[1]).unwrap(); assert_eq!(arena.bytes(), &[1]); - assert_eq!(limiter.borrow().current_usage(), 1); + assert_eq!(limiter.lock().unwrap().current_usage(), 1); arena.append(&[1, 2]).unwrap(); assert_eq!(arena.bytes(), &[1, 1, 2]); - assert_eq!(limiter.borrow().current_usage(), 3); + assert_eq!(limiter.lock().unwrap().current_usage(), 3); arena.init_with(&[1, 2, 3]).unwrap(); assert_eq!(arena.bytes(), &[1, 2, 3]); - assert_eq!(limiter.borrow().current_usage(), 3); + assert_eq!(limiter.lock().unwrap().current_usage(), 3); arena.init_with(&[]).unwrap(); assert_eq!(arena.bytes(), &[]); - assert_eq!(limiter.borrow().current_usage(), 3); + assert_eq!(limiter.lock().unwrap().current_usage(), 3); let err = arena.init_with(&[1, 2, 3, 4, 5, 6, 7]).unwrap_err(); @@ -113,24 +113,24 @@ mod tests { #[test] fn shift() { let limiter = MemoryLimiter::new_shared(10); - let mut arena = Arena::new(Rc::clone(&limiter), 0); + let mut arena = Arena::new(Arc::clone(&limiter), 0); arena.append(&[0, 1, 2, 3]).unwrap(); arena.shift(2); assert_eq!(arena.bytes(), &[2, 3]); - assert_eq!(limiter.borrow().current_usage(), 4); + assert_eq!(limiter.lock().unwrap().current_usage(), 4); arena.append(&[0, 1]).unwrap(); assert_eq!(arena.bytes(), &[2, 3, 0, 1]); - assert_eq!(limiter.borrow().current_usage(), 4); + assert_eq!(limiter.lock().unwrap().current_usage(), 4); arena.shift(3); assert_eq!(arena.bytes(), &[1]); - assert_eq!(limiter.borrow().current_usage(), 4); + assert_eq!(limiter.lock().unwrap().current_usage(), 4); arena.append(&[2, 3, 4, 5]).unwrap(); arena.shift(1); assert_eq!(arena.bytes(), &[2, 3, 4, 5]); - assert_eq!(limiter.borrow().current_usage(), 5); + assert_eq!(limiter.lock().unwrap().current_usage(), 5); } } diff --git a/src/memory/limited_vec.rs b/src/memory/limited_vec.rs index a79f8340..e2f58584 100644 --- a/src/memory/limited_vec.rs +++ b/src/memory/limited_vec.rs @@ -21,7 +21,10 @@ impl LimitedVec { } pub fn push(&mut self, element: T) -> Result<(), MemoryLimitExceededError> { - self.limiter.borrow_mut().increase_usage(size_of::())?; + self.limiter + .lock() + .unwrap() + .increase_usage(size_of::())?; self.vec.push(element); Ok(()) } @@ -65,7 +68,8 @@ impl LimitedVec { }; self.limiter - .borrow_mut() + .lock() + .unwrap() .decrease_usage(size_of::() * (end - start)); self.vec.drain(range) @@ -92,7 +96,8 @@ impl Index for 
LimitedVec { impl Drop for LimitedVec { fn drop(&mut self) { self.limiter - .borrow_mut() + .lock() + .unwrap() .decrease_usage(size_of::() * self.vec.len()); } } @@ -101,33 +106,33 @@ impl Drop for LimitedVec { mod tests { use super::super::MemoryLimiter; use super::*; - use std::rc::Rc; + use std::sync::Arc; #[test] fn current_usage() { { let limiter = MemoryLimiter::new_shared(10); - let mut vec_u8: LimitedVec = LimitedVec::new(Rc::clone(&limiter)); + let mut vec_u8: LimitedVec = LimitedVec::new(Arc::clone(&limiter)); vec_u8.push(1).unwrap(); vec_u8.push(2).unwrap(); - assert_eq!(limiter.borrow().current_usage(), 2); + assert_eq!(limiter.lock().unwrap().current_usage(), 2); } { let limiter = MemoryLimiter::new_shared(10); - let mut vec_u32: LimitedVec = LimitedVec::new(Rc::clone(&limiter)); + let mut vec_u32: LimitedVec = LimitedVec::new(Arc::clone(&limiter)); vec_u32.push(1).unwrap(); vec_u32.push(2).unwrap(); - assert_eq!(limiter.borrow().current_usage(), 8); + assert_eq!(limiter.lock().unwrap().current_usage(), 8); } } #[test] fn max_limit() { let limiter = MemoryLimiter::new_shared(2); - let mut vector: LimitedVec = LimitedVec::new(Rc::clone(&limiter)); + let mut vector: LimitedVec = LimitedVec::new(Arc::clone(&limiter)); vector.push(1).unwrap(); vector.push(2).unwrap(); @@ -142,35 +147,35 @@ mod tests { let limiter = MemoryLimiter::new_shared(1); { - let mut vector: LimitedVec = LimitedVec::new(Rc::clone(&limiter)); + let mut vector: LimitedVec = LimitedVec::new(Arc::clone(&limiter)); vector.push(1).unwrap(); - assert_eq!(limiter.borrow().current_usage(), 1); + assert_eq!(limiter.lock().unwrap().current_usage(), 1); } - assert_eq!(limiter.borrow().current_usage(), 0); + assert_eq!(limiter.lock().unwrap().current_usage(), 0); } #[test] fn drain() { let limiter = MemoryLimiter::new_shared(10); - let mut vector: LimitedVec = LimitedVec::new(Rc::clone(&limiter)); + let mut vector: LimitedVec = LimitedVec::new(Arc::clone(&limiter)); vector.push(1).unwrap(); vector.push(2).unwrap(); vector.push(3).unwrap(); - assert_eq!(limiter.borrow().current_usage(), 3); + assert_eq!(limiter.lock().unwrap().current_usage(), 3); vector.drain(0..3); - assert_eq!(limiter.borrow().current_usage(), 0); + assert_eq!(limiter.lock().unwrap().current_usage(), 0); vector.push(1).unwrap(); vector.push(2).unwrap(); vector.push(3).unwrap(); vector.push(4).unwrap(); - assert_eq!(limiter.borrow().current_usage(), 4); + assert_eq!(limiter.lock().unwrap().current_usage(), 4); vector.drain(1..=2); - assert_eq!(limiter.borrow().current_usage(), 2); + assert_eq!(limiter.lock().unwrap().current_usage(), 2); } } diff --git a/src/memory/limiter.rs b/src/memory/limiter.rs index b3ebc9e2..012b872d 100644 --- a/src/memory/limiter.rs +++ b/src/memory/limiter.rs @@ -1,8 +1,7 @@ -use std::cell::RefCell; -use std::rc::Rc; +use std::sync::{Arc, Mutex}; use thiserror::Error; -pub type SharedMemoryLimiter = Rc>; +pub type SharedMemoryLimiter = Arc>; /// An error that occures when rewriter exceedes the memory limit specified in the /// [`MemorySettings`]. 
@@ -20,7 +19,7 @@ pub struct MemoryLimiter { impl MemoryLimiter { pub fn new_shared(max: usize) -> SharedMemoryLimiter { - Rc::new(RefCell::new(MemoryLimiter { + Arc::new(Mutex::new(MemoryLimiter { max, current_usage: 0, })) @@ -62,7 +61,7 @@ mod tests { #[test] fn current_usage() { let limiter = MemoryLimiter::new_shared(10); - let mut limiter = limiter.borrow_mut(); + let mut limiter = limiter.lock().unwrap(); assert_eq!(limiter.current_usage(), 0); @@ -86,7 +85,7 @@ mod tests { )] fn preallocate() { let limiter = MemoryLimiter::new_shared(10); - let mut limiter = limiter.borrow_mut(); + let mut limiter = limiter.lock().unwrap(); limiter.preallocate(8); assert_eq!(limiter.current_usage(), 8); diff --git a/src/parser/lexer/actions.rs b/src/parser/lexer/actions.rs index e5389a75..e438b6cc 100644 --- a/src/parser/lexer/actions.rs +++ b/src/parser/lexer/actions.rs @@ -79,7 +79,7 @@ impl StateMachineActions for Lexer { } = lexeme.token_outline { self.last_start_tag_name_hash = name_hash; - *ns = self.tree_builder_simulator.borrow().current_ns(); + *ns = self.tree_builder_simulator.lock().unwrap().current_ns(); } match self @@ -122,13 +122,13 @@ impl StateMachineActions for Lexer { #[inline] fn create_start_tag(&mut self, _input: &[u8]) { - self.attr_buffer.borrow_mut().clear(); + self.attr_buffer.lock().unwrap().clear(); self.current_tag_token = Some(StartTag { name: Range::default(), name_hash: LocalNameHash::new(), ns: Namespace::default(), - attributes: Rc::clone(&self.attr_buffer), + attributes: Arc::clone(&self.attr_buffer), self_closing: false, }); } @@ -295,7 +295,7 @@ impl StateMachineActions for Lexer { #[inline] fn finish_attr(&mut self, _input: &[u8]) { if let Some(attr) = self.current_attr.take() { - self.attr_buffer.borrow_mut().push(attr); + self.attr_buffer.lock().unwrap().push(attr); } } diff --git a/src/parser/lexer/lexeme/token_outline.rs b/src/parser/lexer/lexeme/token_outline.rs index f1ad1765..e1ed0de8 100644 --- a/src/parser/lexer/lexeme/token_outline.rs +++ b/src/parser/lexer/lexeme/token_outline.rs @@ -57,7 +57,7 @@ impl Align for TagTokenOutline { name, attributes, .. } => { name.align(offset); - attributes.borrow_mut().align(offset); + attributes.lock().unwrap().align(offset); } TagTokenOutline::EndTag { name, .. 
} => name.align(offset), } diff --git a/src/parser/lexer/mod.rs b/src/parser/lexer/mod.rs index 491e6c15..bbe60b77 100644 --- a/src/parser/lexer/mod.rs +++ b/src/parser/lexer/mod.rs @@ -4,6 +4,7 @@ mod actions; mod conditions; mod lexeme; +pub use self::lexeme::*; use crate::base::{Align, Range}; use crate::html::{LocalNameHash, Namespace, TextType}; use crate::parser::state_machine::{ @@ -13,10 +14,7 @@ use crate::parser::{ ParserDirective, ParsingAmbiguityError, TreeBuilderFeedback, TreeBuilderSimulator, }; use crate::rewriter::RewritingError; -use std::cell::RefCell; -use std::rc::Rc; - -pub use self::lexeme::*; +use std::sync::{Arc, Mutex}; const DEFAULT_ATTR_BUFFER_CAPACITY: usize = 256; @@ -29,7 +27,7 @@ pub trait LexemeSink { } pub type State = fn(&mut Lexer, &[u8]) -> StateResult; -pub type SharedAttributeBuffer = Rc>>; +pub type SharedAttributeBuffer = Arc>>; pub struct Lexer { next_pos: usize, @@ -46,13 +44,13 @@ pub struct Lexer { last_start_tag_name_hash: LocalNameHash, closing_quote: u8, attr_buffer: SharedAttributeBuffer, - tree_builder_simulator: Rc>, + tree_builder_simulator: Arc>, last_text_type: TextType, feedback_directive: FeedbackDirective, } impl Lexer { - pub fn new(lexeme_sink: S, tree_builder_simulator: Rc>) -> Self { + pub fn new(lexeme_sink: S, tree_builder_simulator: Arc>) -> Self { Lexer { next_pos: 0, is_last_input: false, @@ -67,9 +65,7 @@ impl Lexer { current_attr: None, last_start_tag_name_hash: LocalNameHash::default(), closing_quote: b'"', - attr_buffer: Rc::new(RefCell::new(Vec::with_capacity( - DEFAULT_ATTR_BUFFER_CAPACITY, - ))), + attr_buffer: Arc::new(Mutex::new(Vec::with_capacity(DEFAULT_ATTR_BUFFER_CAPACITY))), tree_builder_simulator, last_text_type: TextType::Data, feedback_directive: FeedbackDirective::None, @@ -84,7 +80,7 @@ impl Lexer { FeedbackDirective::ApplyUnhandledFeedback(feedback) => Some(feedback), FeedbackDirective::Skip => None, FeedbackDirective::None => Some({ - let mut simulator = self.tree_builder_simulator.borrow_mut(); + let mut simulator = self.tree_builder_simulator.lock().unwrap(); match *token { TagTokenOutline::StartTag { name_hash, .. 
} => { @@ -103,7 +99,7 @@ impl Lexer { TreeBuilderFeedback::SwitchTextType(text_type) => self.set_last_text_type(text_type), TreeBuilderFeedback::SetAllowCdata(cdata_allowed) => self.cdata_allowed = cdata_allowed, TreeBuilderFeedback::RequestLexeme(mut callback) => { - let feedback = callback(&mut self.tree_builder_simulator.borrow_mut(), lexeme); + let feedback = callback(&mut self.tree_builder_simulator.lock().unwrap(), lexeme); self.handle_tree_builder_feedback(feedback, lexeme); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8da6f7c2..e7d45573 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6,21 +6,19 @@ mod tag_scanner; mod tree_builder_simulator; use self::lexer::Lexer; -use self::state_machine::{ActionError, ParsingTermination, StateMachine}; -use self::tag_scanner::TagScanner; -use self::tree_builder_simulator::{TreeBuilderFeedback, TreeBuilderSimulator}; -use crate::html::{LocalName, Namespace}; -use crate::rewriter::RewritingError; -use cfg_if::cfg_if; -use std::cell::RefCell; -use std::rc::Rc; - pub use self::lexer::{ AttributeOutline, Lexeme, LexemeSink, NonTagContentLexeme, NonTagContentTokenOutline, SharedAttributeBuffer, TagLexeme, TagTokenOutline, }; +use self::state_machine::{ActionError, ParsingTermination, StateMachine}; pub use self::tag_scanner::TagHintSink; +use self::tag_scanner::TagScanner; pub use self::tree_builder_simulator::ParsingAmbiguityError; +use self::tree_builder_simulator::{TreeBuilderFeedback, TreeBuilderSimulator}; +use crate::html::{LocalName, Namespace}; +use crate::rewriter::RewritingError; +use cfg_if::cfg_if; +use std::sync::{Arc, Mutex}; // NOTE: tag scanner can implicitly force parser to switch to // the lexer mode if it fails to get tree builder feedback. It's up @@ -32,10 +30,10 @@ pub enum ParserDirective { Lex, } -impl LexemeSink for Rc> { +impl LexemeSink for Arc> { #[inline] fn handle_tag(&mut self, lexeme: &TagLexeme) -> Result { - self.borrow_mut().handle_tag(lexeme) + self.lock().unwrap().handle_tag(lexeme) } #[inline] @@ -43,31 +41,31 @@ impl LexemeSink for Rc> { &mut self, lexeme: &NonTagContentLexeme, ) -> Result<(), RewritingError> { - self.borrow_mut().handle_non_tag_content(lexeme) + self.lock().unwrap().handle_non_tag_content(lexeme) } } -impl TagHintSink for Rc> { +impl TagHintSink for Arc> { #[inline] fn handle_start_tag_hint( &mut self, name: LocalName, ns: Namespace, ) -> Result { - self.borrow_mut().handle_start_tag_hint(name, ns) + self.lock().unwrap().handle_start_tag_hint(name, ns) } #[inline] fn handle_end_tag_hint(&mut self, name: LocalName) -> Result { - self.borrow_mut().handle_end_tag_hint(name) + self.lock().unwrap().handle_end_tag_hint(name) } } pub trait ParserOutputSink: LexemeSink + TagHintSink {} pub struct Parser { - lexer: Lexer>>, - tag_scanner: TagScanner>>, + lexer: Lexer>>, + tag_scanner: TagScanner>>, current_directive: ParserDirective, } @@ -85,17 +83,17 @@ macro_rules! 
with_current_sm { impl Parser { pub fn new( - output_sink: &Rc>, + output_sink: &Arc>, initial_directive: ParserDirective, strict: bool, ) -> Self { - let tree_builder_simulator = Rc::new(RefCell::new(TreeBuilderSimulator::new(strict))); + let tree_builder_simulator = Arc::new(Mutex::new(TreeBuilderSimulator::new(strict))); Parser { - lexer: Lexer::new(Rc::clone(output_sink), Rc::clone(&tree_builder_simulator)), + lexer: Lexer::new(Arc::clone(output_sink), Arc::clone(&tree_builder_simulator)), tag_scanner: TagScanner::new( - Rc::clone(output_sink), - Rc::clone(&tree_builder_simulator), + Arc::clone(output_sink), + Arc::clone(&tree_builder_simulator), ), current_directive: initial_directive, } diff --git a/src/parser/tag_scanner/mod.rs b/src/parser/tag_scanner/mod.rs index ec60f836..67a88a4d 100644 --- a/src/parser/tag_scanner/mod.rs +++ b/src/parser/tag_scanner/mod.rs @@ -9,9 +9,8 @@ use crate::parser::{ ParserDirective, ParsingAmbiguityError, TreeBuilderFeedback, TreeBuilderSimulator, }; use crate::rewriter::RewritingError; -use std::cell::RefCell; use std::cmp::min; -use std::rc::Rc; +use std::sync::{Arc, Mutex}; pub trait TagHintSink { fn handle_start_tag_hint( @@ -49,16 +48,13 @@ pub struct TagScanner { tag_hint_sink: S, state: State, closing_quote: u8, - tree_builder_simulator: Rc>, + tree_builder_simulator: Arc>, pending_text_type_change: Option, last_text_type: TextType, } impl TagScanner { - pub fn new( - tag_hint_sink: S, - tree_builder_simulator: Rc>, - ) -> Self { + pub fn new(tag_hint_sink: S, tree_builder_simulator: Arc>) -> Self { TagScanner { next_pos: 0, is_last_input: false, @@ -99,7 +95,7 @@ impl TagScanner { } else { self.last_start_tag_name_hash = self.tag_name_hash; - let ns = self.tree_builder_simulator.borrow_mut().current_ns(); + let ns = self.tree_builder_simulator.lock().unwrap().current_ns(); self.tag_hint_sink.handle_start_tag_hint(name, ns) } @@ -109,7 +105,7 @@ impl TagScanner { fn try_apply_tree_builder_feedback( &mut self, ) -> Result, ParsingAmbiguityError> { - let mut tree_builder_simulator = self.tree_builder_simulator.borrow_mut(); + let mut tree_builder_simulator = self.tree_builder_simulator.lock().unwrap(); let feedback = if self.is_in_end_tag { tree_builder_simulator.get_feedback_for_end_tag(self.tag_name_hash) diff --git a/src/parser/tree_builder_simulator/mod.rs b/src/parser/tree_builder_simulator/mod.rs index d224aa78..9792d1a5 100644 --- a/src/parser/tree_builder_simulator/mod.rs +++ b/src/parser/tree_builder_simulator/mod.rs @@ -28,7 +28,9 @@ pub enum TreeBuilderFeedback { SwitchTextType(TextType), SetAllowCdata(bool), #[allow(clippy::type_complexity)] - RequestLexeme(Box TreeBuilderFeedback>), + RequestLexeme( + Box TreeBuilderFeedback + Sync + Send>, + ), None, } @@ -41,7 +43,10 @@ impl From for TreeBuilderFeedback { #[inline] fn request_lexeme( - callback: impl FnMut(&mut TreeBuilderSimulator, &TagLexeme) -> TreeBuilderFeedback + 'static, + callback: impl FnMut(&mut TreeBuilderSimulator, &TagLexeme) -> TreeBuilderFeedback + + 'static + + Send + + Sync, ) -> TreeBuilderFeedback { TreeBuilderFeedback::RequestLexeme(Box::new(callback)) } @@ -250,7 +255,7 @@ impl TreeBuilderSimulator { // to decide on foreign context exit return request_lexeme(|this, lexeme| { expect_tag!(lexeme, StartTag { ref attributes, .. 
} => { - for attr in attributes.borrow().iter() { + for attr in attributes.lock().unwrap().iter() { let name = lexeme.part(attr.name); if eq_case_insensitive(&name, b"color") @@ -279,7 +284,7 @@ impl TreeBuilderSimulator { let name = lexeme.part(name); if !self_closing && eq_case_insensitive(&name, b"annotation-xml") { - for attr in attributes.borrow().iter() { + for attr in attributes.lock().unwrap().iter() { let name = lexeme.part(attr.name); let value = lexeme.part(attr.value); diff --git a/src/rewritable_units/document_end.rs b/src/rewritable_units/document_end.rs index daa5af2c..49b58bf3 100644 --- a/src/rewritable_units/document_end.rs +++ b/src/rewritable_units/document_end.rs @@ -64,7 +64,7 @@ mod tests { fn rewrite_on_end( html: &[u8], encoding: &'static Encoding, - mut handler: impl FnMut(&mut DocumentEnd), + mut handler: impl FnMut(&mut DocumentEnd) + Send, ) -> String { let mut handler_called = false; diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index ed3bb295..854f05f4 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -595,7 +595,7 @@ mod tests { html: &[u8], encoding: &'static Encoding, selector: &str, - mut handler: impl FnMut(&mut Element), + mut handler: impl FnMut(&mut Element) + Send, ) -> String { let mut handler_called = false; diff --git a/src/rewritable_units/tokens/attributes.rs b/src/rewritable_units/tokens/attributes.rs index 8ad8d11b..083d8893 100644 --- a/src/rewritable_units/tokens/attributes.rs +++ b/src/rewritable_units/tokens/attributes.rs @@ -196,7 +196,8 @@ impl<'i> Attributes<'i> { fn init_items(&self) -> Vec> { self.attribute_buffer - .borrow() + .lock() + .unwrap() .iter() .map(|a| { Attribute::new( @@ -228,7 +229,7 @@ impl<'i> Attributes<'i> { #[cfg(test)] pub fn raw_attributes(&self) -> (&'i Bytes<'i>, SharedAttributeBuffer) { - (self.input, std::rc::Rc::clone(&self.attribute_buffer)) + (self.input, std::sync::Arc::clone(&self.attribute_buffer)) } } diff --git a/src/rewritable_units/tokens/capturer/to_token.rs b/src/rewritable_units/tokens/capturer/to_token.rs index 70a2923e..dbecb493 100644 --- a/src/rewritable_units/tokens/capturer/to_token.rs +++ b/src/rewritable_units/tokens/capturer/to_token.rs @@ -2,7 +2,7 @@ use super::*; use crate::html::TextType; use crate::parser::{NonTagContentLexeme, NonTagContentTokenOutline, TagLexeme, TagTokenOutline}; use encoding_rs::Encoding; -use std::rc::Rc; +use std::sync::Arc; pub enum ToTokenResult<'i> { Token(Box>), @@ -44,7 +44,7 @@ impl ToToken for TagLexeme<'_> { StartTag::new_token( self.part(name), - Attributes::new(self.input(), Rc::clone(attributes), encoding), + Attributes::new(self.input(), Arc::clone(attributes), encoding), ns, self_closing, self.raw(), diff --git a/src/rewritable_units/tokens/comment.rs b/src/rewritable_units/tokens/comment.rs index 4af85822..3c6025d0 100644 --- a/src/rewritable_units/tokens/comment.rs +++ b/src/rewritable_units/tokens/comment.rs @@ -216,7 +216,7 @@ mod tests { fn rewrite_comment( html: &[u8], encoding: &'static Encoding, - mut handler: impl FnMut(&mut Comment), + mut handler: impl FnMut(&mut Comment) + Send, ) -> String { let mut handler_called = false; diff --git a/src/rewritable_units/tokens/doctype.rs b/src/rewritable_units/tokens/doctype.rs index 3e3fb27c..81e80d10 100644 --- a/src/rewritable_units/tokens/doctype.rs +++ b/src/rewritable_units/tokens/doctype.rs @@ -136,7 +136,7 @@ mod tests { fn rewrite_doctype( html: &[u8], encoding: &'static Encoding, - mut handler: impl FnMut(&mut Doctype), 
+ mut handler: impl FnMut(&mut Doctype) + Send, ) -> String { let mut handler_called = false; diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index 7d24f332..da3ac66e 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -297,7 +297,7 @@ mod tests { fn rewrite_text_chunk( html: &[u8], encoding: &'static Encoding, - mut handler: impl FnMut(&mut TextChunk), + mut handler: impl FnMut(&mut TextChunk) + Send, ) -> String { let mut handler_called = false; diff --git a/src/rewriter/mod.rs b/src/rewriter/mod.rs index 0cb733b8..6821bb16 100644 --- a/src/rewriter/mod.rs +++ b/src/rewriter/mod.rs @@ -17,7 +17,7 @@ use mime::Mime; use std::borrow::Cow; use std::error::Error as StdError; use std::fmt::{self, Debug}; -use std::rc::Rc; +use std::sync::Arc; use thiserror::Error; pub use self::settings::*; @@ -198,7 +198,7 @@ impl<'h, O: OutputSink> HtmlRewriter<'h, O> { Some(SelectorMatchingVm::new( selectors_ast, settings.encoding.into(), - Rc::clone(&memory_limiter), + Arc::clone(&memory_limiter), settings.enable_esi_tags, )) } else { @@ -344,9 +344,8 @@ mod tests { use crate::test_utils::{Output, ASCII_COMPATIBLE_ENCODINGS, NON_ASCII_COMPATIBLE_ENCODINGS}; use encoding_rs::Encoding; use itertools::Itertools; - use std::cell::RefCell; use std::convert::TryInto; - use std::rc::Rc; + use std::sync::Mutex; fn write_chunks( mut rewriter: HtmlRewriter, @@ -373,6 +372,23 @@ mod tests { out } + // WIP! statis assert + #[test] + fn foobar() { + struct Foo {} + impl OutputSink for Foo { + fn handle_chunk(&mut self, _: &[u8]) {} + } + + fn is_send() {} + + is_send::>(); + + fn is_sync() {} + + is_sync::>(); + } + #[test] fn rewrite_html_str() { let res = rewrite_str( @@ -573,15 +589,15 @@ mod tests { #[test] fn handler_invocation_order() { - let handlers_executed = Rc::new(RefCell::new(Vec::default())); + let handlers_executed = Arc::new(Mutex::new(Vec::default())); macro_rules! 
create_handlers { ($sel:expr, $idx:expr) => { element!($sel, { - let handlers_executed = Rc::clone(&handlers_executed); + let handlers_executed = std::sync::Arc::clone(&handlers_executed); move |_| { - handlers_executed.borrow_mut().push($idx); + handlers_executed.lock().unwrap().push($idx); Ok(()) } }) @@ -603,7 +619,7 @@ mod tests { ) .unwrap(); - assert_eq!(*handlers_executed.borrow(), vec![0, 1, 2, 3, 4]); + assert_eq!(*handlers_executed.lock().unwrap(), vec![0, 1, 2, 3, 4]); } #[test] diff --git a/src/rewriter/rewrite_controller.rs b/src/rewriter/rewrite_controller.rs index 1ba76bb5..3d956f1d 100644 --- a/src/rewriter/rewrite_controller.rs +++ b/src/rewriter/rewrite_controller.rs @@ -5,8 +5,7 @@ use crate::rewritable_units::{DocumentEnd, Token, TokenCaptureFlags}; use crate::selectors_vm::{AuxStartTagInfoRequest, ElementData, SelectorMatchingVm, VmError}; use crate::transform_stream::*; use hashbrown::HashSet; -use std::cell::RefCell; -use std::rc::Rc; +use std::sync::{Arc, Mutex}; #[derive(Default)] pub struct ElementDescriptor { @@ -25,7 +24,7 @@ impl ElementData for ElementDescriptor { } pub struct HtmlRewriteController<'h> { - handlers_dispatcher: Rc>>, + handlers_dispatcher: Arc>>, selector_matching_vm: Option>, } @@ -36,7 +35,7 @@ impl<'h> HtmlRewriteController<'h> { selector_matching_vm: Option>, ) -> Self { HtmlRewriteController { - handlers_dispatcher: Rc::new(RefCell::new(handlers_dispatcher)), + handlers_dispatcher: Arc::new(Mutex::new(handlers_dispatcher)), selector_matching_vm, } } @@ -46,9 +45,9 @@ impl<'h> HtmlRewriteController<'h> { // when we hold a mutable reference for the selector matching VM. macro_rules! create_match_handler { ($self:tt) => {{ - let handlers_dispatcher = Rc::clone(&$self.handlers_dispatcher); + let handlers_dispatcher = Arc::clone(&$self.handlers_dispatcher); - move |m| handlers_dispatcher.borrow_mut().start_matching(m) + move |m| handlers_dispatcher.lock().unwrap().start_matching(m) }}; } @@ -73,7 +72,10 @@ impl<'h> HtmlRewriteController<'h> { #[inline] fn get_capture_flags(&self) -> TokenCaptureFlags { - self.handlers_dispatcher.borrow().get_token_capture_flags() + self.handlers_dispatcher + .lock() + .unwrap() + .get_token_capture_flags() } } @@ -108,10 +110,10 @@ impl TransformController for HtmlRewriteController<'_> { fn handle_end_tag(&mut self, local_name: LocalName) -> TokenCaptureFlags { if let Some(ref mut vm) = self.selector_matching_vm { - let handlers_dispatcher = Rc::clone(&self.handlers_dispatcher); + let handlers_dispatcher = Arc::clone(&self.handlers_dispatcher); vm.exec_for_end_tag(local_name, move |elem_desc| { - handlers_dispatcher.borrow_mut().stop_matching(elem_desc); + handlers_dispatcher.lock().unwrap().stop_matching(elem_desc); }); } @@ -126,14 +128,16 @@ impl TransformController for HtmlRewriteController<'_> { .and_then(SelectorMatchingVm::current_element_data_mut); self.handlers_dispatcher - .borrow_mut() + .lock() + .unwrap() .handle_token(token, current_element_data) .map_err(RewritingError::ContentHandlerError) } fn handle_end(&mut self, document_end: &mut DocumentEnd) -> Result<(), RewritingError> { self.handlers_dispatcher - .borrow_mut() + .lock() + .unwrap() .handle_end(document_end) .map_err(RewritingError::ContentHandlerError) } @@ -142,7 +146,8 @@ impl TransformController for HtmlRewriteController<'_> { fn should_emit_content(&self) -> bool { !self .handlers_dispatcher - .borrow() + .lock() + .unwrap() .has_matched_elements_with_removed_content() } } diff --git a/src/rewriter/settings.rs 
b/src/rewriter/settings.rs index 89d1cf55..c970f64b 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -10,17 +10,17 @@ pub type HandlerResult = Result<(), Box>; /// Handler for the [document type declaration]. /// /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype -pub type DoctypeHandler<'h> = Box HandlerResult + 'h>; +pub type DoctypeHandler<'h> = Box HandlerResult + 'h + Send>; /// Handler for HTML comments. -pub type CommentHandler<'h> = Box HandlerResult + 'h>; +pub type CommentHandler<'h> = Box HandlerResult + 'h + Send>; /// Handler for text chunks present the HTML. -pub type TextHandler<'h> = Box HandlerResult + 'h>; +pub type TextHandler<'h> = Box HandlerResult + 'h + Send>; /// Handler for elements matched by a selector. -pub type ElementHandler<'h> = Box HandlerResult + 'h>; +pub type ElementHandler<'h> = Box HandlerResult + 'h + Send>; /// Handler for an end tag. -pub type EndTagHandler<'h> = Box HandlerResult + 'h>; +pub type EndTagHandler<'h> = Box HandlerResult + 'h + Send>; /// Handler for the document end, which is called after the last chunk is processed. -pub type EndHandler<'h> = Box HandlerResult + 'h>; +pub type EndHandler<'h> = Box HandlerResult + 'h + Send>; /// Specifies element content handlers associated with a selector. #[derive(Default)] @@ -36,7 +36,10 @@ pub struct ElementContentHandlers<'h> { impl<'h> ElementContentHandlers<'h> { /// Sets a handler for elements matched by a selector. #[inline] - pub fn element(mut self, handler: impl FnMut(&mut Element) -> HandlerResult + 'h) -> Self { + pub fn element( + mut self, + handler: impl FnMut(&mut Element) -> HandlerResult + 'h + Send, + ) -> Self { self.element = Some(Box::new(handler)); self @@ -44,7 +47,10 @@ impl<'h> ElementContentHandlers<'h> { /// Sets a handler for HTML comments in the inner content of elements matched by a selector. #[inline] - pub fn comments(mut self, handler: impl FnMut(&mut Comment) -> HandlerResult + 'h) -> Self { + pub fn comments( + mut self, + handler: impl FnMut(&mut Comment) -> HandlerResult + 'h + Send, + ) -> Self { self.comments = Some(Box::new(handler)); self @@ -52,7 +58,10 @@ impl<'h> ElementContentHandlers<'h> { /// Sets a handler for text chunks in the inner content of elements matched by a selector. #[inline] - pub fn text(mut self, handler: impl FnMut(&mut TextChunk) -> HandlerResult + 'h) -> Self { + pub fn text( + mut self, + handler: impl FnMut(&mut TextChunk) -> HandlerResult + 'h + Send, + ) -> Self { self.text = Some(Box::new(handler)); self @@ -91,7 +100,10 @@ impl<'h> DocumentContentHandlers<'h> { /// /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype #[inline] - pub fn doctype(mut self, handler: impl FnMut(&mut Doctype) -> HandlerResult + 'h) -> Self { + pub fn doctype( + mut self, + handler: impl FnMut(&mut Doctype) -> HandlerResult + 'h + Send, + ) -> Self { self.doctype = Some(Box::new(handler)); self @@ -99,7 +111,10 @@ impl<'h> DocumentContentHandlers<'h> { /// Sets a handler for all HTML comments present in the input HTML markup. #[inline] - pub fn comments(mut self, handler: impl FnMut(&mut Comment) -> HandlerResult + 'h) -> Self { + pub fn comments( + mut self, + handler: impl FnMut(&mut Comment) -> HandlerResult + 'h + Send, + ) -> Self { self.comments = Some(Box::new(handler)); self @@ -107,7 +122,10 @@ impl<'h> DocumentContentHandlers<'h> { /// Sets a handler for all text chunks present in the input HTML markup. 
#[inline] - pub fn text(mut self, handler: impl FnMut(&mut TextChunk) -> HandlerResult + 'h) -> Self { + pub fn text( + mut self, + handler: impl FnMut(&mut TextChunk) -> HandlerResult + 'h + Send, + ) -> Self { self.text = Some(Box::new(handler)); self @@ -115,7 +133,10 @@ impl<'h> DocumentContentHandlers<'h> { /// Sets a handler for the document end, which is called after the last chunk is processed. #[inline] - pub fn end(mut self, handler: impl FnMut(&mut DocumentEnd) -> HandlerResult + 'h) -> Self { + pub fn end( + mut self, + handler: impl FnMut(&mut DocumentEnd) -> HandlerResult + 'h + Send, + ) -> Self { self.end = Some(Box::new(handler)); self @@ -418,7 +439,7 @@ impl Default for MemorySettings { fn default() -> Self { MemorySettings { preallocated_parsing_buffer_size: 1024, - max_allowed_memory_usage: std::usize::MAX, + max_allowed_memory_usage: usize::MAX, } } } diff --git a/src/selectors_vm/attribute_matcher.rs b/src/selectors_vm/attribute_matcher.rs index 3dd3cecc..d85b0e26 100644 --- a/src/selectors_vm/attribute_matcher.rs +++ b/src/selectors_vm/attribute_matcher.rs @@ -43,7 +43,8 @@ impl<'i> AttributeMatcher<'i> { #[inline] fn find(&self, lowercased_name: &Bytes) -> Option { self.attributes - .borrow() + .lock() + .unwrap() .iter() .find(|a| { if lowercased_name.len() != a.name.end - a.name.start { diff --git a/src/selectors_vm/compiler.rs b/src/selectors_vm/compiler.rs index c1558a2b..be4d1d46 100644 --- a/src/selectors_vm/compiler.rs +++ b/src/selectors_vm/compiler.rs @@ -13,9 +13,9 @@ use std::hash::Hash; use std::iter; /// An expression using only the tag name of an element. -pub type CompiledLocalNameExpr = Box bool>; +pub type CompiledLocalNameExpr = Box bool + Send>; /// An expression using the attributes of an element. -pub type CompiledAttributeExpr = Box bool>; +pub type CompiledAttributeExpr = Box bool + Send>; #[derive(Default)] struct ExprSet { @@ -31,7 +31,7 @@ pub struct AttrExprOperands { impl Expr { #[inline] - pub fn compile_expr bool + 'static>( + pub fn compile_expr bool + 'static + Send>( &self, f: F, ) -> CompiledLocalNameExpr { @@ -92,7 +92,7 @@ impl Compilable for Expr { impl Expr { #[inline] - pub fn compile_expr bool + 'static>( + pub fn compile_expr bool + 'static + Send>( &self, f: F, ) -> CompiledAttributeExpr { diff --git a/src/selectors_vm/mod.rs b/src/selectors_vm/mod.rs index 61c9f8ca..706b5368 100644 --- a/src/selectors_vm/mod.rs +++ b/src/selectors_vm/mod.rs @@ -28,10 +28,11 @@ pub struct MatchInfo

{ pub type AuxStartTagInfoRequest<E, P> = Box< dyn FnOnce( - &mut SelectorMatchingVm<E>, - AuxStartTagInfo, - &mut dyn FnMut(MatchInfo<P>), - ) -> Result<(), MemoryLimitExceededError>, + &mut SelectorMatchingVm<E>, + AuxStartTagInfo, + &mut dyn FnMut(MatchInfo<P>
), + ) -> Result<(), MemoryLimitExceededError> + + Send, >; pub enum VmError { @@ -143,7 +144,10 @@ pub struct SelectorMatchingVm { enable_esi_tags: bool, } -impl SelectorMatchingVm { +impl SelectorMatchingVm +where + E: Send, +{ #[inline] pub fn new( ast: Ast, @@ -238,7 +242,7 @@ impl SelectorMatchingVm { Ok(()) } - fn bailout( + fn bailout( ctx: ExecutionCtx, bailout: Bailout, recovery_point_handler: RecoveryPointHandler, diff --git a/src/transform_stream/dispatcher.rs b/src/transform_stream/dispatcher.rs index bd624719..c4b2c655 100644 --- a/src/transform_stream/dispatcher.rs +++ b/src/transform_stream/dispatcher.rs @@ -9,7 +9,6 @@ use crate::rewritable_units::{ DocumentEnd, Serialize, ToToken, Token, TokenCaptureFlags, TokenCapturer, TokenCapturerEvent, }; use crate::rewriter::RewritingError; -use std::rc::Rc; use TagTokenOutline::*; @@ -19,8 +18,9 @@ pub struct AuxStartTagInfo<'i> { pub self_closing: bool, } -type AuxStartTagInfoRequest = - Box) -> Result>; +type AuxStartTagInfoRequest = Box< + dyn FnOnce(&mut C, AuxStartTagInfo<'_>) -> Result + Send, +>; pub enum DispatcherError { InfoRequest(AuxStartTagInfoRequest), @@ -189,7 +189,7 @@ where &mut self.transform_controller, AuxStartTagInfo { input, - attr_buffer: Rc::clone($attributes), + attr_buffer: Arc::clone($attributes), self_closing: $self_closing, }, ) diff --git a/src/transform_stream/mod.rs b/src/transform_stream/mod.rs index 4e8257dc..4f3a9023 100644 --- a/src/transform_stream/mod.rs +++ b/src/transform_stream/mod.rs @@ -1,16 +1,14 @@ mod dispatcher; use self::dispatcher::Dispatcher; +pub use self::dispatcher::{ + AuxStartTagInfo, DispatcherError, OutputSink, StartTagHandlingResult, TransformController, +}; use crate::base::SharedEncoding; use crate::memory::{Arena, SharedMemoryLimiter}; use crate::parser::{Parser, ParserDirective, SharedAttributeBuffer}; use crate::rewriter::RewritingError; -use std::cell::RefCell; -use std::rc::Rc; - -pub use self::dispatcher::{ - AuxStartTagInfo, DispatcherError, OutputSink, StartTagHandlingResult, TransformController, -}; +use std::sync::{Arc, Mutex}; pub struct TransformStreamSettings where @@ -30,7 +28,7 @@ where C: TransformController, O: OutputSink, { - dispatcher: Rc>>, + dispatcher: Arc>>, parser: Parser>, buffer: Arena, has_buffered_data: bool, @@ -52,7 +50,7 @@ where ParserDirective::Lex }; - let dispatcher = Rc::new(RefCell::new(Dispatcher::new( + let dispatcher = Arc::new(Mutex::new(Dispatcher::new( settings.transform_controller, settings.output_sink, settings.encoding, @@ -111,7 +109,8 @@ where let consumed_byte_count = self.parser.parse(chunk, false)?; self.dispatcher - .borrow_mut() + .lock() + .unwrap() .flush_remaining_input(chunk, consumed_byte_count); if consumed_byte_count < chunk.len() { @@ -135,7 +134,7 @@ where trace!(@chunk chunk); self.parser.parse(chunk, true)?; - self.dispatcher.borrow_mut().finish(chunk) + self.dispatcher.lock().unwrap().finish(chunk) } #[cfg(feature = "integration_test")]
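Not part of the diff: a minimal usage sketch of what the `Send + Sync` change is meant to unlock, assuming this WIP patch compiles as intended and the public API (`HtmlRewriter::new`, `Settings`, the `element!` macro) stays as it is today. The selector, URLs, and channel plumbing below are invented for illustration; the point is only that a rewriter whose handlers and output sink are `Send` closures can be built on one thread and moved into another.

// Sketch only, not part of the patch. Assumes `HtmlRewriter` becomes `Send`
// as this change intends; names and URLs below are made up for the example.
use lol_html::{element, HtmlRewriter, Settings};
use std::sync::mpsc;
use std::thread;

fn main() {
    let (tx, rx) = mpsc::channel::<Vec<u8>>();

    // Build the rewriter on the main thread. Both the element handler and the
    // output sink are `Send` closures, matching the new bounds in `settings.rs`.
    let mut rewriter = HtmlRewriter::new(
        Settings {
            element_content_handlers: vec![element!("a[href]", |el| {
                el.set_attribute("href", "https://example.com/")?;
                Ok(())
            })],
            ..Settings::default()
        },
        move |chunk: &[u8]| {
            // Forward rewritten chunks back to the main thread.
            tx.send(chunk.to_vec()).unwrap();
        },
    );

    // Compile-time check in the spirit of the WIP `foobar` test above.
    fn assert_send<T: Send>(_: &T) {}
    assert_send(&rewriter);

    // Moving the rewriter into another thread is exactly what `Rc`/`RefCell`
    // used to rule out; this only compiles once `HtmlRewriter` is `Send`.
    let worker = thread::spawn(move || {
        rewriter
            .write(b"<a href=\"http://example.com/\">secure me</a>")
            .unwrap();
        rewriter.end().unwrap();
    });
    worker.join().unwrap();

    let output: Vec<u8> = rx.iter().flatten().collect();
    assert_eq!(
        String::from_utf8(output).unwrap(),
        "<a href=\"https://example.com/\">secure me</a>"
    );
}

The channel is only there to keep the output sink `'static` and `Send` so `thread::spawn` accepts the move; any owned `Send` sink works the same way, which is what swapping the internal `Rc<RefCell<_>>`/`Rc<Cell<_>>` state for `Arc<Mutex<_>>` makes possible.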