diff --git a/Cargo.toml b/Cargo.toml index cc0023bd..16f60c38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,7 +94,7 @@ harness = false default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"] full-regex-handling = [] single-thread = [] # disables `Send` and `Sync` on `Engine`. -regex-debug-info = [] +debug-info = [] css-validation = ["cssparser", "selectors"] content-blocking = [] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. diff --git a/js/src/lib.rs b/js/src/lib.rs index ee8a7add..1a65c986 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -323,18 +323,17 @@ fn engine_clear_tags(mut cx: FunctionContext) -> JsResult { Ok(JsNull::new(&mut cx)) } -fn engine_add_resource(mut cx: FunctionContext) -> JsResult { +fn engine_use_resources_list(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; - - let resource_arg = cx.argument::(1)?; - let resource: Resource = json_ffi::from_js(&mut cx, resource_arg)?; - - let success = if let Ok(mut engine) = this.0.lock() { - engine.add_resource(resource).is_ok() + let resources_arg = cx.argument::(1)?; + let resources: Vec = json_ffi::from_js(&mut cx, resources_arg)?; + if let Ok(mut engine) = this.0.lock() { + let resource_storage = adblock::resources::ResourceStorage::from_resources(resources); + engine.use_resource_storage(resource_storage) } else { cx.throw_error("Failed to acquire lock on engine")? 
}; - Ok(cx.boolean(success)) + Ok(JsNull::new(&mut cx)) } fn validate_request(mut cx: FunctionContext) -> JsResult { @@ -424,7 +423,7 @@ register_module!(mut m, { m.export_function("Engine_useResources", engine_use_resources)?; m.export_function("Engine_tagExists", engine_tag_exists)?; m.export_function("Engine_clearTags", engine_clear_tags)?; - m.export_function("Engine_addResource", engine_add_resource)?; + m.export_function("Engine_useResourcesList", engine_use_resources_list)?; m.export_function("validateRequest", validate_request)?; m.export_function("uBlockResources", ublock_resources)?; diff --git a/src/blocker.rs b/src/blocker.rs index d83861bf..dfb8d62d 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -487,13 +487,13 @@ impl Blocker { regex_manager.set_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&self, regex_id: u64) { let mut regex_manager = self.borrow_regex_manager(); regex_manager.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { let regex_manager = self.borrow_regex_manager(); regex_manager.get_debug_info() diff --git a/src/engine.rs b/src/engine.rs index bc99908b..0050e04e 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -14,7 +14,7 @@ use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::lists::{FilterSet, ParseOptions}; use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; -use crate::resources::{Resource, ResourceStorage}; +use crate::resources::{Resource, ResourceStorage, ResourceStorageRef}; use std::collections::HashSet; @@ -54,10 +54,18 @@ use std::collections::HashSet; pub struct Engine { blocker: Blocker, cosmetic_cache: CosmeticFilterCache, - resources: ResourceStorage, + resources: ResourceStorageRef, filter_data_context: FilterDataContextRef, } +#[cfg(feature = "debug-info")] 
+pub struct EngineDebugInfo { + pub regex_debug_info: crate::regex_manager::RegexDebugInfo, + + pub flatbuffer_size: usize, + pub resources_total_length: usize, +} + impl Default for Engine { fn default() -> Self { Self::from_filter_set(FilterSet::new(false), false) @@ -122,7 +130,7 @@ impl Engine { cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone( &filter_data_context, )), - resources: ResourceStorage::default(), + resources: ResourceStorage::from_resources(vec![]), filter_data_context, } } @@ -188,16 +196,20 @@ impl Engine { } /// Sets this engine's resources to be _only_ the ones provided in `resources`. + /// Avoid using this method if you have multiple engines. Use `use_resource_storage` instead. pub fn use_resources(&mut self, resources: impl IntoIterator) { self.resources = ResourceStorage::from_resources(resources); } - /// Sets this engine's resources to additionally include `resource`. - pub fn add_resource( - &mut self, - resource: Resource, - ) -> Result<(), crate::resources::AddResourceError> { - self.resources.add_resource(resource) + /// Sets this engine's resource storage. + /// Use this method to share resources between multiple engines. 
+ pub fn use_resource_storage(&mut self, resources: ResourceStorageRef) { + self.resources = resources; + } + + #[cfg(test)] + pub fn use_resources_list(&mut self, resources: impl IntoIterator) { + self.use_resource_storage(ResourceStorage::from_resources(resources)); } // Cosmetic filter functionality @@ -241,14 +253,18 @@ impl Engine { self.blocker.set_regex_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.blocker.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] - pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { - self.blocker.get_regex_debug_info() + #[cfg(feature = "debug-info")] + pub fn get_debug_info(&self) -> EngineDebugInfo { + EngineDebugInfo { + regex_debug_info: self.blocker.get_regex_debug_info(), + flatbuffer_size: self.filter_data_context.memory.data().len(), + resources_total_length: self.resources.resources_total_length(), + } } /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. diff --git a/src/regex_manager.rs b/src/regex_manager.rs index e54da304..20ff910e 100644 --- a/src/regex_manager.rs +++ b/src/regex_manager.rs @@ -39,7 +39,7 @@ const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180); /// Reports [`RegexManager`] metrics that may be useful for creating an optimized /// [`RegexManagerDiscardPolicy`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugInfo { /// Information about each regex contained in the [`RegexManager`]. pub regex_data: Vec, @@ -48,7 +48,7 @@ pub struct RegexDebugInfo { } /// Describes metrics about a single regex from the [`RegexManager`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugEntry { /// Id for this particular regex, which is constant and unique for its lifetime. 
/// @@ -312,7 +312,7 @@ impl RegexManager { } /// Discard one regex, identified by its id from a [`RegexDebugEntry`]. - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.map .iter_mut() @@ -322,7 +322,7 @@ impl RegexManager { }); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_debug_regex_data(&self) -> Vec { use itertools::Itertools; self.map @@ -336,13 +336,13 @@ impl RegexManager { .collect_vec() } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_compiled_regex_count(&self) -> usize { self.compiled_regex_count } /// Collect metrics that may be useful for creating an optimized [`RegexManagerDiscardPolicy`]. - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_debug_info(&self) -> RegexDebugInfo { RegexDebugInfo { regex_data: self.get_debug_regex_data(), diff --git a/src/resources/mod.rs b/src/resources/mod.rs index eb7c2321..c8b5c873 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -15,7 +15,9 @@ pub mod resource_assembler; mod resource_storage; pub(crate) use resource_storage::parse_scriptlet_args; #[doc(inline)] -pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError}; +pub use resource_storage::{ + AddResourceError, ResourceStorage, ResourceStorageRef, ScriptletResourceError, +}; use memchr::memrchr as find_char_reverse; use serde::{Deserialize, Serialize}; @@ -34,7 +36,7 @@ use serde::{Deserialize, Serialize}; /// ``` /// # use adblock::Engine; /// # use adblock::lists::ParseOptions; -/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType}; +/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceStorage, ResourceType}; /// # let mut filter_set = adblock::lists::FilterSet::default(); /// # let untrusted_filters = vec![""]; /// # let trusted_filters = vec![""]; @@ -59,14 +61,15 @@ use 
serde::{Deserialize, Serialize}; /// let mut engine = Engine::from_filter_set(filter_set, true); /// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS` /// // permission. -/// engine.add_resource(Resource { +/// let resource_storage = ResourceStorage::from_resources([Resource { /// name: "trusted-set-cookie.js".to_string(), /// aliases: vec![], /// kind: ResourceType::Mime(MimeType::ApplicationJavascript), /// content: base64::encode("document.cookie = '...';"), /// dependencies: vec![], /// permission: COOKIE_ACCESS, -/// }); +/// }]); +/// engine.use_resource_storage(resource_storage); /// ``` #[derive(Serialize, Deserialize, Clone, Copy, Default)] #[repr(transparent)] diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index 39150000..f1362d13 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -9,6 +9,14 @@ use thiserror::Error; use super::{PermissionMask, Resource, ResourceType}; +/// A ref-counted reference to a [`ResourceStorage`]. +#[cfg(feature = "single-thread")] +pub type ResourceStorageRef = std::rc::Rc; + +/// A ref-counted reference to a [`ResourceStorage`]. +#[cfg(not(feature = "single-thread"))] +pub type ResourceStorageRef = std::sync::Arc; + /// Unified resource storage for both redirects and scriptlets. #[derive(Default)] pub struct ResourceStorage { @@ -114,7 +122,7 @@ fn extract_function_name(fn_def: &str) -> Option<&str> { impl ResourceStorage { /// Convenience constructor that allows building storage for many resources at once. Errors are /// silently consumed. - pub fn from_resources(resources: impl IntoIterator) -> Self { + pub fn from_resources(resources: impl IntoIterator) -> ResourceStorageRef { let mut self_ = Self::default(); resources.into_iter().for_each(|resource| { @@ -125,7 +133,7 @@ impl ResourceStorage { }) }); - self_ + ResourceStorageRef::new(self_) } /// Adds a resource to storage so that it can be retrieved later. 
@@ -215,6 +223,10 @@ impl ResourceStorage { Ok(()) } + pub fn resources_total_length(&self) -> usize { + self.resources.values().map(|r| r.content.len()).sum() + } + /// Given the contents of a single `+js(...)` filter part, return a scriptlet string /// appropriate for injection in a page. fn get_scriptlet_resource<'a: 'b, 'b>( diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 674384bc..20e85dc6 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -218,6 +218,14 @@ mod tests { let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); let data = engine.serialize().to_vec(); + #[cfg(feature = "debug-info")] + { + let debug_info = engine.get_debug_info(); + let expected_size = 8_527_344_f32; + assert!(debug_info.flatbuffer_size >= (expected_size * 0.99) as usize); + assert!(debug_info.flatbuffer_size <= (expected_size * 1.01) as usize); + } + let expected_hash: u64 = if cfg!(feature = "css-validation") { 2942520321544562177 } else { @@ -229,6 +237,38 @@ mod tests { engine.deserialize(&data).unwrap(); } + #[test] + fn resource_storage_sharing() { + let resources = ResourceStorage::from_resources([Resource::simple( + "refresh-defuser.js", + MimeType::ApplicationJavascript, + "refresh-defuser", + )]); + let mut engine1 = + Engine::from_rules(["domain1.com##+js(refresh-defuser)"], Default::default()); + let mut engine2 = + Engine::from_rules(["domain2.com##+js(refresh-defuser)"], Default::default()); + engine1.use_resource_storage(ResourceStorageRef::clone(&resources)); + engine2.use_resource_storage(ResourceStorageRef::clone(&resources)); + + fn wrap_try(scriptlet_content: &str) -> String { + format!("try {{\n{}\n}} catch ( e ) {{ }}\n", scriptlet_content) + } + + assert_eq!( + engine1 + .url_cosmetic_resources("https://domain1.com") + .injected_script, + wrap_try("refresh-defuser") + ); + assert_eq!( + engine2 + .url_cosmetic_resources("https://domain2.com") + .injected_script, + wrap_try("refresh-defuser") + ); + 
} + #[test] fn redirect_resource_insertion_works() { let mut engine = Engine::from_rules( @@ -492,13 +532,11 @@ mod tests { ], Default::default()); let mut engine = Engine::from_filter_set(filter_set, false); - engine - .add_resource(Resource::simple( - "addthis.com/addthis_widget.js", - MimeType::ApplicationJavascript, - "window.addthis = undefined", - )) - .unwrap(); + engine.use_resources_list([Resource::simple( + "addthis.com/addthis_widget.js", + MimeType::ApplicationJavascript, + "window.addthis = undefined", + )]); let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); let result = engine.check_network_request(&request); diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index 54392eab..96be7d58 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -678,7 +678,7 @@ mod match_tests { #[test] #[ignore] // Not going to handle lookaround regexes - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] fn check_lookaround_regex_handled() { { let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index 8ffe1ff4..fdd9d107 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -1,4 +1,4 @@ -#[cfg(all(test, feature = "regex-debug-info"))] +#[cfg(all(test, feature = "debug-info"))] mod 
tests { use super::super::*;