Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ harness = false
default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"]
full-regex-handling = []
single-thread = [] # disables `Send` and `Sync` on `Engine`.
regex-debug-info = []
debug-info = []
css-validation = ["cssparser", "selectors"]
content-blocking = []
embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled.
Expand Down
17 changes: 8 additions & 9 deletions js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,18 +323,17 @@ fn engine_clear_tags(mut cx: FunctionContext) -> JsResult<JsNull> {
Ok(JsNull::new(&mut cx))
}

fn engine_add_resource(mut cx: FunctionContext) -> JsResult<JsBoolean> {
fn engine_use_resources_list(mut cx: FunctionContext) -> JsResult<JsNull> {
let this = cx.argument::<JsBox<Engine>>(0)?;

let resource_arg = cx.argument::<JsValue>(1)?;
let resource: Resource = json_ffi::from_js(&mut cx, resource_arg)?;

let success = if let Ok(mut engine) = this.0.lock() {
engine.add_resource(resource).is_ok()
let resources_arg = cx.argument::<JsValue>(1)?;
let resources: Vec<Resource> = json_ffi::from_js(&mut cx, resources_arg)?;
if let Ok(mut engine) = this.0.lock() {
let resource_storage = adblock::resources::ResourceStorage::from_resources(resources);
engine.use_resource_storage(resource_storage)
} else {
cx.throw_error("Failed to acquire lock on engine")?
};
Ok(cx.boolean(success))
Ok(JsNull::new(&mut cx))
}

fn validate_request(mut cx: FunctionContext) -> JsResult<JsBoolean> {
Expand Down Expand Up @@ -424,7 +423,7 @@ register_module!(mut m, {
m.export_function("Engine_useResources", engine_use_resources)?;
m.export_function("Engine_tagExists", engine_tag_exists)?;
m.export_function("Engine_clearTags", engine_clear_tags)?;
m.export_function("Engine_addResource", engine_add_resource)?;
m.export_function("Engine_useResourcesList", engine_use_resources_list)?;

m.export_function("validateRequest", validate_request)?;
m.export_function("uBlockResources", ublock_resources)?;
Expand Down
4 changes: 2 additions & 2 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,13 +487,13 @@ impl Blocker {
regex_manager.set_discard_policy(new_discard_policy);
}

#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub fn discard_regex(&self, regex_id: u64) {
let mut regex_manager = self.borrow_regex_manager();
regex_manager.discard_regex(regex_id);
}

#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo {
let regex_manager = self.borrow_regex_manager();
regex_manager.get_debug_info()
Expand Down
42 changes: 29 additions & 13 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
use crate::lists::{FilterSet, ParseOptions};
use crate::regex_manager::RegexManagerDiscardPolicy;
use crate::request::Request;
use crate::resources::{Resource, ResourceStorage};
use crate::resources::{Resource, ResourceStorage, ResourceStorageRef};

use std::collections::HashSet;

Expand Down Expand Up @@ -54,10 +54,18 @@ use std::collections::HashSet;
pub struct Engine {
blocker: Blocker,
cosmetic_cache: CosmeticFilterCache,
resources: ResourceStorage,
resources: ResourceStorageRef,
filter_data_context: FilterDataContextRef,
}

/// Diagnostic snapshot of an [`Engine`], as returned by `Engine::get_debug_info`.
///
/// Only compiled when the `debug-info` feature is enabled.
#[cfg(feature = "debug-info")]
pub struct EngineDebugInfo {
/// Regex metrics reported by the engine's blocker.
pub regex_debug_info: crate::regex_manager::RegexDebugInfo,

/// Length of the data backing the engine's filter data context.
pub flatbuffer_size: usize,
/// Sum of the `content` lengths of the resources held in the engine's resource storage.
pub resources_total_length: usize,
}

impl Default for Engine {
fn default() -> Self {
Self::from_filter_set(FilterSet::new(false), false)
Expand Down Expand Up @@ -122,7 +130,7 @@ impl Engine {
cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone(
&filter_data_context,
)),
resources: ResourceStorage::default(),
resources: ResourceStorage::from_resources(vec![]),
filter_data_context,
}
}
Expand Down Expand Up @@ -188,16 +196,20 @@ impl Engine {
}

/// Sets this engine's resources to be _only_ the ones provided in `resources`.
/// Avoid using this method if you have multiple engines. Use `use_resource_storage` instead.
pub fn use_resources(&mut self, resources: impl IntoIterator<Item = Resource>) {
self.resources = ResourceStorage::from_resources(resources);
}

/// Sets this engine's resources to additionally include `resource`.
pub fn add_resource(
&mut self,
resource: Resource,
) -> Result<(), crate::resources::AddResourceError> {
self.resources.add_resource(resource)
/// Sets this engine's resource storage.
/// Use this method to share resources between multiple engines.
pub fn use_resource_storage(&mut self, resources: ResourceStorageRef) {
self.resources = resources;
}

#[cfg(test)]
pub fn use_resources_list(&mut self, resources: impl IntoIterator<Item = Resource>) {
self.use_resource_storage(ResourceStorage::from_resources(resources));
}

// Cosmetic filter functionality
Expand Down Expand Up @@ -241,14 +253,18 @@ impl Engine {
self.blocker.set_regex_discard_policy(new_discard_policy);
}

#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub fn discard_regex(&mut self, regex_id: u64) {
self.blocker.discard_regex(regex_id);
}

#[cfg(feature = "regex-debug-info")]
pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo {
self.blocker.get_regex_debug_info()
/// Gathers engine-level diagnostics into an [`EngineDebugInfo`]: the blocker's regex
/// metrics, the length of the filter data memory, and the total resource content length.
#[cfg(feature = "debug-info")]
pub fn get_debug_info(&self) -> EngineDebugInfo {
    let regex_debug_info = self.blocker.get_regex_debug_info();
    let flatbuffer_size = self.filter_data_context.memory.data().len();
    let resources_total_length = self.resources.resources_total_length();

    EngineDebugInfo {
        regex_debug_info,
        flatbuffer_size,
        resources_total_length,
    }
}

/// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
Expand Down
12 changes: 6 additions & 6 deletions src/regex_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180);

/// Reports [`RegexManager`] metrics that may be useful for creating an optimized
/// [`RegexManagerDiscardPolicy`].
#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub struct RegexDebugInfo {
/// Information about each regex contained in the [`RegexManager`].
pub regex_data: Vec<RegexDebugEntry>,
Expand All @@ -48,7 +48,7 @@ pub struct RegexDebugInfo {
}

/// Describes metrics about a single regex from the [`RegexManager`].
#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub struct RegexDebugEntry {
/// Id for this particular regex, which is constant and unique for its lifetime.
///
Expand Down Expand Up @@ -312,7 +312,7 @@ impl RegexManager {
}

/// Discard one regex, identified by its id from a [`RegexDebugEntry`].
#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub fn discard_regex(&mut self, regex_id: u64) {
self.map
.iter_mut()
Expand All @@ -322,7 +322,7 @@ impl RegexManager {
});
}

#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub(crate) fn get_debug_regex_data(&self) -> Vec<RegexDebugEntry> {
use itertools::Itertools;
self.map
Expand All @@ -336,13 +336,13 @@ impl RegexManager {
.collect_vec()
}

#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub(crate) fn get_compiled_regex_count(&self) -> usize {
self.compiled_regex_count
}

/// Collect metrics that may be useful for creating an optimized [`RegexManagerDiscardPolicy`].
#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
pub fn get_debug_info(&self) -> RegexDebugInfo {
RegexDebugInfo {
regex_data: self.get_debug_regex_data(),
Expand Down
11 changes: 7 additions & 4 deletions src/resources/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ pub mod resource_assembler;
mod resource_storage;
pub(crate) use resource_storage::parse_scriptlet_args;
#[doc(inline)]
pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError};
pub use resource_storage::{
AddResourceError, ResourceStorage, ResourceStorageRef, ScriptletResourceError,
};

use memchr::memrchr as find_char_reverse;
use serde::{Deserialize, Serialize};
Expand All @@ -34,7 +36,7 @@ use serde::{Deserialize, Serialize};
/// ```
/// # use adblock::Engine;
/// # use adblock::lists::ParseOptions;
/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType};
/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceStorage, ResourceType};
/// # let mut filter_set = adblock::lists::FilterSet::default();
/// # let untrusted_filters = vec![""];
/// # let trusted_filters = vec![""];
Expand All @@ -59,14 +61,15 @@ use serde::{Deserialize, Serialize};
/// let mut engine = Engine::from_filter_set(filter_set, true);
/// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS`
/// // permission.
/// engine.add_resource(Resource {
/// let resource_storage = ResourceStorage::from_resources([Resource {
/// name: "trusted-set-cookie.js".to_string(),
/// aliases: vec![],
/// kind: ResourceType::Mime(MimeType::ApplicationJavascript),
/// content: base64::encode("document.cookie = '...';"),
/// dependencies: vec![],
/// permission: COOKIE_ACCESS,
/// });
/// }]);
/// engine.use_resource_storage(resource_storage);
/// ```
#[derive(Serialize, Deserialize, Clone, Copy, Default)]
#[repr(transparent)]
Expand Down
16 changes: 14 additions & 2 deletions src/resources/resource_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ use thiserror::Error;

use super::{PermissionMask, Resource, ResourceType};

/// A ref-counted reference to a [`ResourceStorage`].
#[cfg(feature = "single-thread")]
pub type ResourceStorageRef = std::rc::Rc<ResourceStorage>;

/// A ref-counted reference to a [`ResourceStorage`].
#[cfg(not(feature = "single-thread"))]
pub type ResourceStorageRef = std::sync::Arc<ResourceStorage>;
Comment on lines +12 to +18
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's implement this using traits instead; it will give us more flexibility to e.g. use an on-disk cache in brave-core and have better devtools diagnostics in the future.

More or less like this:

/// Customizable backend for [Resource] storage.
/// Custom implementations could be used to enable (for example) sharing of resources between multiple [Engine]s, an on-disk backend, or special caching behavior.
pub trait ResourceStorageBackend {
    /// Retrieves the resource with the associated name or alias
    fn get_resource(&self, resource_ident: &str) -> Option<Resource>;
}

/// Default implementation of `ResourceStorageBackend` that stores all resources in memory.
pub struct InMemoryResourceStorageBackend {
    /// Stores each resource by its canonical name
    resources: HashMap<String, Resource>,
    /// Stores mappings from aliases to their canonical resource names
    aliases: HashMap<String, String>,
}

impl ResourceStorageBackend for InMemoryResourceStorageBackend {
    fn get_resource(&self, resource_ident: &str) -> Option<Resource> {
        // ...the current `get_internal_resource` method implementation (note `&Resource` -> `Resource`)
    }
}

impl InMemoryResourceStorageBackend {
    pub fn from_resources(resources: impl IntoIterator<Item = Resource>) -> Self {
        // ...the current `ResourceStorage::from_resources` method
    }
}

// ...

/// Unified resource storage for both redirects and scriptlets.
pub struct ResourceStorage {
   backend: Box<ResourceStorageBackend>,
}

impl ResourceStorage {
    pub fn use_backend<T>(&mut self, backend: T) where T: ResourceStorageBackend + 'static {
        self.backend = Box::new(backend);
    }
    // ...replace all `self.get_internal_resource` calls with `backend.get_resource`
}

Dropping support for individual add_resource calls is ok with me. Engine can keep the existing use_resources method to create an InMemoryResourceStorageBackend with the provided resources, as well as a new method use_resources_backend.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The particular task I'm trying to solve is sharing the impl between the engines.

It sounds like you are suggesting changing the interfaces in another place.
Right now we have only one ResourceStorage impl and no plans to support another. I have no idea what to put into InMemoryResourceStorageBackend. Can you elaborate on this?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

InMemoryResourceStorageBackend is identical to the current implementation. What I propose with the ResourceStorageBackend trait is that we can use it to define a new brave-core specific implementation that supports sharing between engines and (eventually) on-disk caching. We can put the implementation of that in brave-core's adblock binding layer since we will need tighter integration with Chromium's FS operations; it doesn't need to live in adblock-rust itself.

/// To be wrapped in [Rc] for shared access across engines
struct BraveCoreResourceStorageInner {
    /// Stores each resource by its canonical name
    resources: HashMap<String, Resource>,
    /// Stores mappings from aliases to their canonical resource names
    aliases: HashMap<String, String>,
}

#[derive(Clone)]
struct BraveCoreResourceStorage {
    shared_storage: Rc<BraveCoreResourceStorageInner>,
}

impl ResourceStorageBackend for BraveCoreResourceStorage {
    fn get_resource(&self, resource_ident: &str) -> Option<Resource> {
        // ...use `self.shared_storage.resources` and `self.shared_storage.aliases`
    }
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current design assumes that the browser should just create a ResourceStorage and pass it to both engines.

trait is that we can use it to define a new brave-core specific implementation that supports sharing between engines and (eventually) on-disk caching.

I don't have such plans right now. Also, the on-disk representation takes more memory because of base64: extra work is needed to process it, or the backend storage code has to be changed. If you have an idea of how to deal with it, it would be nice to see a PR.
I would say we first need to focus on caching the 20MB+ flatbuffer rather than the relatively small resources.

In conclusion, I don't want to introduce multiple get_resource implementations.

The only thing I'm generalizing here is getting the current ResourceStorage impl.
If you really want it, we can do it via traits: ResourceStorageGetter with two impls SimpleResourceStorage and SharableResourceStorageGetter.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I made a dedicated PR for the extra debug info part: #527
At least we can merge that part and continue the discussion here.
I've also added a test to show how the resources can be shared from the brave-core (b-c) perspective.


/// Unified resource storage for both redirects and scriptlets.
#[derive(Default)]
pub struct ResourceStorage {
Expand Down Expand Up @@ -114,7 +122,7 @@ fn extract_function_name(fn_def: &str) -> Option<&str> {
impl ResourceStorage {
/// Convenience constructor that allows building storage for many resources at once. Errors are
/// silently consumed.
pub fn from_resources(resources: impl IntoIterator<Item = Resource>) -> Self {
pub fn from_resources(resources: impl IntoIterator<Item = Resource>) -> ResourceStorageRef {
let mut self_ = Self::default();

resources.into_iter().for_each(|resource| {
Expand All @@ -125,7 +133,7 @@ impl ResourceStorage {
})
});

self_
ResourceStorageRef::new(self_)
}

/// Adds a resource to storage so that it can be retrieved later.
Expand Down Expand Up @@ -215,6 +223,10 @@ impl ResourceStorage {
Ok(())
}

/// Returns the combined length of the `content` strings of every resource held in
/// `self.resources`.
pub fn resources_total_length(&self) -> usize {
    self.resources
        .values()
        .fold(0, |total, resource| total + resource.content.len())
}

/// Given the contents of a single `+js(...)` filter part, return a scriptlet string
/// appropriate for injection in a page.
fn get_scriptlet_resource<'a: 'b, 'b>(
Expand Down
52 changes: 45 additions & 7 deletions tests/unit/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,14 @@ mod tests {
let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
let data = engine.serialize().to_vec();

#[cfg(feature = "debug-info")]
{
let debug_info = engine.get_debug_info();
let expected_size = 8_527_344_f32;
assert!(debug_info.flatbuffer_size >= (expected_size * 0.99) as usize);
assert!(debug_info.flatbuffer_size <= (expected_size * 1.01) as usize);
}

let expected_hash: u64 = if cfg!(feature = "css-validation") {
2942520321544562177
} else {
Expand All @@ -229,6 +237,38 @@ mod tests {
engine.deserialize(&data).unwrap();
}

/// Two engines handed clones of the same `ResourceStorageRef` must both be able to
/// resolve a scriptlet from it.
#[test]
fn resource_storage_sharing() {
    // Built once; each engine below receives a ref-counted clone of this handle.
    let shared_storage = ResourceStorage::from_resources([Resource::simple(
        "refresh-defuser.js",
        MimeType::ApplicationJavascript,
        "refresh-defuser",
    )]);

    let engine_for = |rule: &str| {
        let mut engine = Engine::from_rules([rule], Default::default());
        engine.use_resource_storage(ResourceStorageRef::clone(&shared_storage));
        engine
    };

    // Both engines are kept alive at the same time so the storage is genuinely shared.
    let first = engine_for("domain1.com##+js(refresh-defuser)");
    let second = engine_for("domain2.com##+js(refresh-defuser)");

    // The scriptlet body is expected to come back wrapped in a try/catch.
    let expected_script = format!("try {{\n{}\n}} catch ( e ) {{ }}\n", "refresh-defuser");

    let first_script = first
        .url_cosmetic_resources("https://domain1.com")
        .injected_script;
    assert_eq!(first_script, expected_script);

    let second_script = second
        .url_cosmetic_resources("https://domain2.com")
        .injected_script;
    assert_eq!(second_script, expected_script);
}

#[test]
fn redirect_resource_insertion_works() {
let mut engine = Engine::from_rules(
Expand Down Expand Up @@ -492,13 +532,11 @@ mod tests {
], Default::default());
let mut engine = Engine::from_filter_set(filter_set, false);

engine
.add_resource(Resource::simple(
"addthis.com/addthis_widget.js",
MimeType::ApplicationJavascript,
"window.addthis = undefined",
))
.unwrap();
engine.use_resources_list([Resource::simple(
"addthis.com/addthis_widget.js",
MimeType::ApplicationJavascript,
"window.addthis = undefined",
)]);

let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap();
let result = engine.check_network_request(&request);
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/filters/network_matchers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ mod match_tests {

#[test]
#[ignore] // Not going to handle lookaround regexes
#[cfg(feature = "regex-debug-info")]
#[cfg(feature = "debug-info")]
fn check_lookaround_regex_handled() {
{
let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#;
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/regex_manager.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[cfg(all(test, feature = "regex-debug-info"))]
#[cfg(all(test, feature = "debug-info"))]
mod tests {
use super::super::*;

Expand Down
Loading