Skip to content

Commit

Permalink
chore(chrome): add block list items
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 8, 2024
1 parent cb9f547 commit cf8e367
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 55 deletions.
91 changes: 45 additions & 46 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.20.4"
version = "2.20.5"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.20.4"
version = "2.20.5"
rust-version = "1.70"
authors = [
"j-mendez <jeff@spider.cloud>"
Expand Down
54 changes: 54 additions & 0 deletions spider_chrome/src/handler/blockers/glassdoor_blockers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use crate::handler::blockers::Trie;

lazy_static::lazy_static! {
    /// Glassdoor URL patterns that `block_glassdoor` always checks,
    /// regardless of its `ignore_visuals` flag.
    static ref URL_IGNORE_TRIE: Trie = {
        let entries = [
            "https://www.glassdoor.com/garnish/static/js/gd-sw-register.",
            "https://cdnjs.cloudflare.com/ajax/libs/prop-types/15.7.2/prop-types.min.js",
            "https://www.glassdoor.com/autocomplete/location?",
        ];
        let mut trie = Trie::new();
        for entry in entries.iter() {
            trie.insert(entry);
        }
        trie
    };

    /// Glassdoor URL patterns checked by `block_glassdoor_styles`.
    ///
    /// `block_glassdoor` only falls back to this list when its
    /// `ignore_visuals` flag is set and `URL_IGNORE_TRIE` did not match.
    static ref URL_IGNORE_TRIE_STYLES: Trie = {
        let entries = [
            "https://www.glassdoor.com/sam-global-nav/static/",
            "https://www.glassdoor.com/garnish/static/js/gd-",
            "https://unpkg.com/@dotlottie/player-component@",
            "https://www.glassdoor.com/job-search-next/assets/_next/static/",
            "https://www.glassdoor.com/ei-overview-next/assets/_next/static/",
            "https://www.glassdoor.com/occ-salaries-web/assets/_next/static/",
        ];
        let mut trie = Trie::new();
        for entry in entries.iter() {
            trie.insert(entry);
        }
        trie
    };
}

/// Check a paused glassdoor request against the styles ignore list.
///
/// Returns whatever `URL_IGNORE_TRIE_STYLES.contains_prefix` reports for the
/// URL of the paused request; `true` means the request should be blocked.
pub fn block_glassdoor_styles(
    event: &chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused,
) -> bool {
    let url = &event.request.url;
    URL_IGNORE_TRIE_STYLES.contains_prefix(url)
}

/// Decide whether a paused glassdoor request should be blocked.
///
/// The request URL is first checked against `URL_IGNORE_TRIE`. When that
/// does not match and `ignore_visuals` is `true`, the decision is delegated
/// to [`block_glassdoor_styles`]. Otherwise the request is not blocked.
pub fn block_glassdoor(
    event: &chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused,
    ignore_visuals: bool,
) -> bool {
    // Short-circuiting keeps the original evaluation order: the styles list
    // is only consulted when the main list missed and visuals are ignored.
    URL_IGNORE_TRIE.contains_prefix(&event.request.url)
        || (ignore_visuals && block_glassdoor_styles(event))
}
2 changes: 2 additions & 0 deletions spider_chrome/src/handler/blockers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
pub mod adblock_patterns;
/// amazon blockers
pub mod amazon_blockers;
/// glassdoor blockers
pub mod glassdoor_blockers;
/// linkedin blockers
pub mod linkedin_blockers;
/// netflix blockers
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/src/handler/blockers/upwork_blockers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ lazy_static::lazy_static! {
trie
};

/// Ignore list of urls.
/// Ignore list of urls styles.
static ref URL_IGNORE_TRIE_STYLES: Trie = {
let mut trie = Trie::new();
let patterns = [
Expand Down
Loading

0 comments on commit cf8e367

Please sign in to comment.