Skip to content

Commit

Permalink
chore(chrome): add linkd custom blocker
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 6, 2024
1 parent 4afe6db commit 1c6905d
Show file tree
Hide file tree
Showing 12 changed files with 76 additions and 28 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.20.2"
version = "2.20.3"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.20.2"
version = "2.20.3"
rust-version = "1.70"
authors = [
"j-mendez <jeff@spider.cloud>"
Expand Down
19 changes: 11 additions & 8 deletions spider_chrome/src/handler/blockers/amazon_blockers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@ lazy_static::lazy_static! {
"https://completion.amazon.com/api/2017/suggestions",
"https://sts.us-east-1.amazonaws.com/",
"https://www.amazon.com/cross_border_interstitial_sp/render",

"https://aax-us-east-retail-direct.amazon.com/e/xsp/getAd",
"https://fls-na.amazon.com/1/batch/1/OE/",
"https://unagi.amazon.com/1/events/",
"https://images-na.ssl-images-amazon.com/images/S/apesafeframe/ape/sf/desktop/",
// ads
"https://m.media-amazon.com/images/G/01/csm/showads",
// we can prob search for rum subs uptop instead.
"https://dataplane.rum",
"https://client.rum",
".amazon-adsystem.com"
".amazon-adsystem.com",
"SearchPartnerAssets",
];
for pattern in &patterns {
trie.insert(pattern);
Expand All @@ -37,12 +39,13 @@ pub fn block_amazon(
let mut block_request = URL_IGNORE_TRIE.contains_prefix(&event.request.url);

if !block_request {
if event.request.url.ends_with("?pageViewLogging=1")
|| event
.request
.url
.starts_with("https://s.amazon-adsystem.com/")
|| event.request.url.contains(".amazon-adsystem.com/")
let u = &event.request.url;

if u.ends_with("?pageViewLogging=1")
|| u.starts_with("https://s.amazon-adsystem.com/")
|| u.ends_with("inner-host.min.js")
|| u.ends_with(".js?xcp")
|| u.contains(".amazon-adsystem.com/")
{
block_request = true;
}
Expand Down
29 changes: 29 additions & 0 deletions spider_chrome/src/handler/blockers/linkedin_blockers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use crate::handler::blockers::Trie;

lazy_static::lazy_static! {
    /// Lazily-initialised trie of LinkedIn URL patterns consulted by `block_linkedin`.
    static ref URL_IGNORE_TRIE: Trie = {
        let mut ignore = Trie::new();

        // NOTE(review): every entry except "/log" is an absolute URL. If
        // `contains_prefix` only matches from the start of the request URL,
        // "/log" can never match — confirm against the `Trie` implementation.
        [
            "/log",
            "https://www.linkedin.com/li/track",
            "https://li.protechts.net",
            "https://www.linkedin.com/platform-telemetry/li",
            "https://www.linkedin.com/organization-guest/api/feedUpdates/",
            "https://www.linkedin.com/feedcontent-guest/api/ingraphs/gauge",
            "https://www.linkedin.com/voyager/api/",
            "https://platform.linkedin.com/litms/allowlist/voyager-web-global",
        ]
        .iter()
        .for_each(|pattern| {
            ignore.insert(pattern);
        });

        ignore
    };
}

/// Decide whether a paused LinkedIn request should be blocked.
///
/// Returns `true` when `URL_IGNORE_TRIE.contains_prefix` reports a match for
/// the URL of the paused request, and `false` otherwise.
pub fn block_linkedin(
    event: &chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused,
) -> bool {
    let request_url = &event.request.url;

    URL_IGNORE_TRIE.contains_prefix(request_url)
}
16 changes: 9 additions & 7 deletions spider_chrome/src/handler/blockers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
// tiktok blockers
pub mod tiktok_blockers;
// amazon blockers
pub mod amazon_blockers;
// x blockers
pub mod x_blockers;
// netflix blockers
/// adblock patterns
pub mod adblock_patterns;
/// amazon blockers
pub mod amazon_blockers;
/// linkedin blockers
pub mod linkedin_blockers;
/// netflix blockers
pub mod netflix_blockers;
/// tiktok blockers
pub mod tiktok_blockers;
/// x blockers
pub mod x_blockers;

// Trie node for ignore.
#[derive(Default)]
Expand Down
3 changes: 2 additions & 1 deletion spider_chrome/src/handler/blockers/tiktok_blockers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ lazy_static::lazy_static! {
"https://www.tiktok.com/api/inbox/notice_count/",
"https://mcs.tiktokv.us/v1/user/webid",
"https://mon16-normal-useast5.tiktokv.us/monitor_browser/collect/batch/?bid=tiktok_pns_web_runtime",
"https://lf16-tiktok-web.tiktokcdn-us.com/obj/tiktok-web-tx/tiktok_privacy_protection_framework/loader/",
"https://webcast.tiktok.com/webcast/wallet_api/fs/diamond_buy",
"https://lf16-tiktok-web.tiktokcdn-us.com/obj/tiktok-web-tx/tiktok_privacy_protection_framework/loader/",
"https://lf16-tiktok-web.tiktokcdn-us.com/obj/tiktok-web-tx/tiktok/webapp/main/webapp-desktop/npm-async-bric_verify_sec_sdk_build_captcha",
"/tiktok_privacy_protection_framework/loader",
"/obj/tiktok-web-tx/tiktok_privacy_protection_framework/loader",
"/service/2/abtest_config/",
Expand Down
13 changes: 13 additions & 0 deletions spider_chrome/src/handler/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ lazy_static::lazy_static! {
"https://cdn.tinypass.com",
"https://cd.connatix.com",
"https://platform-api.sharethis.com/js/sharethis.js",
"https://js.hsforms.net/forms/embed/v2.js",
".sharethis.com",
".newrelic.com",
".googlesyndication.com",
Expand Down Expand Up @@ -355,6 +356,8 @@ pub enum NetworkInterceptManager {
Amazon,
/// x.com
X,
/// linkedin.com
LinkedIn,
/// netflix.com
Netflix,
#[default]
Expand All @@ -376,6 +379,10 @@ impl NetworkInterceptManager {
|| url.starts_with("https://netflix.com")
{
NetworkInterceptManager::Netflix
} else if url.starts_with("https://www.linkedin.com")
|| url.starts_with("https://linkedin.com")
{
NetworkInterceptManager::LinkedIn
} else {
NetworkInterceptManager::Unknown
}
Expand Down Expand Up @@ -710,6 +717,9 @@ impl NetworkManager {
NetworkInterceptManager::Netflix => {
super::blockers::netflix_blockers::block_netflix(event)
}
NetworkInterceptManager::LinkedIn => {
super::blockers::linkedin_blockers::block_linkedin(event)
}
_ => skip_networking,
}
} else {
Expand Down Expand Up @@ -815,6 +825,9 @@ impl NetworkManager {
NetworkInterceptManager::Netflix => {
super::blockers::netflix_blockers::block_netflix(event)
}
NetworkInterceptManager::LinkedIn => {
super::blockers::linkedin_blockers::block_linkedin(event)
}
_ => skip_networking,
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.20.2"
version = "2.20.3"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.20.2"
version = "2.20.3"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.20.2"
version = "2.20.3"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.20.2"
version = "2.20.3"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down

0 comments on commit 1c6905d

Please sign in to comment.