Skip to content

Commit

Permalink
chore(chrome): fix bytes timeout
Browse files: browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 8, 2024
1 parent cf8e367 commit ee098f2
Show file tree
Hide file tree
Showing 10 changed files with 37 additions and 44 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.20.5"
version = "2.20.6"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
10 changes: 2 additions & 8 deletions spider/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1134,14 +1134,8 @@ pub async fn fetch_page_html_chrome_base(
}
}

let res =
tokio::time::timeout(tokio::time::Duration::from_secs(15), page.content_bytes()).await;

let mut res: Box<bytes::Bytes> = match res {
Ok(b) => match b {
Ok(b) => b.into(),
_ => Default::default(),
},
let mut res: Box<bytes::Bytes> = match page.content_bytes().await {
Ok(b) => b.into(),
_ => Default::default(),
};

Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.20.5"
version = "2.20.6"
rust-version = "1.70"
authors = [
"j-mendez <jeff@spider.cloud>"
Expand Down
23 changes: 21 additions & 2 deletions spider_chrome/src/handler/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@ lazy_static! {
"https://static.parastorage.com/services/tag-manager-client/",
"https://www.datadoghq-browser-agent.com/datadog-rum-slim-v4.js",
"https://cdn.rudderlabs.com",
"https://script.hotjar.com/",
"https://static.hotjar.com/",
"https://cdn.insurads.com/",
"https://cdn-ukwest.onetrust.com",
"https://cdn.onetrust.com",
"https://services.insurads.com/",
"https://platform.iteratehq.com/loader.js",
"https://acdn.adnxs.com/ast/ast.js",
"https://schibsted-cdn.relevant-digital.com/static/tags/",
"https://bat.bing.net",
".sharethis.com",
".newrelic.com",
".googlesyndication.com",
Expand Down Expand Up @@ -179,7 +189,9 @@ lazy_static! {
"ads.js",
"analytics.js",
"otSDKStub.js",
"otBannerSdk.js",
"_vercel/insights/script.js",
"analytics."
];
for pattern in &patterns {
trie.insert(pattern);
Expand Down Expand Up @@ -212,17 +224,24 @@ lazy_static! {
"https://nimbleplot.com",
"https://api.lab.amplitude.com/",
"https://flag.lab.amplitude.com/sdk/v2/flags",
"https://cdn-ukwest.onetrust.com/",
"https://cdn.onetrust.com/",
"https://geolocation.onetrust.com/",
"https://assets.adobedtm.com/",
"https://sdkconfig.pulse.",
"https://bat.bing.net",
".wixapps.net/api/v1/bulklog",
// video embeddings
"https://video.squarespace-cdn.com/content/",
"googlesyndication.com",
".doubleclick.net",
".piano.io/",
".browsiprod.com",
".onetrust.com/consent/",
".onetrust.",
"https://logs.",
"/track.php",
"/api/v1/bulklog"
"/api/v1/bulklog",
"cookieconsentpub"
];
for pattern in &patterns {
trie.insert(pattern);
Expand Down
24 changes: 2 additions & 22 deletions spider_chrome/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1277,17 +1277,7 @@ impl Page {
pub async fn content(&self) -> Result<String> {
Ok(self
.evaluate(
"{
let retVal = '';
if (document.doctype) {
retVal = new XMLSerializer().serializeToString(document.doctype);
}
if (document.documentElement) {
retVal += document.documentElement.outerHTML;
}
retVal
}
",
"{let retVal = ''; if (document.doctype) { return new XMLSerializer().serializeToString(document.doctype); } if (document.documentElement) { retVal += document.documentElement.outerHTML; } retVal }",
)
.await?
.into_value()?)
Expand All @@ -1298,17 +1288,7 @@ impl Page {
pub async fn content_bytes(&self) -> Result<bytes::Bytes> {
Ok(self
.evaluate(
"{
let retVal = '';
if (document.doctype) {
retVal = new XMLSerializer().serializeToString(document.doctype);
}
if (document.documentElement) {
retVal += document.documentElement.outerHTML;
}
retVal
}
",
"{let retVal = ''; if (document.doctype) { retVal = new XMLSerializer().serializeToString(document.doctype); } if (document.documentElement) { retVal += document.documentElement.outerHTML; } retVal }",
)
.await?
.into_value()?)
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.20.5"
version = "2.20.6"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.20.5"
version = "2.20.6"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.20.5"
version = "2.20.6"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.20.5"
version = "2.20.6"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down

0 comments on commit ee098f2

Please sign in to comment.