Skip to content

Commit

Permalink
perf(smart): fix js need determination
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Nov 5, 2024
1 parent bc68858 commit 24893e7
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 192 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.11.20"
version = "2.12.1"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down Expand Up @@ -143,7 +143,7 @@ chrome_intercept = ["chrome"]
chrome_headless_new = ["chrome"]
cookies = ["reqwest/cookies"]
cron = ["dep:async_job", "dep:chrono", "dep:cron", "dep:async-trait"]
smart = ["chrome", "dep:rand", "chrome_intercept"]
smart = ["chrome", "dep:rand", "chrome_intercept", "dep:lol_html"]
encoding = []
headers = ["dep:httpdate"]
real_browser = ["dep:statrs", "dep:rand"]
Expand Down
4 changes: 1 addition & 3 deletions spider/src/packages/scraper/element_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ impl<'a> ElementRef<'a> {
create_missing_parent: false,
};
let mut buf = Vec::new();
match serialize(&mut buf, self, opts) {
_ => (),
};
let _ = serialize(&mut buf, self, opts);
// we need to get the initial encoding of the html lang if used.
auto_encoder::auto_encode_bytes(&buf)
}
Expand Down
4 changes: 1 addition & 3 deletions spider/src/packages/scraper/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,7 @@ impl Html {
create_missing_parent: false,
};
let mut buf = Vec::new();
match serialize(&mut buf, self, opts) {
_ => (),
};
let _ = serialize(&mut buf, self, opts);
auto_encoder::auto_encode_bytes(&buf)
}

Expand Down
11 changes: 6 additions & 5 deletions spider/src/packages/scraper/html/tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,16 @@ impl TreeSink for Html {

// Detach the given node from its parent.
fn remove_from_parent(&mut self, target: &Self::Handle) {
self.tree.get_mut(*target).unwrap().detach();
if let Some(mut p) = self.tree.get_mut(*target) {
p.detach();
}
}

// Remove all the children from node and append them to new_parent.
fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
self.tree
.get_mut(*new_parent)
.unwrap()
.reparent_from_id_append(*node);
if let Some(mut p) = self.tree.get_mut(*new_parent) {
p.reparent_from_id_append(*node);
}
}

// Add each attribute to the given element, if no attribute with that name already exists. The
Expand Down
361 changes: 193 additions & 168 deletions spider/src/page.rs

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions spider/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,15 @@ pub async fn fetch_page_html_chrome_base(
},
)
.await;

// Perform an extra navigation to trigger page actions.
if let Some(u) = url_target {
if u.starts_with("http") {
let _ = page
.evaluate(format!(r#"window.location = "{}";"#, u))
.await;
}
}
}
} else {
if let Err(e) = navigate(page, source, &mut chrome_http_req_res).await {
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.11.20"
version = "2.12.1"
rust-version = "1.70"
authors = [
"j-mendez <jeff@spider.cloud>"
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.11.20"
version = "2.12.1"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.11.20"
version = "2.12.1"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.11.20"
version = "2.12.1"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.11.20"
version = "2.12.1"
authors = [
"j-mendez <jeff@spider.cloud>"
]
Expand Down

0 comments on commit 24893e7

Please sign in to comment.