Skip to content

Commit

Permalink
Avoid unnecessary HTTP requests to the same, already checked links
Browse files Browse the repository at this point in the history
  • Loading branch information
biodranik committed Sep 13, 2023
1 parent 67a5577 commit 908ee39
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions components/site/src/link_checking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ pub fn check_external_links(site: &Site) -> Vec<String> {

let cpu_count = match thread::available_parallelism() {
Ok(count) => count.get(),
Err(_) => 1
Err(_) => 1,
};
// create thread pool with lots of threads so we can fetch
// (almost) all pages simultaneously, limiting all links for a single
Expand All @@ -214,13 +214,26 @@ pub fn check_external_links(site: &Site) -> Vec<String> {
.par_iter()
.map(|(_, links)| {
let mut num_links_left = links.len();
let mut checked_links: HashMap<&str, Option<link_checker::Result>> =
HashMap::new();
links
.iter()
.filter_map(move |link_def| {
num_links_left -= 1;

// Avoid double-checking the same url (e.g. for translated pages).
let external_link = link_def.external_link.as_str();
if checked_links.contains_key(external_link) {
return match &checked_links[external_link] {
Some(res) => {
Some((&link_def.file_path, external_link, res.clone()))
}
None => None,
};
}

let res = link_checker::check_url(
&link_def.external_link,
external_link,
&site.config.link_checker,
);

Expand All @@ -230,9 +243,11 @@ pub fn check_external_links(site: &Site) -> Vec<String> {
}

if link_checker::is_valid(&res) {
checked_links.insert(external_link, None);
None
} else {
Some((&link_def.file_path, &link_def.external_link, res))
checked_links.insert(external_link, Some(res.clone()));
return Some((&link_def.file_path, external_link, res));
}
})
.collect::<Vec<_>>()
Expand Down

0 comments on commit 908ee39

Please sign in to comment.