-
Notifications
You must be signed in to change notification settings - Fork 99
/
cache.rs
74 lines (60 loc) · 2.36 KB
/
cache.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
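//! Crawl example with HTTP caching enabled that reports, for every page
//! received, whether its `GET:<url>` entry is present in the cache.
//!
//! Run with: `cargo run --example cache --features="spider/sync spider/cache"`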
extern crate spider;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;
use crate::spider::http_cache_reqwest::CacheManager;
use crate::spider::tokio::io::AsyncWriteExt;
use spider::string_concat::{string_concat, string_concat_impl};
use spider::tokio;
use spider::website::Website;
/// Running total of pages whose cache-lookup task ran to completion.
///
/// Incremented (with `Ordering::Relaxed`) by each task spawned from the
/// subscription loop in `main`, and printed in `main`'s final summary line.
static GLOBAL_URL_COUNT: AtomicUsize = AtomicUsize::new(0);
/// Crawls `https://rsseau.fr` with caching enabled and prints a cache status
/// line for every page delivered on the subscription channel.
///
/// Two clones of the configured `Website` are crawled alongside a listener
/// that receives pages from `website.subscribe(16)`. For each page a task is
/// spawned that looks up `GET:<url>` in `spider::website::CACACHE_MANAGER`
/// under a 60 ms timeout and writes one of:
///
/// * `HIT`   - the lookup returned an entry,
/// * `MISS`  - the lookup returned no entry or a cache error,
/// * `ERROR` - the lookup did not finish within the timeout.
///
/// `tokio::select!` returns as soon as the first of the three futures
/// completes; the elapsed time and the counter value are then printed.
///
/// # Panics
///
/// Panics if building the website or subscribing fails (both are unwrapped).
#[tokio::main]
async fn main() {
    let mut website: Website = Website::new("https://rsseau.fr")
        .with_caching(true)
        .build()
        .unwrap();
    let mut page_rx: tokio::sync::broadcast::Receiver<spider::page::Page> =
        website.subscribe(16).unwrap();

    let started = std::time::Instant::now();

    let mut first_crawler = website.clone();
    let mut second_crawler = website.clone();

    let listener = async move {
        loop {
            // Stop listening on the first receive error.
            let page = match page_rx.recv().await {
                Ok(page) => page,
                Err(_) => break,
            };

            let mut out = tokio::io::stdout();
            let key = string_concat!("GET:", page.get_url());

            // The lookup runs in its own task so a slow cache read does not
            // hold up receiving the next page. The task is not awaited, so the
            // counter only reflects lookups that have finished when it is read.
            tokio::task::spawn(async move {
                let lookup = tokio::time::timeout(Duration::from_millis(60), async {
                    spider::website::CACACHE_MANAGER.get(&key).await
                })
                .await;

                let message = match lookup {
                    Ok(Ok(Some(_cached))) => format!("HIT - {:?}\n", key),
                    Ok(Ok(None)) | Ok(Err(_)) => format!("MISS - {:?}\n", key),
                    // Outer `Err` means the timeout elapsed.
                    Err(_) => format!("ERROR - {:?}\n", key),
                };
                // Best effort: a failed stdout write is deliberately ignored.
                let _ = out.write_all(message.as_bytes()).await;

                GLOBAL_URL_COUNT.fetch_add(1, Ordering::Relaxed);
            });
        }
    };

    tokio::pin!(listener);

    // Whichever future finishes first ends the run; the others are dropped.
    tokio::select! {
        _ = first_crawler.crawl() => {}
        _ = second_crawler.crawl() => {}
        _ = listener => {}
    };

    let elapsed = started.elapsed();

    println!(
        "Time elapsed in website.crawl() is: {:?} for total pages: {:?}",
        elapsed, GLOBAL_URL_COUNT
    );
}