diff --git a/Cargo.lock b/Cargo.lock index dbe2fb45a..96ce16411 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,9 +131,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "async-compression" @@ -1063,9 +1063,9 @@ checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encoding_rs" -version = "0.8.33" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] @@ -1159,9 +1159,9 @@ dependencies = [ [[package]] name = "fast_html5ever" -version = "0.26.1" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "656eabd1dff4108381e1c4a0cb5de7459f83ea29a2af7e7120254c85a3e6eac2" +checksum = "75f48c7fbaf50f16347bd85e9e43fdf1c7ee15a54bf294af4f1ed92d0a5e8b6b" dependencies = [ "fast_markup5ever", "log", @@ -1173,9 +1173,9 @@ dependencies = [ [[package]] name = "fast_markup5ever" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bad4b797ffa2976342fc044beb1db7cb44c401caac2187f36c130b08d86fb1" +checksum = "f899f3b7e9bf005393dd90318d7e721e0ecd230073b675a6737ed502ecff557c" dependencies = [ "log", "phf 0.11.2", @@ -2639,9 +2639,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -3377,7 +3377,7 @@ dependencies = [ [[package]] name = "spider" -version = "1.89.18" +version = "1.90.0" dependencies = [ "ahash", "async-openai", @@ -3427,7 +3427,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "1.89.18" +version = "1.90.0" dependencies = [ "clap", "env_logger", @@ -3450,7 +3450,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "1.89.18" +version = "1.90.0" dependencies = [ "env_logger", "lazy_static", @@ -3733,9 +3733,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.34" +version = "0.3.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +checksum = "ef89ece63debf11bc32d1ed8d078ac870cbeb44da02afb02a9ff135ae7ca0582" dependencies = [ "deranged", "itoa 1.0.11", @@ -3754,9 +3754,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ "num-conv", "time-core", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 94142d6ca..0aa12afb4 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "1.89.18" +version = "1.90.0" authors = [ "madeindjs ", "j-mendez ", @@ -40,7 +40,7 @@ ua_generator = { version = "0.3.6", optional = true } string_concat = "0.0.1" lazy_static = "1.4.0" compact_str = "0.7.1" -fast_html5ever = "0.26.1" +fast_html5ever = "0.26.4" selectors = "0.25.0" tendril = "0.4.3" ahash = "0.8.11" diff --git a/spider/README.md b/spider/README.md index 2969e5941..6aebc857c 100644 --- a/spider/README.md +++ b/spider/README.md @@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.tom ```toml [dependencies] -spider = "1.89.18" +spider = "1.90.0" ``` And then the code: @@ -93,7 +93,7 @@ We have the following optional feature flags. ```toml [dependencies] -spider = { version = "1.89.18", features = ["regex", "ua_generator"] } +spider = { version = "1.90.0", features = ["regex", "ua_generator"] } ``` 1. `ua_generator`: Enables auto generating a random real User-Agent. @@ -137,7 +137,7 @@ Move processing to a worker, drastically increases performance even if worker is ```toml [dependencies] -spider = { version = "1.89.18", features = ["decentralized"] } +spider = { version = "1.90.0", features = ["decentralized"] } ``` ```sh @@ -168,7 +168,7 @@ Use the subscribe method to get a broadcast channel. ```toml [dependencies] -spider = { version = "1.89.18", features = ["sync"] } +spider = { version = "1.90.0", features = ["sync"] } ``` ```rust,no_run @@ -198,7 +198,7 @@ Allow regex for blacklisting routes ```toml [dependencies] -spider = { version = "1.89.18", features = ["regex"] } +spider = { version = "1.90.0", features = ["regex"] } ``` ```rust,no_run @@ -225,7 +225,7 @@ If you are performing large workloads you may need to control the crawler by ena ```toml [dependencies] -spider = { version = "1.89.18", features = ["control"] } +spider = { version = "1.90.0", features = ["control"] } ``` ```rust @@ -295,7 +295,7 @@ Use cron jobs to run crawls continuously at anytime. ```toml [dependencies] -spider = { version = "1.89.18", features = ["sync", "cron"] } +spider = { version = "1.90.0", features = ["sync", "cron"] } ``` ```rust,no_run @@ -334,7 +334,7 @@ the feature flag [`chrome_intercept`] to possibly speed up request using Network ```toml [dependencies] -spider = { version = "1.89.18", features = ["chrome", "chrome_intercept"] } +spider = { version = "1.90.0", features = ["chrome", "chrome_intercept"] } ``` You can use `website.crawl_concurrent_raw` to perform a crawl without chromium when needed. Use the feature flag `chrome_headed` to enable headful browser usage if needed to debug. @@ -364,7 +364,7 @@ Enabling HTTP cache can be done with the feature flag [`cache`] or [`cache_mem`] ```toml [dependencies] -spider = { version = "1.89.18", features = ["cache"] } +spider = { version = "1.90.0", features = ["cache"] } ``` You need to set `website.cache` to true to enable as well. @@ -395,7 +395,7 @@ Intelligently run crawls using HTTP and JavaScript Rendering when needed. The be ```toml [dependencies] -spider = { version = "1.89.18", features = ["smart"] } +spider = { version = "1.90.0", features = ["smart"] } ``` ```rust,no_run @@ -421,7 +421,7 @@ Use OpenAI to generate dynamic scripts to drive the browser done with the featur ```toml [dependencies] -spider = { version = "1.89.18", features = ["openai"] } +spider = { version = "1.90.0", features = ["openai"] } ``` ```rust @@ -447,7 +447,7 @@ Set a depth limit to prevent forwarding. ```toml [dependencies] -spider = { version = "1.89.18", features = ["budget"] } +spider = { version = "1.90.0", features = ["budget"] } ``` ```rust,no_run diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 6b89091cb..3117f2456 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "1.89.18" +version = "1.90.0" authors = [ "madeindjs ", "j-mendez ", @@ -29,7 +29,7 @@ quote = "1.0.18" failure_derive = "0.1.8" [dependencies.spider] -version = "1.89.18" +version = "1.90.0" path = "../spider" [[bin]] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 9be1a3a8a..3b79dea4d 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "1.89.18" +version = "1.90.0" authors = [ "madeindjs ", "j-mendez ", @@ -25,7 +25,7 @@ lazy_static = "1.4.0" env_logger = "0.11.3" [dependencies.spider] -version = "1.89.18" +version = "1.90.0" path = "../spider" features = ["serde", "flexbuffers"]