diff --git a/Cargo.lock b/Cargo.lock index b03f20e..c5bca56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -517,7 +517,7 @@ dependencies = [ "markup5ever", "nom", "tendril", - "thiserror 2.0.4", + "thiserror 2.0.6", "unicode-width", ] @@ -571,9 +571,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper2" -version = "0.14.66" +version = "0.14.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1f878abe9e3fd7d2a64cf087bd30e1e78c138e292faf0f13225a56845ade93" +checksum = "d9737fcbdab5ba611c942dd67cbe39b944541b6997a9f728474512a86ebd5646" dependencies = [ "bytes", "futures-channel", @@ -584,6 +584,7 @@ dependencies = [ "httparse", "httpdate", "itoa", + "lru", "pin-project-lite", "rh2", "socket2", @@ -781,9 +782,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.167" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libloading" @@ -838,6 +839,12 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" + [[package]] name = "mac" version = "0.1.1" @@ -897,9 +904,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", ] @@ -1108,9 
+1115,11 @@ dependencies = [ "pyo3", "pyo3-log", "pythonize", + "rand", "rquest", "serde_json", "tokio", + "webpki-root-certs", ] [[package]] @@ -1322,9 +1331,9 @@ dependencies = [ [[package]] name = "rquest" -version = "0.30.5" +version = "0.31.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d2945834d2fcbdfd1dab3943f3c549beafca5b5f941b6a94d341ac1e100b625" +checksum = "3ec45993a20ed6cfdfd5545352ff8e4611f6ced82a26063d21a23730db13d462" dependencies = [ "antidote", "async-compression", @@ -1589,11 +1598,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.4" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" +checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" dependencies = [ - "thiserror-impl 2.0.4", + "thiserror-impl 2.0.6", ] [[package]] @@ -1609,9 +1618,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.4" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" +checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index dfb8a6f..af6e16b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ pyo3 = { version = "0.23", features = ["extension-module", "abi3-py38", "indexma anyhow = "1" log = "0.4" pyo3-log = "0.12" -rquest = { version = "0.30", features = [ +rquest = { version = "0.31", features = [ "cookies", "multipart", "json", @@ -34,6 +34,8 @@ html2text = "0.13" bytes = "1" pythonize = "0.23" serde_json = "1" +webpki-root-certs = "0.26" +rand = "0.8" [profile.release] codegen-units = 1 diff --git a/README.md b/README.md index 4a7ff85..cbab312 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Provides precompiled wheels:
- [I. Client](#i-client) - [Client methods](#client-methods) - [Response object](#response-object) + - [Devices](#devices) - [Examples](#examples) - [II. AsyncClient](#ii-asyncclient) @@ -143,12 +144,23 @@ resp.text_rich # html is converted to rich text resp.url ``` +#### Devices + +- Chrome: `Chrome100`,`Chrome101`,`Chrome104`,`Chrome105`,`Chrome106`,`Chrome107`,`Chrome108`,`Chrome109`,`Chrome114`,`Chrome116`,`Chrome117`,`Chrome118`,`Chrome119`,`Chrome120`,`Chrome123`,`Chrome124`,`Chrome126`,`Chrome127`,`Chrome128`,`Chrome129`,`Chrome130`,`Chrome131` + +- Edge: `Edge101`,`Edge122`,`Edge127` + +- Safari: `SafariIos17_2`,`SafariIos17_4_1`,`SafariIos16_5`,`Safari15_3`,`Safari15_5`,`Safari15_6_1`,`Safari16`,`Safari16_5`,`Safari17_0`,`Safari17_2_1`,`Safari17_4_1`,`Safari17_5`,`Safari18`,`SafariIPad18` + +- OkHttp: `OkHttp3_9`,`OkHttp3_11`,`OkHttp3_13`,`OkHttp3_14`,`OkHttp4_9`,`OkHttp4_10`,`OkHttp5` + #### Examples ```python import primp -client = primp.Client(impersonate="chrome_131") +# Impersonate +client = primp.Client(impersonate="chrome_131") # chrome_131 # GET request resp = client.get("https://tls.peet.ws/api/all") @@ -196,7 +208,8 @@ export PRIMP_PROXY="socks5://127.0.0.1:1080" resp = primp.Client().get("https://tls.peet.ws/api/all") print(resp.json()) -# Using custom CA certificate store: file or certifi.where() or env var PRIMP_CA_BUNDLE +# Using custom CA certificate store: ca_cert_file argument or env var PRIMP_CA_BUNDLE +# (primp is built with Mozilla's latest trusted root certificates, so this is usually not necessary) resp = primp.Client(ca_cert_file="/cert/cacert.pem").get("https://tls.peet.ws/api/all") print(resp.json()) resp = primp.Client(ca_cert_file=certifi.where()).get("https://tls.peet.ws/api/all") @@ -215,4 +228,3 @@ print(r.text) ### II. 
AsyncClient TODO - diff --git a/src/lib.rs b/src/lib.rs index 6bc7011..1b30554 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -use std::fs; use std::str::FromStr; use std::sync::{Arc, LazyLock}; use std::time::Duration; @@ -11,12 +10,13 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyBytes; use pythonize::depythonize; -use rquest::boring::x509::{store::X509StoreBuilder, X509}; -use rquest::header::{HeaderMap, HeaderName, HeaderValue, COOKIE}; -use rquest::multipart; -use rquest::redirect::Policy; -use rquest::tls::Impersonate; -use rquest::Method; +use rquest::{ + header::{HeaderMap, HeaderName, HeaderValue, COOKIE}, + multipart, + redirect::Policy, + tls::Impersonate, + Method, +}; use serde_json::Value; use tokio::runtime::{self, Runtime}; @@ -24,6 +24,7 @@ mod response; use response::Response; mod utils; +use utils::load_ca_certs; // Tokio global one-thread runtime static RUNTIME: LazyLock = LazyLock::new(|| { @@ -32,12 +33,6 @@ static RUNTIME: LazyLock = LazyLock::new(|| { .build() .unwrap() }); -static PRIMP_CA_BUNDLE: LazyLock> = LazyLock::new(|| { - std::env::var("PRIMP_CA_BUNDLE") - .or(std::env::var("CA_CERT_FILE")) - .ok() -}); -static PRIMP_PROXY: LazyLock> = LazyLock::new(|| std::env::var("PRIMP_PROXY").ok()); #[pyclass] /// HTTP client that can impersonate web browsers. 
@@ -112,7 +107,7 @@ impl Client { cookies: Option>, cookie_store: Option, referer: Option, - proxy: Option<&str>, + proxy: Option, timeout: Option, impersonate: Option<&str>, follow_redirects: Option, @@ -160,8 +155,7 @@ impl Client { } // Proxy - let proxy = proxy.or(PRIMP_PROXY.as_deref()); - if let Some(proxy_url) = proxy { + if let Some(proxy_url) = proxy.or_else(|| std::env::var("PRIMP_PROXY").ok()) { let proxy = rquest::Proxy::all(proxy_url)?; client_builder = client_builder.proxy(proxy); } @@ -179,24 +173,16 @@ impl Client { client_builder = client_builder.redirect(Policy::none()); } - // Verify - let verify: bool = verify.unwrap_or(true); - if !verify { - client_builder = client_builder.danger_accept_invalid_certs(true); + // Ca_cert_file. BEFORE!!! verify (fn load_ca_certs() reads env var PRIMP_CA_BUNDLE) + if let Some(ca_bundle_path) = ca_cert_file { + std::env::set_var("PRIMP_CA_BUNDLE", ca_bundle_path); } - // Ca_cert_file - let ca_cert_file = ca_cert_file.or(PRIMP_CA_BUNDLE.clone()); - if let Some(ca_cert_file) = ca_cert_file { - client_builder = client_builder.ca_cert_store(move || { - let mut ca_store = X509StoreBuilder::new()?; - let cert_file = &fs::read(&ca_cert_file).expect("Failed to read ca_cert_file"); - let certs = X509::stack_from_pem(&cert_file)?; - for cert in certs { - ca_store.add_cert(cert)?; - } - Ok(ca_store.build()) - }); + // Verify + if verify.unwrap_or(true) { + client_builder = client_builder.ca_cert_store(load_ca_certs); + } else { + client_builder = client_builder.danger_accept_invalid_certs(true); } // Http version: http1 || http2 @@ -275,12 +261,12 @@ impl Client { "DELETE" => Ok(Method::DELETE), _ => Err(PyValueError::new_err("Unrecognized HTTP method")), }?; - let params = params.or(self.params.clone()); - let cookies = cookies.or(self.cookies.clone()); + let params = params.or_else(|| self.params.clone()); + let cookies = cookies.or_else(|| self.cookies.clone()); let data_value: Option = data.map(|data| 
depythonize(&data)).transpose()?; let json_value: Option = json.map(|json| depythonize(&json)).transpose()?; - let auth = auth.or(self.auth.clone()); - let auth_bearer = auth_bearer.or(self.auth_bearer.clone()); + let auth = auth.or_else(|| self.auth.clone()); + let auth_bearer = auth_bearer.or_else(|| self.auth_bearer.clone()); if auth.is_some() && auth_bearer.is_some() { return Err(PyValueError::new_err("Cannot provide both auth and auth_bearer").into()); } diff --git a/src/response.rs b/src/response.rs index f3a52c7..0e179f6 100644 --- a/src/response.rs +++ b/src/response.rs @@ -39,7 +39,7 @@ impl Response { return Ok(&self.encoding); } self.encoding = get_encoding_from_headers(&self.headers) - .or(get_encoding_from_content(&self.content.bind(py).as_bytes())) + .or_else(|| get_encoding_from_content(&self.content.bind(py).as_bytes())) .unwrap_or("UTF-8".to_string()); Ok(&self.encoding) } diff --git a/src/utils.rs b/src/utils.rs index 30a98e8..75c3065 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,7 +1,52 @@ use std::cmp::min; +use std::sync::LazyLock; use ahash::RandomState; use indexmap::IndexMap; +use rquest::boring::{ + error::ErrorStack, + x509::{ + store::{X509Store, X509StoreBuilder, X509StoreRef}, + X509, + }, +}; + +/// Loads the CA certificates from the file named by env var PRIMP_CA_BUNDLE (or CA_CERT_FILE), otherwise from the WebPKI certificate store; the store is built once and cached, and None is returned (after logging) when the cached result is an error +pub fn load_ca_certs() -> Option<&'static X509StoreRef> { + static CERT_STORE: LazyLock> = LazyLock::new(|| { + let mut ca_store = X509StoreBuilder::new()?; + if let Some(ca_cert_path) = std::env::var("PRIMP_CA_BUNDLE") + .or(std::env::var("CA_CERT_FILE")) + .ok() + { + // Use CA certificate bundle from env var PRIMP_CA_BUNDLE (or CA_CERT_FILE) + let cert_file = &std::fs::read(ca_cert_path) + .expect("Failed to read file from env var PRIMP_CA_BUNDLE"); + let certs = X509::stack_from_pem(&cert_file)?; + for cert in certs { + ca_store.add_cert(cert)?; + } + } else { + // Use WebPKI certificate store (Mozilla's trusted root certificates) + for cert in 
webpki_root_certs::TLS_SERVER_ROOT_CERTS { + let x509 = X509::from_der(cert)?; + ca_store.add_cert(x509)?; + } + } + Ok(ca_store.build()) + }); + + match CERT_STORE.as_ref() { + Ok(cert_store) => { + log::debug!("Loaded CA certs"); + Some(cert_store) + } + Err(err) => { + log::error!("Failed to load CA certs: {:?}", err); + None + } + } +} /// Get encoding from the "Content-Type" header pub fn get_encoding_from_headers( @@ -54,6 +99,54 @@ pub fn get_encoding_from_content(raw_bytes: &[u8]) -> Option { } } +#[cfg(test)] +mod load_ca_certs_tests { + use super::*; + use std::env; + use std::fs; + use std::path::Path; + + #[test] + fn test_load_ca_certs_with_env_var() { + // Create a temporary file with a CA certificate + let ca_cert_path = Path::new("test_ca_cert.pem"); + let ca_cert = "-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgIVAMIIujU9wQIBADANBgkqhkiG9w0BAQUFADBGMQswCQYD +VQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4g +Q29sbGVjdGlvbjEgMB4GA1UECgwXUG9zdGdyZXMgQ29uc3VsdGF0aW9uczEhMB8G +A1UECwwYUG9zdGdyZXMgQ29uc3VsdGF0aW9uczEhMB8GA1UEAwwYUG9zdGdyZXMg +Q29uc3VsdGF0aW9uczEiMCAGCSqGSIb3DQEJARYTcGVyc29uYWwtZW1haWwuY29t +MIIDdTCCAl2gAwIBAgIVAMIIujU9wQIBADANBgkqhkiG9w0BAQUFADBGMQswCQYD +VQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4g +Q29sbGVjdGlvbjEgMB4GA1UECgwXUG9zdGdyZXMgQ29uc3VsdGF0aW9uczEhMB8G +A1UECwwYUG9zdGdyZXMgQ29uc3VsdGF0aW9uczEhMB8GA1UEAwwYUG9zdGdyZXMg +Q29uc3VsdGF0aW9uczEiMCAGCSqGSIb3DQEJARYTcGVyc29uYWwtZW1haWwuY29t +-----END CERTIFICATE-----"; + fs::write(ca_cert_path, ca_cert).unwrap(); + + // Set the environment variable + env::set_var("PRIMP_CA_BUNDLE", ca_cert_path); + + // Call the function + let result = load_ca_certs(); + + // Check the result + assert!(result.is_some()); + + // Clean up + fs::remove_file(ca_cert_path).unwrap(); + } + + #[test] + fn test_load_ca_certs_without_env_var() { + // Call the function + let result = load_ca_certs(); + + // Check the result + assert!(result.is_some()); + } +} + #[cfg(test)] 
mod utils_tests { use super::*;