From b55ed8f448b0dfca743696e502e9759de5656d53 Mon Sep 17 00:00:00 2001 From: Martin Algesten Date: Fri, 12 Jul 2024 19:14:24 +0200 Subject: [PATCH] gzip and brotli support --- Cargo.lock | 62 +++++++++++++++++++++++ Cargo.toml | 10 +++- src/agent.rs | 26 +++++++++- src/body.rs | 139 +++++++++++++++++++++++++++++++++++++++++++++------ src/lib.rs | 4 +- src/util.rs | 1 + 6 files changed, 219 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 89a4165c..4ccf2d1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -11,6 +17,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "base64" version = "0.22.1" @@ -29,6 +50,16 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "brotli-decompressor" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -97,6 +128,15 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "der" version = "0.7.9" @@ -151,6 +191,16 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -301,6 +351,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + [[package]] name = "native-tls" version = "0.2.12" @@ -759,10 +818,12 @@ name = "ureq" version = "3.0.0-beta1" dependencies = [ "base64", + "brotli-decompressor", "cc", "cookie_store", "der", "env_logger", + "flate2", "hoot", "http", "log", @@ -772,6 +833,7 @@ dependencies = [ "rustls-native-certs", "rustls-pemfile", "rustls-pki-types", + "smallvec", "socks", "thiserror", "url", diff --git a/Cargo.toml b/Cargo.toml index 23144f64..3a5f30e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,15 +19,17 @@ exclude = ["/cargo_deny.sh", "/deny.toml", "/test.sh"] rust-version = "1.67" [package.metadata.docs.rs] -features = ["rustls", "native-roots", "native-tls", "socks-proxy", "cookies"] +features = ["rustls", "native-roots", "native-tls", "socks-proxy", "cookies", "gzip", "brotli"] [features] -default = ["rustls", "native-roots", "native-tls", "socks-proxy", "cookies"] +default = ["rustls", "native-roots", "native-tls", "socks-proxy", "cookies", "gzip", "brotli"] rustls = ["dep:rustls", "_tls"] native-tls = ["dep:native-tls", "dep:der", "_tls"] native-roots = ["dep:rustls-native-certs"] socks-proxy = ["dep:socks"] cookies = ["dep:cookie_store", "_url"] +gzip = ["dep:flate2"] +brotli = ["dep:brotli-decompressor"] # Underscore prefixed features are internal _url = ["dep:url"] @@ -40,6 +42,7 @@ http = "1.1.0" log = "0.4.22" thiserror = "1.0.61" once_cell = "1.19.0" +smallvec = "1.13.2" # These are used regardless of TLS implementation. rustls-pemfile = { version = "2.1.2", optional = true, default-features = false, features = ["std"] } @@ -58,6 +61,9 @@ socks = { version = "0.3.4", optional = true } cookie_store = { version = "0.21.0", optional = true, default-features = false, features = ["preserve_order"] } url = { version = "2.3.1", optional = true, default-features = false } +flate2 = { version = "1.0.30", optional = true } +brotli-decompressor = { version = "4.0.1", optional = true } + [build-dependencies] cc = "1.0.106" diff --git a/src/agent.rs b/src/agent.rs index d039a410..afca3dbf 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use hoot::client::flow::RedirectAuthHeaders; use http::{Method, Request, Response, Uri}; -use crate::body::Body; +use crate::body::{Body, ResponseInfo}; use crate::pool::{Connection, ConnectionPool}; use crate::proxy::Proxy; use crate::resolver::{DefaultResolver, Resolver}; @@ -285,6 +285,27 @@ impl Agent { unit.handle_input(current_time(), input, &mut [])?; } + #[cfg(any(feature = "gzip", feature = "brotli"))] + { + use once_cell::sync::Lazy; + static ACCEPTS: Lazy = Lazy::new(|| { + let mut value = String::with_capacity(10); + #[cfg(feature = "gzip")] + value.push_str("gzip"); + #[cfg(all(feature = "gzip", feature = "brotli"))] + value.push_str(", "); + #[cfg(feature = "brotli")] + value.push_str("br"); + value + }); + let input = Input::Header { + name: http::HeaderName::from_static("accept-encoding"), + // unwrap is ok because above ACCEPTS will produce a valid value + value: http::HeaderValue::from_str(&*ACCEPTS).unwrap(), + }; + unit.handle_input(current_time(), input, &mut [])?; + } + unit.handle_input(current_time(), Input::Prepared, &mut [])?; } @@ -371,7 +392,8 @@ impl Agent { let unit = unit.release_body(); let (parts, _) = response.into_parts(); - let recv_body = Body::new(unit, connection, current_time); + let info = ResponseInfo::new(&parts.headers); + let recv_body = Body::new(unit, connection, info, current_time); let response = Response::from_parts(parts, recv_body); info!("{}", response.status()); diff --git a/src/body.rs b/src/body.rs index 5dd20e03..7cfd7064 100644 --- a/src/body.rs +++ b/src/body.rs @@ -9,28 +9,58 @@ use crate::Error; pub struct Body { unit: Unit<()>, connection: Option, + info: ResponseInfo, current_time: Box Instant + Send + Sync>, } +#[derive(Clone, Copy)] +pub(crate) struct ResponseInfo { + content_encoding: ContentEncoding, +} + +#[derive(Clone, Copy)] +enum ContentEncoding { + None, + Gzip, + Brotli, + Unknown, +} + +impl ResponseInfo { + pub fn new(headers: &http::HeaderMap) -> Self { + let content_encoding = headers + .get("content-encoding") + .and_then(|v| v.to_str().ok()) + .map(ContentEncoding::from) + .unwrap_or(ContentEncoding::None); + + ResponseInfo { content_encoding } + } +} + impl Body { pub(crate) fn new( unit: Unit<()>, connection: Connection, + info: ResponseInfo, current_time: impl Fn() -> Instant + Send + Sync + 'static, ) -> Self { Body { unit, connection: Some(connection), + info, current_time: Box::new(current_time), } } pub fn as_reader(&mut self, limit: u64) -> BodyReader { - BodyReader::shared(self, limit) + let info = self.info; + BodyReader::new(LimitReader::shared(self, limit), info) } pub fn into_reader(self, limit: u64) -> BodyReader<'static> { - BodyReader::owned(self, limit) + let info = self.info; + BodyReader::new(LimitReader::owned(self, limit), info) } pub fn read_to_string(&mut self, limit: usize) -> Result { @@ -94,6 +124,70 @@ impl Body { } pub struct BodyReader<'a> { + reader: ContentDecoder<'a>, +} + +impl<'a> BodyReader<'a> { + fn new(reader: LimitReader<'a>, info: ResponseInfo) -> BodyReader<'a> { + let reader = match info.content_encoding { + ContentEncoding::None => ContentDecoder::PassThrough(reader), + #[cfg(feature = "gzip")] + ContentEncoding::Gzip => { + ContentDecoder::Gzip(flate2::read::MultiGzDecoder::new(reader)) + } + #[cfg(not(feature = "gzip"))] + ContentEncoding::Gzip => { + info!("Not decompressing. Enable feature gzip"); + ContentDecoder::Gzip(reader) + } + #[cfg(feature = "brotli")] + ContentEncoding::Brotli => { + ContentDecoder::Brotli(brotli_decompressor::Decompressor::new(reader, 4096)) + } + #[cfg(not(feature = "brotli"))] + ContentEncoding::Brotli => { + info!("Not decompressing. Enable feature brotli"); + ContentDecoder::Brotli(reader) + } + ContentEncoding::Unknown => { + info!("Unknown content-encoding"); + ContentDecoder::PassThrough(reader) + } + }; + + BodyReader { reader } + } +} + +impl<'a> Read for BodyReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } +} + +enum ContentDecoder<'a> { + #[cfg(feature = "gzip")] + Gzip(flate2::read::MultiGzDecoder>), + #[cfg(not(feature = "gzip"))] + Gzip(LimitReader<'a>), + #[cfg(feature = "brotli")] + Brotli(brotli_decompressor::Decompressor>), + #[cfg(not(feature = "brotli"))] + Brotli(LimitReader<'a>), + PassThrough(LimitReader<'a>), +} + +impl<'a> Read for ContentDecoder<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + ContentDecoder::Gzip(v) => v.read(buf), + ContentDecoder::Brotli(v) => v.read(buf), + ContentDecoder::PassThrough(v) => v.read(buf), + } + } +} + +struct LimitReader<'a> { body: BodyRef<'a>, left: u64, } @@ -103,34 +197,34 @@ enum BodyRef<'a> { Owned(Body), } -impl<'a> BodyReader<'a> { - fn shared(body: &'a mut Body, limit: u64) -> BodyReader<'a> { - Self { - body: BodyRef::Shared(body), - left: limit, +impl<'a> BodyRef<'a> { + fn do_read(&mut self, buf: &mut [u8]) -> Result { + match self { + BodyRef::Shared(v) => v.do_read(buf), + BodyRef::Owned(v) => v.do_read(buf), } } } -impl BodyReader<'static> { - fn owned(body: Body, limit: u64) -> BodyReader<'static> { +impl<'a> LimitReader<'a> { + fn shared(body: &'a mut Body, limit: u64) -> LimitReader<'a> { Self { - body: BodyRef::Owned(body), + body: BodyRef::Shared(body), left: limit, } } } -impl<'a> BodyRef<'a> { - fn do_read(&mut self, buf: &mut [u8]) -> Result { - match self { - BodyRef::Shared(v) => v.do_read(buf), - BodyRef::Owned(v) => v.do_read(buf), +impl LimitReader<'static> { + fn owned(body: Body, limit: u64) -> LimitReader<'static> { + Self { + body: BodyRef::Owned(body), + left: limit, } } } -impl<'a> Read for BodyReader<'a> { +impl<'a> Read for LimitReader<'a> { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.left == 0 { return Err(Error::BodyExceedsLimit.into_io()); @@ -155,3 +249,16 @@ impl fmt::Debug for Body { f.debug_struct("Body").finish() } } + +impl From<&str> for ContentEncoding { + fn from(s: &str) -> Self { + match s { + "gzip" => ContentEncoding::Gzip, + "br" => ContentEncoding::Brotli, + _ => { + info!("Unknown content-encoding: {}", s); + ContentEncoding::Unknown + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ba624580..e1ff9615 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,9 +86,7 @@ mod test { #[test] fn simple_get() { env_logger::init(); - let mut response = get("https://httpbin.org/relative-redirect/3") - .call() - .unwrap(); + let mut response = get("https://httpbin.org/gzip").call().unwrap(); // println!("{:#?}", response); let _body = response.body_mut().read_to_string(16384).unwrap(); // println!("body: {:?}", body); diff --git a/src/util.rs b/src/util.rs index dc546f66..61fa116f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -153,6 +153,7 @@ const NON_SENSITIVE_HEADERS: &[&str] = &[ "transfer-encoding", "connection", "location", + "content-encoding", ]; impl<'a> fmt::Debug for DebugHeaders<'a> {