From 4b95d4d29e72012f7365e31331a17f8ca496e1ec Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Sun, 27 Sep 2020 22:54:56 -0700 Subject: [PATCH 1/4] Cookie refactor --- src/agent.rs | 15 ++++- src/unit.rs | 181 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 120 insertions(+), 76 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 1aa59290..e9bf406a 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -52,6 +52,7 @@ pub(crate) struct AgentState { /// Reused connections between requests. pub(crate) pool: ConnectionPool, /// Cookies saved between requests. + /// Invariant: All cookies must have a nonempty domain and path. #[cfg(feature = "cookie")] pub(crate) jar: CookieJar, pub(crate) resolver: ArcResolver, @@ -237,11 +238,23 @@ impl Agent { /// ``` /// let agent = ureq::agent(); /// - /// let cookie = ureq::Cookie::new("name", "value"); + /// let cookie = ureq::Cookie::build("name", "value") + /// .domain("example.com") + /// .path("/") + /// .secure(true) + /// .finish(); /// agent.set_cookie(cookie); /// ``` #[cfg(feature = "cookie")] pub fn set_cookie(&self, cookie: Cookie<'static>) { + if cookie.domain().is_none() { + return; + } + let mut cookie = cookie.clone(); + if cookie.path().is_none() { + cookie.set_path("/"); + } + let mut state = self.state.lock().unwrap(); state.jar.add_original(cookie); } diff --git a/src/unit.rs b/src/unit.rs index 506bd830..4f071a02 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -48,10 +48,6 @@ impl Unit { let query_string = combine_query(&url, &req.query, mix_queries); - let cookie_header: Option
= url - .host_str() - .and_then(|host_str| extract_cookies(&req.agent, &url.scheme(), host_str, &url.path())); - let extra_headers = { let mut extra = vec![]; @@ -83,12 +79,15 @@ impl Unit { extra.push(Header::new("Authorization", &format!("Basic {}", encoded))); } + #[cfg(feature = "cookie")] + extra.extend(extract_cookies(&req.agent, &url).into_iter()); + extra }; + let headers: Vec<_> = req .headers .iter() - .chain(cookie_header.iter()) .chain(extra_headers.iter()) .cloned() .collect(); @@ -191,6 +190,7 @@ pub(crate) fn connect( } // squirrel away cookies + #[cfg(feature = "cookie")] save_cookies(&unit, &resp); // handle redirects @@ -238,62 +238,79 @@ pub(crate) fn connect( } #[cfg(feature = "cookie")] -fn extract_cookies( - state: &std::sync::Mutex, - scheme: &str, - host: &str, - path: &str, -) -> Option
{ +fn extract_cookies(state: &std::sync::Mutex, url: &Url) -> Option
{ + // We specifically use url.domain() here because cookies cannot be + // set for IP addresses. + let domain = match url.domain() { + Some(d) => d, + None => return None, + }; + let path = url.path(); + let is_secure = url.scheme().eq_ignore_ascii_case("https"); + let state = state.lock().unwrap(); - let is_secure = scheme.eq_ignore_ascii_case("https"); + match_cookies(&state.jar, domain, path, is_secure) +} - match_cookies(&state.jar, host, path, is_secure) +// Return true iff the string domain-matches the domain. +// This function must only be called on hostnames, not IP addresses. +// +// https://tools.ietf.org/html/rfc6265#section-5.1.3 +// A string domain-matches a given domain string if at least one of the +// following conditions hold: +// +// o The domain string and the string are identical. (Note that both +// the domain string and the string will have been canonicalized to +// lower case at this point.) +// o All of the following conditions hold: +// * The domain string is a suffix of the string. +// * The last character of the string that is not included in the +// domain string is a %x2E (".") character. +// * The string is a host name (i.e., not an IP address). +#[cfg(feature = "cookie")] +fn domain_match(s: &str, domain: &str) -> bool { + match s.strip_suffix(domain) { + Some("") => true, // domain and string are identical. + Some(remains) => remains.ends_with('.'), + None => false, // domain was not a suffix of string. + } } -#[cfg(not(feature = "cookie"))] -fn extract_cookies( - _state: &std::sync::Mutex, - _scheme: &str, - _host: &str, - _path: &str, -) -> Option
{ - None +// Return true iff the request-path path-matches the cookie-path. +// https://tools.ietf.org/html/rfc6265#section-5.1.4 +// A request-path path-matches a given cookie-path if at least one of +// the following conditions holds: +// +// o The cookie-path and the request-path are identical. +// o The cookie-path is a prefix of the request-path, and the last +// character of the cookie-path is %x2F ("/"). +// o The cookie-path is a prefix of the request-path, and the first +// character of the request-path that is not included in the cookie- +// path is a %x2F ("/") character. +#[cfg(feature = "cookie")] +fn path_match(request_path: &str, cookie_path: &str) -> bool { + match request_path.strip_prefix(cookie_path) { + Some("") => true, // cookie path and request path were identical. + Some(remains) => cookie_path.ends_with('/') || remains.starts_with('/'), + None => false, // cookie path was not a prefix of request path + } } -// TODO check so cookies can't be set for tld:s #[cfg(feature = "cookie")] fn match_cookies(jar: &CookieJar, domain: &str, path: &str, is_secure: bool) -> Option
{ - Some( - jar.iter() - .filter(|c| { - // if there is a domain, it must be matched. - // if there is no domain, then ignore cookie - let domain_ok = c - .domain() - .map(|cdom| domain.contains(cdom)) - .unwrap_or(false); - // a path must match the beginning of request path. - // no cookie path, we say is ok. is it?! - let path_ok = c - .path() - .map(|cpath| path.find(cpath).map(|pos| pos == 0).unwrap_or(false)) - .unwrap_or(true); - // either the cookie isnt secure, or we're not doing a secure request. - let secure_ok = !c.secure().unwrap_or(false) || is_secure; - - domain_ok && path_ok && secure_ok - }) - .map(|c| { - let name = c.name().to_string(); - let value = c.value().to_string(); - let nameval = Cookie::new(name, value).encoded().to_string(); - nameval - }) - .collect::>() - .join(";"), - ) - .filter(|x| !x.is_empty()) - .map(|s| Header::new("Cookie", &s)) + let header_value = jar + .iter() + .filter(|c| domain_match(domain, c.domain().unwrap())) + .filter(|c| path_match(path, c.path().unwrap())) + .filter(|c| is_secure || !c.secure().unwrap_or(false)) + // Create a new cookie with just the name and value so we don't send attributes. + .map(|c| Cookie::new(c.name(), c.value()).encoded().to_string()) + .collect::>() + .join(";"); + match header_value.as_str() { + "" => None, + val => Some(Header::new("Cookie", val)), + } } /// Combine the query of the url and the query options set on the request object. @@ -398,35 +415,49 @@ fn send_prelude(unit: &Unit, stream: &mut Stream, redir: bool) -> io::Result<()> Ok(()) } -#[cfg(not(feature = "cookie"))] -fn save_cookies(_unit: &Unit, _resp: &Response) {} - /// Investigate a response for "Set-Cookie" headers. #[cfg(feature = "cookie")] fn save_cookies(unit: &Unit, resp: &Response) { // - let cookies = resp.all("set-cookie"); - if cookies.is_empty() { + // Specifically use domain here because IPs cannot have cookies. + let request_domain = match unit.url.domain() { + Some(d) => d.to_ascii_lowercase(), + None => return, + }; + let headers = resp.all("set-cookie"); + // Avoid locking if there are no cookie headers + if headers.is_empty() { return; } - - // only lock if we know there is something to process - let state = &mut unit.req.agent.lock().unwrap(); - for raw_cookie in cookies.iter() { - let to_parse = if raw_cookie.to_lowercase().contains("domain=") { - (*raw_cookie).to_string() - } else { - let host = &unit.url.host_str().unwrap().to_string(); - format!("{}; Domain={}", raw_cookie, host) + let cookies = headers.into_iter().flat_map(|header_value| { + let mut cookie = match Cookie::parse_encoded(header_value) { + Err(_) => return None, + Ok(c) => c, }; - match Cookie::parse_encoded(&to_parse[..]) { - Err(_) => (), // ignore unparseable cookies - Ok(cookie) => { - let cookie = cookie.into_owned(); - state.jar.add(cookie) - } + // Canonicalize the cookie domain, check that it matches the request, + // and store it back in the cookie. + // https://tools.ietf.org/html/rfc6265#section-5.3, Item 6 + // Summary: If domain is empty, set it from the request and + // set the host_only flag. + // TODO: store a host_only flag. + // TODO: Check so cookies can't be set for TLDs. + let cookie_domain = match cookie.domain() { + None => request_domain.clone(), + Some(d) if domain_match(&request_domain, &d) => d.to_ascii_lowercase(), + Some(_) => return None, + }; + cookie.set_domain(cookie_domain); + if cookie.path().is_none() { + cookie.set_path("/"); } + Some(cookie) + }); + let state = &mut unit.req.agent.lock().unwrap(); + for c in cookies { + assert!(c.domain().is_some()); + assert!(c.path().is_some()); + state.jar.add(c.into_owned()); } } @@ -448,8 +479,8 @@ mod tests { #[test] fn match_cookies_returns_one_header() { let mut jar = CookieJar::new(); - let cookie1 = Cookie::parse("cookie1=value1; Domain=crates.io").unwrap(); - let cookie2 = Cookie::parse("cookie2=value2; Domain=crates.io").unwrap(); + let cookie1 = Cookie::parse("cookie1=value1; Domain=crates.io; Path=/").unwrap(); + let cookie2 = Cookie::parse("cookie2=value2; Domain=crates.io; Path=/").unwrap(); jar.add(cookie1); jar.add(cookie2); From 9b39e55d1ce5e922a6d31de375dd8d34292e54ad Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Tue, 29 Sep 2020 00:20:32 -0700 Subject: [PATCH 2/4] Use cookie_store --- Cargo.toml | 1 + src/agent.rs | 39 +++++++++++-- src/test/agent_test.rs | 28 --------- src/unit.rs | 129 +++++++---------------------------------- 4 files changed, 54 insertions(+), 143 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fe9ec80a..09cdb4f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ serde = { version = "1", optional = true } serde_json = { version = "1", optional = true } encoding = { version = "0.2", optional = true } native-tls = { version = "0.2", optional = true } +cookie_store = "0.12.0" [dev-dependencies] serde = { version = "1", features = ["derive"] } diff --git a/src/agent.rs b/src/agent.rs index e9bf406a..a2d139a0 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -1,7 +1,11 @@ #[cfg(feature = "cookie")] -use cookie::{Cookie, CookieJar}; +use cookie::Cookie; +#[cfg(feature = "cookie")] +use cookie_store::CookieStore; use std::sync::Arc; use std::sync::Mutex; +#[cfg(feature = "cookie")] +use url::Url; use crate::header::{self, Header}; use crate::pool::ConnectionPool; @@ -54,7 +58,7 @@ pub(crate) struct AgentState { /// Cookies saved between requests. /// Invariant: All cookies must have a nonempty domain and path. #[cfg(feature = "cookie")] - pub(crate) jar: CookieJar, + pub(crate) jar: CookieStore, pub(crate) resolver: ArcResolver, } @@ -220,6 +224,9 @@ impl Agent { /// either by setting it in the agent, or by making requests /// that `Set-Cookie` in the agent. /// + /// Note that this will return any cookie for the given name, + /// regardless of which host and path that cookie was set on. + /// /// ``` /// let agent = ureq::agent(); /// @@ -230,15 +237,24 @@ impl Agent { #[cfg(feature = "cookie")] pub fn cookie(&self, name: &str) -> Option> { let state = self.state.lock().unwrap(); - state.jar.get(name).cloned() + let first_found = state.jar.iter_any().find(|c| c.name() == name); + if let Some(first_found) = first_found { + let c: &Cookie = &*first_found; + Some(c.clone()) + } else { + None + } } /// Set a cookie in this agent. /// + /// Cookies without a domain, or with a malformed domain or path, + /// will be silently ignored. + /// /// ``` /// let agent = ureq::agent(); /// - /// let cookie = ureq::Cookie::build("name", "value") + /// let cookie = ureq::Cookie::build("name", "value") /// .domain("example.com") /// .path("/") /// .secure(true) @@ -247,16 +263,27 @@ impl Agent { /// ``` #[cfg(feature = "cookie")] pub fn set_cookie(&self, cookie: Cookie<'static>) { + let mut cookie = cookie.clone(); if cookie.domain().is_none() { return; } - let mut cookie = cookie.clone(); + if cookie.path().is_none() { cookie.set_path("/"); } + let path = cookie.path().unwrap(); + let domain = cookie.domain().unwrap(); + let fake_url: Url = match format!("http://{}{}", domain, path).parse() { + Ok(u) => u, + Err(_) => return, + }; let mut state = self.state.lock().unwrap(); - state.jar.add_original(cookie); + let cs_cookie = match cookie_store::Cookie::try_from_raw_cookie(&cookie, &fake_url) { + Ok(c) => c, + Err(_) => return, + }; + state.jar.insert(cs_cookie, &fake_url).ok(); } /// Make a GET request from this agent. diff --git a/src/test/agent_test.rs b/src/test/agent_test.rs index 97568b39..3d83175c 100644 --- a/src/test/agent_test.rs +++ b/src/test/agent_test.rs @@ -31,34 +31,6 @@ fn agent_reuse_headers() { assert_eq!(resp.header("X-Call").unwrap(), "2"); } -#[cfg(feature = "cookie")] -#[test] -fn agent_cookies() { - let agent = agent(); - - test::set_handler("/agent_cookies", |_unit| { - test::make_response( - 200, - "OK", - vec!["Set-Cookie: foo=bar%20baz; Path=/; HttpOnly"], - vec![], - ) - }); - - agent.get("test://host/agent_cookies").call(); - - assert!(agent.cookie("foo").is_some()); - assert_eq!(agent.cookie("foo").unwrap().value(), "bar baz"); - - test::set_handler("/agent_cookies", |unit| { - assert!(unit.has("cookie")); - assert_eq!(unit.header("cookie").unwrap(), "foo=bar%20baz"); - test::make_response(200, "OK", vec![], vec![]) - }); - - agent.get("test://host/agent_cookies").call(); -} - // Handler that answers with a simple HTTP response, and times // out idle connections after 2 seconds. fn idle_timeout_handler(mut stream: TcpStream) -> io::Result<()> { diff --git a/src/unit.rs b/src/unit.rs index 4f071a02..e439b154 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -5,7 +5,7 @@ use qstring::QString; use url::Url; #[cfg(feature = "cookie")] -use cookie::{Cookie, CookieJar}; +use cookie::Cookie; use crate::agent::AgentState; use crate::body::{self, BodySize, Payload, SizedReader}; @@ -239,71 +239,10 @@ pub(crate) fn connect( #[cfg(feature = "cookie")] fn extract_cookies(state: &std::sync::Mutex, url: &Url) -> Option
{ - // We specifically use url.domain() here because cookies cannot be - // set for IP addresses. - let domain = match url.domain() { - Some(d) => d, - None => return None, - }; - let path = url.path(); - let is_secure = url.scheme().eq_ignore_ascii_case("https"); - let state = state.lock().unwrap(); - match_cookies(&state.jar, domain, path, is_secure) -} - -// Return true iff the string domain-matches the domain. -// This function must only be called on hostnames, not IP addresses. -// -// https://tools.ietf.org/html/rfc6265#section-5.1.3 -// A string domain-matches a given domain string if at least one of the -// following conditions hold: -// -// o The domain string and the string are identical. (Note that both -// the domain string and the string will have been canonicalized to -// lower case at this point.) -// o All of the following conditions hold: -// * The domain string is a suffix of the string. -// * The last character of the string that is not included in the -// domain string is a %x2E (".") character. -// * The string is a host name (i.e., not an IP address). -#[cfg(feature = "cookie")] -fn domain_match(s: &str, domain: &str) -> bool { - match s.strip_suffix(domain) { - Some("") => true, // domain and string are identical. - Some(remains) => remains.ends_with('.'), - None => false, // domain was not a suffix of string. - } -} - -// Return true iff the request-path path-matches the cookie-path. -// https://tools.ietf.org/html/rfc6265#section-5.1.4 -// A request-path path-matches a given cookie-path if at least one of -// the following conditions holds: -// -// o The cookie-path and the request-path are identical. -// o The cookie-path is a prefix of the request-path, and the last -// character of the cookie-path is %x2F ("/"). -// o The cookie-path is a prefix of the request-path, and the first -// character of the request-path that is not included in the cookie- -// path is a %x2F ("/") character. -#[cfg(feature = "cookie")] -fn path_match(request_path: &str, cookie_path: &str) -> bool { - match request_path.strip_prefix(cookie_path) { - Some("") => true, // cookie path and request path were identical. - Some(remains) => cookie_path.ends_with('/') || remains.starts_with('/'), - None => false, // cookie path was not a prefix of request path - } -} - -#[cfg(feature = "cookie")] -fn match_cookies(jar: &CookieJar, domain: &str, path: &str, is_secure: bool) -> Option
{ - let header_value = jar - .iter() - .filter(|c| domain_match(domain, c.domain().unwrap())) - .filter(|c| path_match(path, c.path().unwrap())) - .filter(|c| is_secure || !c.secure().unwrap_or(false)) - // Create a new cookie with just the name and value so we don't send attributes. + let header_value = state + .jar + .get_request_cookies(url) .map(|c| Cookie::new(c.name(), c.value()).encoded().to_string()) .collect::>() .join(";"); @@ -420,45 +359,19 @@ fn send_prelude(unit: &Unit, stream: &mut Stream, redir: bool) -> io::Result<()> fn save_cookies(unit: &Unit, resp: &Response) { // - // Specifically use domain here because IPs cannot have cookies. - let request_domain = match unit.url.domain() { - Some(d) => d.to_ascii_lowercase(), - None => return, - }; let headers = resp.all("set-cookie"); // Avoid locking if there are no cookie headers if headers.is_empty() { return; } let cookies = headers.into_iter().flat_map(|header_value| { - let mut cookie = match Cookie::parse_encoded(header_value) { - Err(_) => return None, - Ok(c) => c, - }; - // Canonicalize the cookie domain, check that it matches the request, - // and store it back in the cookie. - // https://tools.ietf.org/html/rfc6265#section-5.3, Item 6 - // Summary: If domain is empty, set it from the request and - // set the host_only flag. - // TODO: store a host_only flag. - // TODO: Check so cookies can't be set for TLDs. - let cookie_domain = match cookie.domain() { - None => request_domain.clone(), - Some(d) if domain_match(&request_domain, &d) => d.to_ascii_lowercase(), - Some(_) => return None, - }; - cookie.set_domain(cookie_domain); - if cookie.path().is_none() { - cookie.set_path("/"); + match Cookie::parse(header_value.to_string()) { + Err(_) => None, + Ok(c) => Some(c), } - Some(cookie) }); let state = &mut unit.req.agent.lock().unwrap(); - for c in cookies { - assert!(c.domain().is_some()); - assert!(c.path().is_some()); - state.jar.add(c.into_owned()); - } + state.jar.store_response_cookies(cookies, &unit.url.clone()); } #[cfg(test)] @@ -466,27 +379,25 @@ fn save_cookies(unit: &Unit, resp: &Response) { mod tests { use super::*; + use crate::Agent; ///////////////////// COOKIE TESTS ////////////////////////////// - #[test] - fn match_cookies_returns_nothing_when_no_cookies() { - let jar = CookieJar::new(); - - let result = match_cookies(&jar, "crates.io", "/", false); - assert_eq!(result, None); - } - #[test] fn match_cookies_returns_one_header() { - let mut jar = CookieJar::new(); - let cookie1 = Cookie::parse("cookie1=value1; Domain=crates.io; Path=/").unwrap(); - let cookie2 = Cookie::parse("cookie2=value2; Domain=crates.io; Path=/").unwrap(); - jar.add(cookie1); - jar.add(cookie2); + let agent = Agent::default(); + let url: Url = "https://crates.io/".parse().unwrap(); + let cookie1: Cookie = "cookie1=value1; Domain=crates.io; Path=/".parse().unwrap(); + let cookie2: Cookie = "cookie2=value2; Domain=crates.io; Path=/".parse().unwrap(); + agent + .state + .lock() + .unwrap() + .jar + .store_response_cookies(vec![cookie1, cookie2].into_iter(), &url); // There's no guarantee to the order in which cookies are defined. // Ensure that they're either in one order or the other. - let result = match_cookies(&jar, "crates.io", "/", false); + let result = extract_cookies(&agent.state, &url); let order1 = "cookie1=value1;cookie2=value2"; let order2 = "cookie2=value2;cookie1=value1"; From 9300c504612e7a41be8fed9ada3b40881d04b553 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Tue, 29 Sep 2020 01:45:26 -0700 Subject: [PATCH 3/4] Make cookie_store optional --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09cdb4f0..f9b001c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ json = ["serde", "serde_json"] charset = ["encoding"] tls = ["rustls", "webpki", "webpki-roots"] native-certs = ["rustls-native-certs"] -cookies = ["cookie"] +cookies = ["cookie", "cookie_store"] socks-proxy = ["socks"] [dependencies] @@ -39,7 +39,7 @@ serde = { version = "1", optional = true } serde_json = { version = "1", optional = true } encoding = { version = "0.2", optional = true } native-tls = { version = "0.2", optional = true } -cookie_store = "0.12.0" +cookie_store = { version = "0.12.0", optional = true } [dev-dependencies] serde = { version = "1", features = ["derive"] } From d4dfe4096f6ea2e63650c5ba11c0b679a05035a7 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Thu, 1 Oct 2020 14:27:59 -0700 Subject: [PATCH 4/4] Feature-gate AgentState --- src/unit.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/unit.rs b/src/unit.rs index 3ea4d5de..452daca4 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -8,6 +8,7 @@ use url::Url; #[cfg(feature = "cookie")] use cookie::Cookie; +#[cfg(feature = "cookie")] use crate::agent::AgentState; use crate::body::{self, BodySize, Payload, SizedReader}; use crate::header;