From d40c4f1d6ae8b6a10c54e95e6f870e2f5ccd7cfe Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Fri, 28 Jan 2022 18:48:42 +0100 Subject: [PATCH] Align IPv4 parsing to spec This commit aligns the IPv4 parsing steps to spec and adds the relevant WPT tests. --- url/src/host.rs | 70 +++++++++------- url/tests/unit.rs | 4 +- url/tests/urltestdata.json | 160 +++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 32 deletions(-) diff --git a/url/src/host.rs b/url/src/host.rs index c150dd997..a69d8c97d 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -109,7 +109,8 @@ impl Host { if domain.find(is_invalid_domain_char).is_some() { Err(ParseError::InvalidDomainCharacter) - } else if let Some(address) = parse_ipv4addr(&domain)? { + } else if ends_in_a_number(&domain) { + let address = parse_ipv4addr(&domain)?; Ok(Host::Ipv4(address)) } else { Ok(Host::Domain(domain)) @@ -264,8 +265,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { } } +/// +fn ends_in_a_number(input: &str) -> bool { + let mut parts = input.rsplit('.'); + let last = parts.next().unwrap(); + let last = if last.is_empty() { + if let Some(last) = parts.next() { + last + } else { + return false; + } + } else { + last + }; + if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) { + return true; + } + + parse_ipv4number(last).is_ok() +} + /// +/// Ok(None) means the input is a valid number, but it overflows a `u32`. fn parse_ipv4number(mut input: &str) -> Result, ()> { + if input.is_empty() { + return Err(()); + } + let mut r = 10; if input.starts_with("0x") || input.starts_with("0X") { input = &input[2..]; @@ -275,10 +301,10 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { r = 8; } - // At the moment we can't know the reason why from_str_radix fails - // https://github.com/rust-lang/rust/issues/22639 - // So instead we check if the input looks like a real number and only return - // an error when it's an overflow. + if input.is_empty() { + return Ok(Some(0)); + } + let valid_number = match r { 8 => input.chars().all(|c| ('0'..='7').contains(&c)), 10 => input.chars().all(|c| ('0'..='9').contains(&c)), @@ -287,50 +313,34 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { }), _ => false, }; - if !valid_number { - return Ok(None); + return Err(()); } - if input.is_empty() { - return Ok(Some(0)); - } - if input.starts_with('+') { - return Ok(None); - } match u32::from_str_radix(input, r) { - Ok(number) => Ok(Some(number)), - Err(_) => Err(()), + Ok(num) => Ok(Some(num)), + Err(_) => Ok(None), // The only possible error kind here is an integer overflow. + // The validity of the chars in the input is checked above. } } /// -fn parse_ipv4addr(input: &str) -> ParseResult> { - if input.is_empty() { - return Ok(None); - } +fn parse_ipv4addr(input: &str) -> ParseResult { let mut parts: Vec<&str> = input.split('.').collect(); if parts.last() == Some(&"") { parts.pop(); } if parts.len() > 4 { - return Ok(None); + return Err(ParseError::InvalidIpv4Address); } let mut numbers: Vec = Vec::new(); - let mut overflow = false; for part in parts { - if part.is_empty() { - return Ok(None); - } match parse_ipv4number(part) { Ok(Some(n)) => numbers.push(n), - Ok(None) => return Ok(None), - Err(()) => overflow = true, + Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow + Err(()) => return Err(ParseError::InvalidIpv4Address), }; } - if overflow { - return Err(ParseError::InvalidIpv4Address); - } let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { @@ -342,7 +352,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { for (counter, n) in numbers.iter().enumerate() { ipv4 += n << (8 * (3 - counter as u32)) } - Ok(Some(Ipv4Addr::from(ipv4))) + Ok(Ipv4Addr::from(ipv4)) } /// diff --git a/url/tests/unit.rs b/url/tests/unit.rs index 4dcba9c13..0fa99b6d8 100644 --- a/url/tests/unit.rs +++ b/url/tests/unit.rs @@ -256,7 +256,6 @@ fn host() { 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344, )), ); - assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49")); assert_host( "http://[::]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), @@ -271,7 +270,8 @@ fn host() { ); assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111))); - assert_host("http://2..2.3", Host::Domain("2..2.3")); + assert!(Url::parse("http://1.35.+33.49").is_err()); + assert!(Url::parse("http://2..2.3").is_err()); assert!(Url::parse("http://42.0x1232131").is_err()); assert!(Url::parse("http://192.168.0.257").is_err()); diff --git a/url/tests/urltestdata.json b/url/tests/urltestdata.json index c6a4d4061..4265f5928 100644 --- a/url/tests/urltestdata.json +++ b/url/tests/urltestdata.json @@ -7744,5 +7744,165 @@ "input": "?", "base": null, "failure": true + }, + "Last component looks like a number, but not valid IPv4", + { + "input": "http://1.2.3.4.5", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.4.5.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0..0x300/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0..0x300./", + "base": "about:blank", + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.08", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.08.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09..", + "base": "about:blank", + "hash": "", + "host": "foo.09..", + "hostname": "foo.09..", + "href":"http://foo.09../", + "password": "", + "pathname": "/", + "port":"", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://0999999999999999999/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123", + "base": "about:blank", + "failure": true + }, + { + "input": "http://💩.123/", + "base": "about:blank", + "failure": true } ]