Skip to content

Commit b370d36

Browse files
feat(uri): make Authority/PathAndQuery::from_static const (#786)
1 parent 0d74251 commit b370d36

File tree

2 files changed

+208
-95
lines changed

2 files changed

+208
-95
lines changed

src/uri/authority.rs

Lines changed: 132 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ use bytes::Bytes;
88
use super::{ErrorKind, InvalidUri, Port, URI_CHARS};
99
use crate::byte_str::ByteStr;
1010

11+
/// Reasons why validating an authority can fail.
12+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13+
enum AuthorityError {
14+
Empty,
15+
InvalidUriChar,
16+
InvalidAuthority,
17+
TooManyColons,
18+
MismatchedBrackets,
19+
InvalidBracketUsage,
20+
EmptyAfterAt,
21+
InvalidPercent,
22+
}
23+
1124
/// Represents the authority component of a URI.
1225
#[derive(Clone)]
1326
pub struct Authority {
@@ -45,9 +58,14 @@ impl Authority {
4558
/// let authority = Authority::from_static("example.com");
4659
/// assert_eq!(authority.host(), "example.com");
4760
/// ```
48-
pub fn from_static(src: &'static str) -> Self {
49-
Authority::from_shared(Bytes::from_static(src.as_bytes()))
50-
.expect("static str is not valid authority")
61+
#[inline]
62+
pub const fn from_static(src: &'static str) -> Self {
63+
match validate_authority_bytes(src.as_bytes()) {
64+
Ok(_) => Authority {
65+
data: ByteStr::from_static(src),
66+
},
67+
Err(_) => panic!("static str is not valid authority"),
68+
}
5169
}
5270

5371
/// Attempt to convert a `Bytes` buffer to an `Authority`.
@@ -69,95 +87,19 @@ impl Authority {
6987
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
7088
// ret is the return value.
7189
pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
72-
let mut colon_cnt = 0u32;
73-
let mut start_bracket = false;
74-
let mut end_bracket = false;
75-
let mut has_percent = false;
76-
let mut end = s.len();
77-
let mut at_sign_pos = None;
78-
const MAX_COLONS: u32 = 8; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
79-
80-
// Among other things, this loop checks that every byte in s up to the
81-
// first '/', '?', or '#' is a valid URI character (or in some contexts,
82-
// a '%'). This means that each such byte is a valid single-byte UTF-8
83-
// code point.
84-
for (i, &b) in s.iter().enumerate() {
85-
match URI_CHARS[b as usize] {
86-
b'/' | b'?' | b'#' => {
87-
end = i;
88-
break;
89-
}
90-
b':' => {
91-
if colon_cnt >= MAX_COLONS {
92-
return Err(ErrorKind::InvalidAuthority.into());
93-
}
94-
colon_cnt += 1;
95-
}
96-
b'[' => {
97-
if has_percent || start_bracket {
98-
// Something other than the userinfo has a `%`, so reject it.
99-
return Err(ErrorKind::InvalidAuthority.into());
100-
}
101-
start_bracket = true;
102-
}
103-
b']' => {
104-
if (!start_bracket) || end_bracket {
105-
return Err(ErrorKind::InvalidAuthority.into());
106-
}
107-
end_bracket = true;
108-
109-
// Those were part of an IPv6 hostname, so forget them...
110-
colon_cnt = 0;
111-
has_percent = false;
112-
}
113-
b'@' => {
114-
at_sign_pos = Some(i);
115-
116-
// Those weren't a port colon, but part of the
117-
// userinfo, so it needs to be forgotten.
118-
colon_cnt = 0;
119-
has_percent = false;
120-
}
121-
0 if b == b'%' => {
122-
// Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
123-
// https://url.spec.whatwg.org/#authority-state
124-
// the userinfo can have a percent-encoded username and password,
125-
// so record that a `%` was found. If this turns out to be
126-
// part of the userinfo, this flag will be cleared.
127-
// Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
128-
// be used to indicate a zone identifier.
129-
// If the flag hasn't been cleared at the end, that means this
130-
// was part of the hostname (and not part of an IPv6 address), and
131-
// will fail with an error.
132-
has_percent = true;
133-
}
134-
0 => {
135-
return Err(ErrorKind::InvalidUriChar.into());
136-
}
137-
_ => {}
90+
validate_authority_bytes(s).map_err(|e| {
91+
match e {
92+
AuthorityError::Empty => ErrorKind::Empty,
93+
AuthorityError::InvalidUriChar => ErrorKind::InvalidUriChar,
94+
AuthorityError::InvalidAuthority
95+
| AuthorityError::MismatchedBrackets
96+
| AuthorityError::InvalidBracketUsage
97+
| AuthorityError::EmptyAfterAt
98+
| AuthorityError::InvalidPercent
99+
| AuthorityError::TooManyColons => ErrorKind::InvalidAuthority,
138100
}
139-
}
140-
141-
if start_bracket ^ end_bracket {
142-
return Err(ErrorKind::InvalidAuthority.into());
143-
}
144-
145-
if colon_cnt > 1 {
146-
// Things like 'localhost:8080:3030' are rejected.
147-
return Err(ErrorKind::InvalidAuthority.into());
148-
}
149-
150-
if end > 0 && at_sign_pos == Some(end - 1) {
151-
// If there's nothing after an `@`, this is bonkers.
152-
return Err(ErrorKind::InvalidAuthority.into());
153-
}
154-
155-
if has_percent {
156-
// Something after the userinfo has a `%`, so reject it.
157-
return Err(ErrorKind::InvalidAuthority.into());
158-
}
159-
160-
Ok(end)
101+
.into()
102+
})
161103
}
162104

163105
// Parse bytes as an Authority, not allowing an empty string.
@@ -528,6 +470,105 @@ where
528470
})
529471
}
530472

473+
/// Shared validation logic for authority bytes.
474+
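/// Returns the index at which the authority ends (the first `/`, `?`, or `#`, otherwise `s.len()`), or an error; empty input is rejected with `AuthorityError::Empty`.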
475+
const fn validate_authority_bytes(s: &[u8]) -> Result<usize, AuthorityError> {
476+
if s.is_empty() {
477+
return Err(AuthorityError::Empty);
478+
}
479+
480+
let mut colon_cnt: u32 = 0;
481+
let mut start_bracket = false;
482+
let mut end_bracket = false;
483+
let mut has_percent = false;
484+
let mut end = s.len();
485+
let mut at_sign_pos: usize = s.len();
486+
const MAX_COLONS: u32 = 8; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
487+
488+
let mut i = 0;
489+
// Among other things, this loop checks that every byte in s up to the
490+
// first '/', '?', or '#' is a valid URI character (or in some contexts,
491+
// a '%'). This means that each such byte is a valid single-byte UTF-8
492+
// code point.
493+
while i < s.len() {
494+
let b = s[i];
495+
let ch = URI_CHARS[b as usize];
496+
497+
if ch == b'/' || ch == b'?' || ch == b'#' {
498+
end = i;
499+
break;
500+
}
501+
502+
if ch == 0 {
503+
if b == b'%' {
504+
// Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
505+
// https://url.spec.whatwg.org/#authority-state
506+
// the userinfo can have a percent-encoded username and password,
507+
// so record that a `%` was found. If this turns out to be
508+
// part of the userinfo, this flag will be cleared.
509+
// Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
510+
// be used to indicate a zone identifier.
511+
// If the flag hasn't been cleared at the end, that means this
512+
// was part of the hostname (and not part of an IPv6 address), and
513+
// will fail with an error.
514+
has_percent = true;
515+
} else {
516+
return Err(AuthorityError::InvalidUriChar);
517+
}
518+
} else if ch == b':' {
519+
if colon_cnt >= MAX_COLONS {
520+
return Err(AuthorityError::TooManyColons);
521+
}
522+
colon_cnt += 1;
523+
} else if ch == b'[' {
524+
if has_percent || start_bracket {
525+
// Either a `%` was seen outside the userinfo before this `[`, or a `[` was already opened, so reject it.
526+
return Err(AuthorityError::InvalidBracketUsage);
527+
}
528+
start_bracket = true;
529+
} else if ch == b']' {
530+
if !start_bracket || end_bracket {
531+
return Err(AuthorityError::InvalidBracketUsage);
532+
}
533+
end_bracket = true;
534+
535+
// Those were part of an IPv6 hostname, so forget them...
536+
colon_cnt = 0;
537+
has_percent = false;
538+
} else if ch == b'@' {
539+
at_sign_pos = i;
540+
541+
// Those weren't a port colon, but part of the
542+
// userinfo, so it needs to be forgotten.
543+
colon_cnt = 0;
544+
has_percent = false;
545+
}
546+
547+
i += 1;
548+
}
549+
550+
if start_bracket != end_bracket {
551+
return Err(AuthorityError::MismatchedBrackets);
552+
}
553+
554+
if colon_cnt > 1 {
555+
// Things like 'localhost:8080:3030' are rejected.
556+
return Err(AuthorityError::InvalidAuthority);
557+
}
558+
559+
if end > 0 && at_sign_pos == end - 1 {
560+
// If there's nothing after an `@`, this is bonkers.
561+
return Err(AuthorityError::EmptyAfterAt);
562+
}
563+
564+
if has_percent {
565+
// Something after the userinfo has a `%`, so reject it.
566+
return Err(AuthorityError::InvalidPercent);
567+
}
568+
569+
Ok(end)
570+
}
571+
531572
#[cfg(test)]
532573
mod tests {
533574
use super::*;

src/uri/path.rs

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ use bytes::Bytes;
77
use super::{ErrorKind, InvalidUri};
88
use crate::byte_str::ByteStr;
99

10+
/// Reasons why validating a path and query can fail.
11+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
12+
enum PathAndQueryError {
13+
InvalidPathChar,
14+
InvalidQueryChar,
15+
FragmentNotAllowed,
16+
}
17+
1018
/// Represents the path component of a URI
1119
#[derive(Clone)]
1220
pub struct PathAndQuery {
@@ -138,10 +146,14 @@ impl PathAndQuery {
138146
/// assert_eq!(v.query(), Some("world"));
139147
/// ```
140148
#[inline]
141-
pub fn from_static(src: &'static str) -> Self {
142-
let src = Bytes::from_static(src.as_bytes());
143-
144-
PathAndQuery::from_shared(src).unwrap()
149+
pub const fn from_static(src: &'static str) -> Self {
150+
match validate_path_and_query_bytes(src.as_bytes()) {
151+
Ok(query) => PathAndQuery {
152+
data: ByteStr::from_static(src),
153+
query,
154+
},
155+
Err(_) => panic!("static str is not valid path"),
156+
}
145157
}
146158

147159
/// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
@@ -467,6 +479,66 @@ impl PartialOrd<PathAndQuery> for String {
467479
}
468480
}
469481

482+
/// Shared validation logic for path and query bytes.
483+
/// Returns the query position (or NONE), or an error.
484+
const fn validate_path_and_query_bytes(bytes: &[u8]) -> Result<u16, PathAndQueryError> {
485+
let mut query: u16 = NONE;
486+
let mut i: usize = 0;
487+
488+
// path ...
489+
while i < bytes.len() {
490+
let b = bytes[i];
491+
if b == b'?' {
492+
query = i as u16;
493+
i += 1;
494+
break;
495+
} else if b == b'#' {
496+
return Err(PathAndQueryError::FragmentNotAllowed);
497+
} else {
498+
let allowed = b == 0x21
499+
|| (b >= 0x24 && b <= 0x3B)
500+
|| b == 0x3D
501+
|| (b >= 0x40 && b <= 0x5F)
502+
|| (b >= 0x61 && b <= 0x7A)
503+
|| b == 0x7C
504+
|| b == 0x7E
505+
|| b == b'"'
506+
|| b == b'{'
507+
|| b == b'}'
508+
|| (b >= 0x7F);
509+
510+
if !allowed {
511+
return Err(PathAndQueryError::InvalidPathChar);
512+
}
513+
}
514+
i += 1;
515+
}
516+
517+
// query ...
518+
if query != NONE {
519+
while i < bytes.len() {
520+
let b = bytes[i];
521+
if b == b'#' {
522+
return Err(PathAndQueryError::FragmentNotAllowed);
523+
}
524+
525+
let allowed = b == 0x21
526+
|| (b >= 0x24 && b <= 0x3B)
527+
|| b == 0x3D
528+
|| (b >= 0x3F && b <= 0x7E)
529+
|| (b >= 0x7F);
530+
531+
if !allowed {
532+
return Err(PathAndQueryError::InvalidQueryChar);
533+
}
534+
535+
i += 1;
536+
}
537+
}
538+
539+
Ok(query)
540+
}
541+
470542
#[cfg(test)]
471543
mod tests {
472544
use super::*;

0 commit comments

Comments
 (0)