@@ -8,6 +8,19 @@ use bytes::Bytes;
88use super :: { ErrorKind , InvalidUri , Port , URI_CHARS } ;
99use crate :: byte_str:: ByteStr ;
1010
/// Reason a byte string was rejected while being validated as a URI authority.
///
/// This is a plain, `Copy`-able tag with no payload so that it can be produced
/// from a `const fn`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AuthorityError {
    /// The input contained no bytes at all.
    Empty,
    /// A byte that is not a permitted URI character (and is not `%`) was found.
    InvalidUriChar,
    /// The authority is malformed, e.g. more than one `:` outside of brackets
    /// or userinfo.
    InvalidAuthority,
    /// More `:` bytes were seen in a row of counting than the scanner allows.
    TooManyColons,
    /// Exactly one of `[` / `]` was present.
    MismatchedBrackets,
    /// A `[` or `]` appeared where it is not allowed (repeated bracket, `]`
    /// before `[`, or `[` following a `%`).
    InvalidBracketUsage,
    /// An `@` was the last byte of the authority.
    EmptyAfterAt,
    /// A `%` appeared outside of the userinfo and outside of brackets.
    InvalidPercent,
}
23+
1124/// Represents the authority component of a URI.
1225#[ derive( Clone ) ]
1326pub struct Authority {
@@ -45,9 +58,14 @@ impl Authority {
4558 /// let authority = Authority::from_static("example.com");
4659 /// assert_eq!(authority.host(), "example.com");
4760 /// ```
48- pub fn from_static ( src : & ' static str ) -> Self {
49- Authority :: from_shared ( Bytes :: from_static ( src. as_bytes ( ) ) )
50- . expect ( "static str is not valid authority" )
61+ #[ inline]
62+ pub const fn from_static ( src : & ' static str ) -> Self {
63+ match validate_authority_bytes ( src. as_bytes ( ) ) {
64+ Ok ( _) => Authority {
65+ data : ByteStr :: from_static ( src) ,
66+ } ,
67+ Err ( _) => panic ! ( "static str is not valid authority" ) ,
68+ }
5169 }
5270
5371 /// Attempt to convert a `Bytes` buffer to a `Authority`.
@@ -69,95 +87,19 @@ impl Authority {
6987 // Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
7088 // ret is the return value.
7189 pub ( super ) fn parse ( s : & [ u8 ] ) -> Result < usize , InvalidUri > {
72- let mut colon_cnt = 0u32 ;
73- let mut start_bracket = false ;
74- let mut end_bracket = false ;
75- let mut has_percent = false ;
76- let mut end = s. len ( ) ;
77- let mut at_sign_pos = None ;
78- const MAX_COLONS : u32 = 8 ; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
79-
80- // Among other things, this loop checks that every byte in s up to the
81- // first '/', '?', or '#' is a valid URI character (or in some contexts,
82- // a '%'). This means that each such byte is a valid single-byte UTF-8
83- // code point.
84- for ( i, & b) in s. iter ( ) . enumerate ( ) {
85- match URI_CHARS [ b as usize ] {
86- b'/' | b'?' | b'#' => {
87- end = i;
88- break ;
89- }
90- b':' => {
91- if colon_cnt >= MAX_COLONS {
92- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
93- }
94- colon_cnt += 1 ;
95- }
96- b'[' => {
97- if has_percent || start_bracket {
98- // Something other than the userinfo has a `%`, so reject it.
99- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
100- }
101- start_bracket = true ;
102- }
103- b']' => {
104- if ( !start_bracket) || end_bracket {
105- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
106- }
107- end_bracket = true ;
108-
109- // Those were part of an IPv6 hostname, so forget them...
110- colon_cnt = 0 ;
111- has_percent = false ;
112- }
113- b'@' => {
114- at_sign_pos = Some ( i) ;
115-
116- // Those weren't a port colon, but part of the
117- // userinfo, so it needs to be forgotten.
118- colon_cnt = 0 ;
119- has_percent = false ;
120- }
121- 0 if b == b'%' => {
122- // Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
123- // https://url.spec.whatwg.org/#authority-state
124- // the userinfo can have a percent-encoded username and password,
125- // so record that a `%` was found. If this turns out to be
126- // part of the userinfo, this flag will be cleared.
127- // Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
128- // be used to indicate a zone identifier.
129- // If the flag hasn't been cleared at the end, that means this
130- // was part of the hostname (and not part of an IPv6 address), and
131- // will fail with an error.
132- has_percent = true ;
133- }
134- 0 => {
135- return Err ( ErrorKind :: InvalidUriChar . into ( ) ) ;
136- }
137- _ => { }
90+ validate_authority_bytes ( s) . map_err ( |e| {
91+ match e {
92+ AuthorityError :: Empty => ErrorKind :: Empty ,
93+ AuthorityError :: InvalidUriChar => ErrorKind :: InvalidUriChar ,
94+ AuthorityError :: InvalidAuthority
95+ | AuthorityError :: MismatchedBrackets
96+ | AuthorityError :: InvalidBracketUsage
97+ | AuthorityError :: EmptyAfterAt
98+ | AuthorityError :: InvalidPercent
99+ | AuthorityError :: TooManyColons => ErrorKind :: InvalidAuthority ,
138100 }
139- }
140-
141- if start_bracket ^ end_bracket {
142- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
143- }
144-
145- if colon_cnt > 1 {
146- // Things like 'localhost:8080:3030' are rejected.
147- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
148- }
149-
150- if end > 0 && at_sign_pos == Some ( end - 1 ) {
151- // If there's nothing after an `@`, this is bonkers.
152- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
153- }
154-
155- if has_percent {
156- // Something after the userinfo has a `%`, so reject it.
157- return Err ( ErrorKind :: InvalidAuthority . into ( ) ) ;
158- }
159-
160- Ok ( end)
101+ . into ( )
102+ } )
161103 }
162104
163105 // Parse bytes as an Authority, not allowing an empty string.
@@ -528,6 +470,105 @@ where
528470 } )
529471}
530472
473+ /// Shared validation logic for authority bytes.
474+ /// Returns the end position of valid authority bytes, or an error.
475+ const fn validate_authority_bytes ( s : & [ u8 ] ) -> Result < usize , AuthorityError > {
476+ if s. is_empty ( ) {
477+ return Err ( AuthorityError :: Empty ) ;
478+ }
479+
480+ let mut colon_cnt: u32 = 0 ;
481+ let mut start_bracket = false ;
482+ let mut end_bracket = false ;
483+ let mut has_percent = false ;
484+ let mut end = s. len ( ) ;
485+ let mut at_sign_pos: usize = s. len ( ) ;
486+ const MAX_COLONS : u32 = 8 ; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
487+
488+ let mut i = 0 ;
489+ // Among other things, this loop checks that every byte in s up to the
490+ // first '/', '?', or '#' is a valid URI character (or in some contexts,
491+ // a '%'). This means that each such byte is a valid single-byte UTF-8
492+ // code point.
493+ while i < s. len ( ) {
494+ let b = s[ i] ;
495+ let ch = URI_CHARS [ b as usize ] ;
496+
497+ if ch == b'/' || ch == b'?' || ch == b'#' {
498+ end = i;
499+ break ;
500+ }
501+
502+ if ch == 0 {
503+ if b == b'%' {
504+ // Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
505+ // https://url.spec.whatwg.org/#authority-state
506+ // the userinfo can have a percent-encoded username and password,
507+ // so record that a `%` was found. If this turns out to be
508+ // part of the userinfo, this flag will be cleared.
509+ // Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
510+ // be used to indicate a zone identifier.
511+ // If the flag hasn't been cleared at the end, that means this
512+ // was part of the hostname (and not part of an IPv6 address), and
513+ // will fail with an error.
514+ has_percent = true ;
515+ } else {
516+ return Err ( AuthorityError :: InvalidUriChar ) ;
517+ }
518+ } else if ch == b':' {
519+ if colon_cnt >= MAX_COLONS {
520+ return Err ( AuthorityError :: TooManyColons ) ;
521+ }
522+ colon_cnt += 1 ;
523+ } else if ch == b'[' {
524+ if has_percent || start_bracket {
525+ // Something other than the userinfo has a `%`, so reject it.
526+ return Err ( AuthorityError :: InvalidBracketUsage ) ;
527+ }
528+ start_bracket = true ;
529+ } else if ch == b']' {
530+ if !start_bracket || end_bracket {
531+ return Err ( AuthorityError :: InvalidBracketUsage ) ;
532+ }
533+ end_bracket = true ;
534+
535+ // Those were part of an IPv6 hostname, so forget them...
536+ colon_cnt = 0 ;
537+ has_percent = false ;
538+ } else if ch == b'@' {
539+ at_sign_pos = i;
540+
541+ // Those weren't a port colon, but part of the
542+ // userinfo, so it needs to be forgotten.
543+ colon_cnt = 0 ;
544+ has_percent = false ;
545+ }
546+
547+ i += 1 ;
548+ }
549+
550+ if start_bracket != end_bracket {
551+ return Err ( AuthorityError :: MismatchedBrackets ) ;
552+ }
553+
554+ if colon_cnt > 1 {
555+ // Things like 'localhost:8080:3030' are rejected.
556+ return Err ( AuthorityError :: InvalidAuthority ) ;
557+ }
558+
559+ if end > 0 && at_sign_pos == end - 1 {
560+ // If there's nothing after an `@`, this is bonkers.
561+ return Err ( AuthorityError :: EmptyAfterAt ) ;
562+ }
563+
564+ if has_percent {
565+ // Something after the userinfo has a `%`, so reject it.
566+ return Err ( AuthorityError :: InvalidPercent ) ;
567+ }
568+
569+ Ok ( end)
570+ }
571+
531572#[ cfg( test) ]
532573mod tests {
533574 use super :: * ;
0 commit comments