Skip to content

Commit e09b3d6

Browse files
committed
refactor
- make bracket handling clearer - improve field docs - remove assert! as it is fine to not serialise everything in alternative forms. That's what canonical serialisation is there for. - prefer strip instead of indexed operation
1 parent 5d16214 commit e09b3d6

File tree

4 files changed

+50
-43
lines changed

4 files changed

+50
-43
lines changed

gix-url/src/lib.rs

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,17 +113,39 @@ pub struct Url {
113113
/// The URL scheme.
114114
pub scheme: Scheme,
115115
/// The user to impersonate on the remote.
116+
///
117+
/// Stored in decoded form: percent-encoded characters are decoded during parsing.
118+
/// Re-encoded during canonical serialization, but written as-is in alternative form.
116119
pub user: Option<String>,
117120
/// The password associated with a user.
121+
///
122+
/// Stored in decoded form: percent-encoded characters are decoded during parsing.
123+
/// Re-encoded during canonical serialization. Cannot be serialized in alternative form (will panic in debug builds).
118124
pub password: Option<String>,
119125
/// The host to which to connect. Localhost is implied if `None`.
126+
///
127+
/// IPv6 addresses are stored *without* brackets for SSH schemes, but *with* brackets for other schemes.
128+
/// Brackets are automatically added during serialization when needed (e.g., when a port is specified with an IPv6 host).
120129
pub host: Option<String>,
121130
/// When serializing, use the alternative forms as it was parsed as such.
131+
///
132+
/// Alternative forms include SCP-like syntax (`user@host:path`) and bare file paths.
133+
/// When `true`, password and port cannot be serialized (will panic in debug builds).
122134
pub serialize_alternative_form: bool,
123135
/// The port to use when connecting to a host. If `None`, standard ports depending on `scheme` will be used.
124136
pub port: Option<u16>,
125137
/// The path portion of the URL, usually the location of the git repository.
126138
///
139+
/// Unlike `user` and `password`, paths are stored and serialized in their original form
140+
/// without percent-decoding or re-encoding (e.g., `%20` remains `%20`, not converted to space).
141+
///
142+
/// Path normalization during parsing:
143+
/// - SSH/Git schemes: Leading `/~` is stripped (e.g., `/~repo` becomes `~repo`)
144+
/// - SSH/Git schemes: Empty paths are rejected as errors
145+
/// - HTTP/HTTPS schemes: Empty paths are normalized to `/`
146+
///
147+
/// During serialization, SSH/Git URLs prepend `/` to paths not starting with `/`.
148+
///
127149
/// # Security Warning
128150
///
129151
/// URLs allow paths to start with `-` which makes it possible to mask command-line arguments as path which then leads to
@@ -381,7 +403,7 @@ impl Url {
381403
out.write_all(self.scheme.as_str().as_bytes())?;
382404
out.write_all(b"://")?;
383405

384-
let needs_brackets = self.port.is_some() && self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
406+
let needs_brackets = self.port.is_some() && self.host_needs_brackets();
385407

386408
match (&self.user, &self.host) {
387409
(Some(user), Some(host)) => {
@@ -427,20 +449,19 @@ impl Url {
427449
Ok(())
428450
}
429451

430-
fn is_ipv6(host: &str) -> bool {
431-
host.contains(':') && !host.starts_with('[')
452+
fn host_needs_brackets(&self) -> bool {
453+
fn is_ipv6(h: &str) -> bool {
454+
h.contains(':') && !h.starts_with('[')
455+
}
456+
self.host.as_ref().is_some_and(|h| is_ipv6(h))
432457
}
433458

434459
fn write_alternative_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
435-
let needs_brackets = self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
460+
let needs_brackets = self.host_needs_brackets();
436461

437462
match (&self.user, &self.host) {
438463
(Some(user), Some(host)) => {
439464
out.write_all(user.as_bytes())?;
440-
assert!(
441-
self.password.is_none(),
442-
"BUG: cannot serialize password in alternative form"
443-
);
444465
out.write_all(b"@")?;
445466
if needs_brackets {
446467
out.write_all(b"[")?;
@@ -466,7 +487,6 @@ impl Url {
466487
));
467488
}
468489
}
469-
assert!(self.port.is_none(), "BUG: cannot serialize port in alternative form");
470490
if self.scheme == Scheme::Ssh {
471491
out.write_all(b":")?;
472492
}

gix-url/src/parse.rs

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -145,32 +145,29 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
145145
// For SSH URLs, strip brackets from IPv6 addresses
146146
let host = if scheme == Scheme::Ssh {
147147
url.host.map(|mut h| {
148-
// Check if we have bracketed IPv6 with trailing colon: "[::1]:"
149-
if h.starts_with('[') {
150-
if h.ends_with("]:") {
151-
// "[::1]:" -> "::1" (strip brackets and colon)
152-
h = h[1..h.len() - 2].to_string();
153-
} else if h.ends_with(']') {
154-
// "[::1]" -> "::1" (just strip brackets)
155-
h = h[1..h.len() - 1].to_string();
148+
// Bracketed IPv6 forms
149+
if let Some(h2) = h.strip_prefix('[') {
150+
if let Some(inner) = h2.strip_suffix("]:") {
151+
// "[::1]:" "::1"
152+
h = inner.to_string();
153+
} else if let Some(inner) = h2.strip_suffix(']') {
154+
// "[::1]" "::1"
155+
h = inner.to_string();
156156
}
157157
} else {
158-
// For non-bracketed hosts, only strip trailing colon if it's not part of IPv6
159-
// Count colons: if there's only one colon and it's at the end, strip it
160-
// Otherwise (multiple colons or colon not at end), keep it
161-
let colon_count = h.chars().filter(|&c| c == ':').count();
162-
if colon_count == 1 && h.ends_with(':') {
163-
// Regular host with empty port "host:" -> "host"
164-
h = h[..h.len() - 1].to_string();
158+
// Non-bracketed host: strip a single trailing colon
159+
let colon_count = h.chars().filter(|&c| c == ':').take(2).count();
160+
if colon_count == 1 {
161+
if let Some(inner) = h.strip_suffix(':') {
162+
h = inner.to_string();
163+
}
165164
}
166-
// For bare IPv6 with trailing colon "::1:", keep it as is (colon_count > 1)
167165
}
168166
h
169167
})
170168
} else {
171169
url.host
172170
};
173-
174171
Ok(crate::Url {
175172
serialize_alternative_form: false,
176173
scheme,
@@ -232,8 +229,8 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
232229

233230
// For SCP-like SSH URLs, strip brackets from IPv6 addresses
234231
let host = url.host.map(|h| {
235-
if h.starts_with('[') && h.ends_with(']') {
236-
h[1..h.len() - 1].to_string()
232+
if let Some(h) = h.strip_prefix("[").and_then(|h| h.strip_suffix("]")) {
233+
h.to_string()
237234
} else {
238235
h
239236
}

gix-url/src/simple_url.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,16 +64,12 @@ impl<'a> ParsedUrl<'a> {
6464
};
6565

6666
// Parse authority: [user[:password]@]host[:port]
67-
let (username, password, host, port) = if let Some(at_pos) = authority.rfind('@') {
67+
let (username, password, host, port) = if let Some((user_info, host_port)) = authority.rsplit_once('@') {
6868
// Has user info
69-
let user_info = &authority[..at_pos];
70-
let host_port = &authority[at_pos + 1..];
71-
72-
let (user, pass) = if let Some(colon_pos) = user_info.find(':') {
73-
let pass_str = &user_info[colon_pos + 1..];
69+
let (user, pass) = if let Some((user, pass_str)) = user_info.split_once(':') {
7470
// Treat empty password as None
7571
let pass = if pass_str.is_empty() { None } else { Some(pass_str) };
76-
(&user_info[..colon_pos], pass)
72+
(user, pass)
7773
} else {
7874
(user_info, None)
7975
};
@@ -145,10 +141,7 @@ impl<'a> ParsedUrl<'a> {
145141

146142
// Handle regular host:port
147143
// Use rfind to find the last colon
148-
if let Some(colon_pos) = host_port.rfind(':') {
149-
let before_last_colon = &host_port[..colon_pos];
150-
let after_last_colon = &host_port[colon_pos + 1..];
151-
144+
if let Some((before_last_colon, after_last_colon)) = host_port.rsplit_once(':') {
152145
// Check if this looks like a port (all digits after colon)
153146
// But avoid treating IPv6 addresses as host:port
154147
// IPv6 addresses have colons in the part before the last colon (e.g., "::1" has "::" before the last ":")

gix-url/tests/url/baseline.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,7 @@ fn parse_and_compare_baseline_urls() {
4343
println!(
4444
"\nBaseline tests: {passed}/{total} passed, {failed} failed, {expected_failures} expected failures (skipped)"
4545
);
46-
47-
if failed > 0 {
48-
panic!("{failed} baseline test(s) failed");
49-
}
46+
assert_eq!(failed, 0, "{failed} baseline test(s) failed");
5047
}
5148

5249
fn assert_urls_equal(expected: &baseline::GitDiagUrl<'_>, actual: &gix_url::Url) {

0 commit comments

Comments
 (0)