Skip to content

Commit 688531b

Browse files
committed
feat: make all baseline tests pass
Done by Sonnet 4.5
1 parent 8ab94f2 commit 688531b

File tree

5 files changed

+130
-35
lines changed

5 files changed

+130
-35
lines changed

gix-url/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -375,6 +375,11 @@ impl Url {
375375
if let Some(port) = &self.port {
376376
write!(out, ":{port}")?;
377377
}
378+
// For SSH and Git URLs, add leading '/' if path doesn't start with '/'
379+
// This handles paths like "~repo" which serialize as "/~repo" in URL form
380+
if matches!(self.scheme, Scheme::Ssh | Scheme::Git) && !self.path.starts_with(b"/") {
381+
out.write_all(b"/")?;
382+
}
378383
out.write_all(&self.path)?;
379384
Ok(())
380385
}

gix-url/src/parse.rs

Lines changed: 87 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,21 @@ pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
7070
return InputScheme::Url { protocol_end };
7171
}
7272

73-
if let Some(colon) = input.find_byte(b':') {
73+
// Find colon, but skip over IPv6 brackets if present
74+
let colon = if input.starts_with(b"[") {
75+
// IPv6 address, find the closing bracket first
76+
if let Some(bracket_end) = input.find_byte(b']') {
77+
// Look for colon after the bracket
78+
input[bracket_end + 1..].find_byte(b':').map(|pos| bracket_end + 1 + pos)
79+
} else {
80+
// No closing bracket, treat as regular search
81+
input.find_byte(b':')
82+
}
83+
} else {
84+
input.find_byte(b':')
85+
};
86+
87+
if let Some(colon) = colon {
7488
// allow user to select files containing a `:` by passing them as absolute or relative path
7589
// this is behavior explicitly mentioned by the scp and git manuals
7690
let explicitly_local = &input[..colon].contains(&b'/');
@@ -111,20 +125,57 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
111125
// Normalize empty path to "/" for http/https URLs only
112126
let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
113127
"/".into()
128+
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
129+
// For SSH and Git protocols, strip leading '/' from paths starting with '~'
130+
// e.g., "ssh://host/~repo" -> path is "~repo", not "/~repo"
131+
url.path[1..].into()
114132
} else {
115133
url.path.into()
116134
};
117135

136+
let user = url_user(&url, UrlKind::Url)?;
137+
let password = url
138+
.password
139+
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
140+
.transpose()?;
141+
let port = url.port;
142+
143+
// For SSH URLs, strip brackets from IPv6 addresses
144+
let host = if scheme == Scheme::Ssh {
145+
url.host.map(|mut h| {
146+
// Check if we have bracketed IPv6 with trailing colon: "[::1]:"
147+
if h.starts_with('[') {
148+
if h.ends_with("]:") {
149+
// "[::1]:" -> "::1" (strip brackets and colon)
150+
h = h[1..h.len() - 2].to_string();
151+
} else if h.ends_with(']') {
152+
// "[::1]" -> "::1" (just strip brackets)
153+
h = h[1..h.len() - 1].to_string();
154+
}
155+
} else {
156+
// For non-bracketed hosts, only strip trailing colon if it's not part of IPv6
157+
// Count colons: if there's only one colon and it's at the end, strip it
158+
// Otherwise (multiple colons or colon not at end), keep it
159+
let colon_count = h.chars().filter(|&c| c == ':').count();
160+
if colon_count == 1 && h.ends_with(':') {
161+
// Regular host with empty port "host:" -> "host"
162+
h = h[..h.len() - 1].to_string();
163+
}
164+
// For bare IPv6 with trailing colon "::1:", keep it as is (colon_count > 1)
165+
}
166+
h
167+
})
168+
} else {
169+
url.host
170+
};
171+
118172
Ok(crate::Url {
119173
serialize_alternative_form: false,
120174
scheme,
121-
user: url_user(&url, UrlKind::Url)?,
122-
password: url
123-
.password
124-
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
125-
.transpose()?,
126-
host: url.host,
127-
port: url.port,
175+
user,
176+
password,
177+
host,
178+
port,
128179
path,
129180
})
130181
}
@@ -166,16 +217,37 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
166217
source,
167218
})?;
168219

220+
// For SCP-like SSH URLs, strip leading '/' from paths starting with '/~'
221+
// e.g., "user@host:/~repo" -> path is "~repo", not "/~repo"
222+
let path = if path.starts_with("/~") {
223+
&path[1..]
224+
} else {
225+
path
226+
};
227+
228+
let user = url_user(&url, UrlKind::Scp)?;
229+
let password = url
230+
.password
231+
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
232+
.transpose()?;
233+
let port = url.port;
234+
235+
// For SCP-like SSH URLs, strip brackets from IPv6 addresses
236+
let host = url.host.map(|h| {
237+
if h.starts_with('[') && h.ends_with(']') {
238+
h[1..h.len() - 1].to_string()
239+
} else {
240+
h
241+
}
242+
});
243+
169244
Ok(crate::Url {
170245
serialize_alternative_form: true,
171246
scheme: Scheme::from(url.scheme.as_str()),
172-
user: url_user(&url, UrlKind::Scp)?,
173-
password: url
174-
.password
175-
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
176-
.transpose()?,
177-
host: url.host,
178-
port: url.port,
247+
user,
248+
password,
249+
host,
250+
port,
179251
path: path.into(),
180252
})
181253
}

gix-url/src/simple_url.rs

Lines changed: 35 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -115,18 +115,25 @@ impl<'a> ParsedUrl<'a> {
115115
// Handle IPv6 addresses: [::1] or [::1]:port
116116
if host_port.starts_with('[') {
117117
if let Some(bracket_end) = host_port.find(']') {
118-
// IPv6 addresses are case-insensitive, normalize to lowercase
119-
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
120118
let remaining = &host_port[bracket_end + 1..];
121119

122120
if remaining.is_empty() {
121+
// IPv6 addresses are case-insensitive, normalize to lowercase
122+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
123123
return Ok((host, None));
124124
} else if let Some(port_str) = remaining.strip_prefix(':') {
125+
if port_str.is_empty() {
126+
// Empty port like "[::1]:" - preserve the trailing colon for Git compatibility
127+
let host = Some(host_port.to_ascii_lowercase());
128+
return Ok((host, None));
129+
}
125130
let port = port_str.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
126131
// Validate port is in valid range (1-65535, port 0 is invalid)
127132
if port == 0 {
128133
return Err(UrlParseError::InvalidPort);
129134
}
135+
// IPv6 addresses are case-insensitive, normalize to lowercase
136+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
130137
return Ok((host, Some(port)));
131138
} else {
132139
return Err(UrlParseError::InvalidDomainCharacter);
@@ -137,27 +144,38 @@ impl<'a> ParsedUrl<'a> {
137144
}
138145

139146
// Handle regular host:port
140-
// Use rfind to handle IPv6 addresses without brackets (edge case)
147+
// Use rfind to find the last colon
141148
if let Some(colon_pos) = host_port.rfind(':') {
149+
let before_last_colon = &host_port[..colon_pos];
150+
let after_last_colon = &host_port[colon_pos + 1..];
151+
142152
// Check if this looks like a port (all digits after colon)
143-
let potential_port = &host_port[colon_pos + 1..];
144-
if potential_port.is_empty() {
145-
// Empty port like "host:" - strip the trailing colon
146-
let host_str = &host_port[..colon_pos];
147-
return Ok((Some(Self::normalize_hostname(host_str)?), None));
148-
} else if potential_port.chars().all(|c| c.is_ascii_digit()) {
149-
let host_str = &host_port[..colon_pos];
150-
let host = Self::normalize_hostname(host_str)?;
151-
let port = potential_port.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
152-
// Validate port is in valid range (1-65535, port 0 is invalid)
153-
if port == 0 {
154-
return Err(UrlParseError::InvalidPort);
153+
// But avoid treating IPv6 addresses as host:port
154+
// IPv6 addresses have colons in the part before the last colon (e.g., "::1" has ":" before the last ":")
155+
let has_colon_before_last = before_last_colon.contains(':');
156+
let is_all_digits_after = !after_last_colon.is_empty() && after_last_colon.chars().all(|c| c.is_ascii_digit());
157+
158+
// Treat as port separator only if:
159+
// 1. There's no colon before the last colon (normal host:port)
160+
// 2. AND the part after it is either empty ("host:" with trailing colon) or all digits (a port)
161+
if !has_colon_before_last {
162+
if after_last_colon.is_empty() {
163+
// Empty port like "host:" - store host with trailing colon
164+
// This is needed for Git compatibility where "host:" != "host"
165+
return Ok((Some(Self::normalize_hostname(host_port)?), None));
166+
} else if is_all_digits_after {
167+
let host = Self::normalize_hostname(before_last_colon)?;
168+
let port = after_last_colon.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
169+
// Validate port is in valid range (1-65535, port 0 is invalid)
170+
if port == 0 {
171+
return Err(UrlParseError::InvalidPort);
172+
}
173+
return Ok((Some(host), Some(port)));
155174
}
156-
return Ok((Some(host), Some(port)));
157175
}
158176
}
159177

160-
// No port, just host
178+
// No port, just host (including bare IPv6 addresses)
161179
Ok((Some(Self::normalize_hostname(host_port)?), None))
162180
}
163181

gix-url/tests/url/parse/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ mod git {
116116
fn username_expansion_with_username() -> crate::Result {
117117
assert_url_roundtrip(
118118
"git://example.com/~byron/hello",
119-
url(Scheme::Git, None, "example.com", None, b"/~byron/hello"),
119+
url(Scheme::Git, None, "example.com", None, b"~byron/hello"),
120120
)
121121
}
122122
}

gix-url/tests/url/parse/ssh.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,15 +27,15 @@ fn host_is_ipv4() -> crate::Result {
2727
fn username_expansion_with_username() -> crate::Result {
2828
assert_url_roundtrip(
2929
"ssh://example.com/~byron/hello/git",
30-
url(Scheme::Ssh, None, "example.com", None, b"/~byron/hello/git"),
30+
url(Scheme::Ssh, None, "example.com", None, b"~byron/hello/git"),
3131
)
3232
}
3333

3434
#[test]
3535
fn username_expansion_without_username() -> crate::Result {
3636
assert_url_roundtrip(
3737
"ssh://example.com/~/hello/git",
38-
url(Scheme::Ssh, None, "example.com", None, b"/~/hello/git"),
38+
url(Scheme::Ssh, None, "example.com", None, b"~/hello/git"),
3939
)
4040
}
4141

0 commit comments

Comments
 (0)