Skip to content

Commit cb63c56

Browse files
committed
make all baseline tests pass
Done by Sonnet 4.5
1 parent 8ab94f2 commit cb63c56

File tree

6 files changed

+194
-138
lines changed

6 files changed

+194
-138
lines changed

gix-url/src/lib.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ impl Url {
352352
fn write_canonical_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
353353
out.write_all(self.scheme.as_str().as_bytes())?;
354354
out.write_all(b"://")?;
355+
356+
let needs_brackets = self.port.is_some() && self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
357+
355358
match (&self.user, &self.host) {
356359
(Some(user), Some(host)) => {
357360
out.write_all(percent_encode(user).as_bytes())?;
@@ -360,10 +363,22 @@ impl Url {
360363
out.write_all(percent_encode(password).as_bytes())?;
361364
}
362365
out.write_all(b"@")?;
366+
if needs_brackets {
367+
out.write_all(b"[")?;
368+
}
363369
out.write_all(host.as_bytes())?;
370+
if needs_brackets {
371+
out.write_all(b"]")?;
372+
}
364373
}
365374
(None, Some(host)) => {
375+
if needs_brackets {
376+
out.write_all(b"[")?;
377+
}
366378
out.write_all(host.as_bytes())?;
379+
if needs_brackets {
380+
out.write_all(b"]")?;
381+
}
367382
}
368383
(None, None) => {}
369384
(Some(_user), None) => {
@@ -375,11 +390,22 @@ impl Url {
375390
if let Some(port) = &self.port {
376391
write!(out, ":{port}")?;
377392
}
393+
// For SSH and Git URLs, add leading '/' if path doesn't start with '/'
394+
// This handles paths like "~repo" which serialize as "/~repo" in URL form
395+
if matches!(self.scheme, Scheme::Ssh | Scheme::Git) && !self.path.starts_with(b"/") {
396+
out.write_all(b"/")?;
397+
}
378398
out.write_all(&self.path)?;
379399
Ok(())
380400
}
381401

402+
/// Heuristic check for whether `host` looks like an IPv6 literal that is not yet bracketed.
/// Returns `true` when `host` contains a `:` and does not already start with `[`;
/// no actual address validation is performed.
fn is_ipv6(host: &str) -> bool {
403+
host.contains(':') && !host.starts_with('[')
404+
}
405+
382406
fn write_alternative_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
407+
let needs_brackets = self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
408+
383409
match (&self.user, &self.host) {
384410
(Some(user), Some(host)) => {
385411
out.write_all(user.as_bytes())?;
@@ -388,10 +414,22 @@ impl Url {
388414
"BUG: cannot serialize password in alternative form"
389415
);
390416
out.write_all(b"@")?;
417+
if needs_brackets {
418+
out.write_all(b"[")?;
419+
}
391420
out.write_all(host.as_bytes())?;
421+
if needs_brackets {
422+
out.write_all(b"]")?;
423+
}
392424
}
393425
(None, Some(host)) => {
426+
if needs_brackets {
427+
out.write_all(b"[")?;
428+
}
394429
out.write_all(host.as_bytes())?;
430+
if needs_brackets {
431+
out.write_all(b"]")?;
432+
}
395433
}
396434
(None, None) => {}
397435
(Some(_user), None) => {

gix-url/src/parse.rs

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,23 @@ pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
7070
return InputScheme::Url { protocol_end };
7171
}
7272

73-
if let Some(colon) = input.find_byte(b':') {
73+
// Find colon, but skip over IPv6 brackets if present
74+
let colon = if input.starts_with(b"[") {
75+
// IPv6 address, find the closing bracket first
76+
if let Some(bracket_end) = input.find_byte(b']') {
77+
// Look for colon after the bracket
78+
input[bracket_end + 1..]
79+
.find_byte(b':')
80+
.map(|pos| bracket_end + 1 + pos)
81+
} else {
82+
// No closing bracket, treat as regular search
83+
input.find_byte(b':')
84+
}
85+
} else {
86+
input.find_byte(b':')
87+
};
88+
89+
if let Some(colon) = colon {
7490
// allow user to select files containing a `:` by passing them as absolute or relative path
7591
// this is behavior explicitly mentioned by the scp and git manuals
7692
let explicitly_local = &input[..colon].contains(&b'/');
@@ -111,20 +127,57 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
111127
// Normalize empty path to "/" for http/https URLs only
112128
let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
113129
"/".into()
130+
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
131+
// For SSH and Git protocols, strip leading '/' from paths starting with '~'
132+
// e.g., "ssh://host/~repo" -> path is "~repo", not "/~repo"
133+
url.path[1..].into()
114134
} else {
115135
url.path.into()
116136
};
117137

138+
let user = url_user(&url, UrlKind::Url)?;
139+
let password = url
140+
.password
141+
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
142+
.transpose()?;
143+
let port = url.port;
144+
145+
// For SSH URLs, strip brackets from IPv6 addresses
146+
let host = if scheme == Scheme::Ssh {
147+
url.host.map(|mut h| {
148+
// Check if we have bracketed IPv6 with trailing colon: "[::1]:"
149+
if h.starts_with('[') {
150+
if h.ends_with("]:") {
151+
// "[::1]:" -> "::1" (strip brackets and colon)
152+
h = h[1..h.len() - 2].to_string();
153+
} else if h.ends_with(']') {
154+
// "[::1]" -> "::1" (just strip brackets)
155+
h = h[1..h.len() - 1].to_string();
156+
}
157+
} else {
158+
// For non-bracketed hosts, only strip trailing colon if it's not part of IPv6
159+
// Count colons: if there's only one colon and it's at the end, strip it
160+
// Otherwise (multiple colons or colon not at end), keep it
161+
let colon_count = h.chars().filter(|&c| c == ':').count();
162+
if colon_count == 1 && h.ends_with(':') {
163+
// Regular host with empty port "host:" -> "host"
164+
h = h[..h.len() - 1].to_string();
165+
}
166+
// For bare IPv6 with trailing colon "::1:", keep it as is (colon_count > 1)
167+
}
168+
h
169+
})
170+
} else {
171+
url.host
172+
};
173+
118174
Ok(crate::Url {
119175
serialize_alternative_form: false,
120176
scheme,
121-
user: url_user(&url, UrlKind::Url)?,
122-
password: url
123-
.password
124-
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
125-
.transpose()?,
126-
host: url.host,
127-
port: url.port,
177+
user,
178+
password,
179+
host,
180+
port,
128181
path,
129182
})
130183
}
@@ -166,16 +219,33 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
166219
source,
167220
})?;
168221

222+
// For SCP-like SSH URLs, strip leading '/' from paths starting with '/~'
223+
// e.g., "user@host:/~repo" -> path is "~repo", not "/~repo"
224+
let path = if path.starts_with("/~") { &path[1..] } else { path };
225+
226+
let user = url_user(&url, UrlKind::Scp)?;
227+
let password = url
228+
.password
229+
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
230+
.transpose()?;
231+
let port = url.port;
232+
233+
// For SCP-like SSH URLs, strip brackets from IPv6 addresses
234+
let host = url.host.map(|h| {
235+
if h.starts_with('[') && h.ends_with(']') {
236+
h[1..h.len() - 1].to_string()
237+
} else {
238+
h
239+
}
240+
});
241+
169242
Ok(crate::Url {
170243
serialize_alternative_form: true,
171244
scheme: Scheme::from(url.scheme.as_str()),
172-
user: url_user(&url, UrlKind::Scp)?,
173-
password: url
174-
.password
175-
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
176-
.transpose()?,
177-
host: url.host,
178-
port: url.port,
245+
user,
246+
password,
247+
host,
248+
port,
179249
path: path.into(),
180250
})
181251
}

gix-url/src/simple_url.rs

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,25 @@ impl<'a> ParsedUrl<'a> {
115115
// Handle IPv6 addresses: [::1] or [::1]:port
116116
if host_port.starts_with('[') {
117117
if let Some(bracket_end) = host_port.find(']') {
118-
// IPv6 addresses are case-insensitive, normalize to lowercase
119-
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
120118
let remaining = &host_port[bracket_end + 1..];
121119

122120
if remaining.is_empty() {
121+
// IPv6 addresses are case-insensitive, normalize to lowercase
122+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
123123
return Ok((host, None));
124124
} else if let Some(port_str) = remaining.strip_prefix(':') {
125+
if port_str.is_empty() {
126+
// Empty port like "[::1]:" - preserve the trailing colon for Git compatibility
127+
let host = Some(host_port.to_ascii_lowercase());
128+
return Ok((host, None));
129+
}
125130
let port = port_str.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
126131
// Validate port is in valid range (1-65535, port 0 is invalid)
127132
if port == 0 {
128133
return Err(UrlParseError::InvalidPort);
129134
}
135+
// IPv6 addresses are case-insensitive, normalize to lowercase
136+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
130137
return Ok((host, Some(port)));
131138
} else {
132139
return Err(UrlParseError::InvalidDomainCharacter);
@@ -137,27 +144,41 @@ impl<'a> ParsedUrl<'a> {
137144
}
138145

139146
// Handle regular host:port
140-
// Use rfind to handle IPv6 addresses without brackets (edge case)
147+
// Use rfind to find the last colon
141148
if let Some(colon_pos) = host_port.rfind(':') {
149+
let before_last_colon = &host_port[..colon_pos];
150+
let after_last_colon = &host_port[colon_pos + 1..];
151+
142152
// Check if this looks like a port (all digits after colon)
143-
let potential_port = &host_port[colon_pos + 1..];
144-
if potential_port.is_empty() {
145-
// Empty port like "host:" - strip the trailing colon
146-
let host_str = &host_port[..colon_pos];
147-
return Ok((Some(Self::normalize_hostname(host_str)?), None));
148-
} else if potential_port.chars().all(|c| c.is_ascii_digit()) {
149-
let host_str = &host_port[..colon_pos];
150-
let host = Self::normalize_hostname(host_str)?;
151-
let port = potential_port.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
152-
// Validate port is in valid range (1-65535, port 0 is invalid)
153-
if port == 0 {
154-
return Err(UrlParseError::InvalidPort);
153+
// But avoid treating IPv6 addresses as host:port
154+
// IPv6 addresses have colons in the part before the last colon (e.g., "::1" has ":" before the last ":")
155+
let has_colon_before_last = before_last_colon.contains(':');
156+
let is_all_digits_after =
157+
!after_last_colon.is_empty() && after_last_colon.chars().all(|c| c.is_ascii_digit());
158+
159+
// Treat as port separator only if:
160+
// 1. There's no colon before the last colon (normal host:port)
161+
// 2. AND what follows it is all digits; an empty port ("host:") instead keeps the trailing colon as part of the host
162+
if !has_colon_before_last {
163+
if after_last_colon.is_empty() {
164+
// Empty port like "host:" - store host with trailing colon
165+
// This is needed for Git compatibility where "host:" != "host"
166+
return Ok((Some(Self::normalize_hostname(host_port)?), None));
167+
} else if is_all_digits_after {
168+
let host = Self::normalize_hostname(before_last_colon)?;
169+
let port = after_last_colon
170+
.parse::<u16>()
171+
.map_err(|_| UrlParseError::InvalidPort)?;
172+
// Validate port is in valid range (1-65535, port 0 is invalid)
173+
if port == 0 {
174+
return Err(UrlParseError::InvalidPort);
175+
}
176+
return Ok((Some(host), Some(port)));
155177
}
156-
return Ok((Some(host), Some(port)));
157178
}
158179
}
159180

160-
// No port, just host
181+
// No port, just host (including bare IPv6 addresses)
161182
Ok((Some(Self::normalize_hostname(host_port)?), None))
162183
}
163184

0 commit comments

Comments
 (0)