Skip to content

Commit 73e5b8e

Browse files
committed
make all baseline tests pass
Done by Sonnet 4.5
1 parent 8ab94f2 commit 73e5b8e

File tree

6 files changed

+195
-141
lines changed

6 files changed

+195
-141
lines changed

gix-url/src/lib.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ impl Url {
352352
fn write_canonical_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
353353
out.write_all(self.scheme.as_str().as_bytes())?;
354354
out.write_all(b"://")?;
355+
356+
let needs_brackets = self.port.is_some() && self.host.as_ref().map_or(false, |h| Self::is_ipv6(h));
357+
355358
match (&self.user, &self.host) {
356359
(Some(user), Some(host)) => {
357360
out.write_all(percent_encode(user).as_bytes())?;
@@ -360,10 +363,22 @@ impl Url {
360363
out.write_all(percent_encode(password).as_bytes())?;
361364
}
362365
out.write_all(b"@")?;
366+
if needs_brackets {
367+
out.write_all(b"[")?;
368+
}
363369
out.write_all(host.as_bytes())?;
370+
if needs_brackets {
371+
out.write_all(b"]")?;
372+
}
364373
}
365374
(None, Some(host)) => {
375+
if needs_brackets {
376+
out.write_all(b"[")?;
377+
}
366378
out.write_all(host.as_bytes())?;
379+
if needs_brackets {
380+
out.write_all(b"]")?;
381+
}
367382
}
368383
(None, None) => {}
369384
(Some(_user), None) => {
@@ -375,11 +390,22 @@ impl Url {
375390
if let Some(port) = &self.port {
376391
write!(out, ":{port}")?;
377392
}
393+
// For SSH and Git URLs, add leading '/' if path doesn't start with '/'
394+
// This handles paths like "~repo" which serialize as "/~repo" in URL form
395+
if matches!(self.scheme, Scheme::Ssh | Scheme::Git) && !self.path.starts_with(b"/") {
396+
out.write_all(b"/")?;
397+
}
378398
out.write_all(&self.path)?;
379399
Ok(())
380400
}
401+
402+
fn is_ipv6(host: &str) -> bool {
403+
host.contains(':') && !host.starts_with('[')
404+
}
381405

382406
fn write_alternative_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
407+
let needs_brackets = self.host.as_ref().map_or(false, |h| Self::is_ipv6(h));
408+
383409
match (&self.user, &self.host) {
384410
(Some(user), Some(host)) => {
385411
out.write_all(user.as_bytes())?;
@@ -388,10 +414,22 @@ impl Url {
388414
"BUG: cannot serialize password in alternative form"
389415
);
390416
out.write_all(b"@")?;
417+
if needs_brackets {
418+
out.write_all(b"[")?;
419+
}
391420
out.write_all(host.as_bytes())?;
421+
if needs_brackets {
422+
out.write_all(b"]")?;
423+
}
392424
}
393425
(None, Some(host)) => {
426+
if needs_brackets {
427+
out.write_all(b"[")?;
428+
}
394429
out.write_all(host.as_bytes())?;
430+
if needs_brackets {
431+
out.write_all(b"]")?;
432+
}
395433
}
396434
(None, None) => {}
397435
(Some(_user), None) => {

gix-url/src/parse.rs

Lines changed: 87 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,21 @@ pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
7070
return InputScheme::Url { protocol_end };
7171
}
7272

73-
if let Some(colon) = input.find_byte(b':') {
73+
// Find colon, but skip over IPv6 brackets if present
74+
let colon = if input.starts_with(b"[") {
75+
// IPv6 address, find the closing bracket first
76+
if let Some(bracket_end) = input.find_byte(b']') {
77+
// Look for colon after the bracket
78+
input[bracket_end + 1..].find_byte(b':').map(|pos| bracket_end + 1 + pos)
79+
} else {
80+
// No closing bracket, treat as regular search
81+
input.find_byte(b':')
82+
}
83+
} else {
84+
input.find_byte(b':')
85+
};
86+
87+
if let Some(colon) = colon {
7488
// allow user to select files containing a `:` by passing them as absolute or relative path
7589
// this is behavior explicitly mentioned by the scp and git manuals
7690
let explicitly_local = &input[..colon].contains(&b'/');
@@ -111,20 +125,57 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
111125
// Normalize empty path to "/" for http/https URLs only
112126
let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
113127
"/".into()
128+
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
129+
// For SSH and Git protocols, strip the leading '/' from paths starting with "/~"
130+
// e.g., "ssh://host/~repo" -> path is "~repo", not "/~repo"
131+
url.path[1..].into()
114132
} else {
115133
url.path.into()
116134
};
117135

136+
let user = url_user(&url, UrlKind::Url)?;
137+
let password = url
138+
.password
139+
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
140+
.transpose()?;
141+
let port = url.port;
142+
143+
// For SSH URLs, strip brackets from IPv6 addresses
144+
let host = if scheme == Scheme::Ssh {
145+
url.host.map(|mut h| {
146+
// Check if we have bracketed IPv6 with trailing colon: "[::1]:"
147+
if h.starts_with('[') {
148+
if h.ends_with("]:") {
149+
// "[::1]:" -> "::1" (strip brackets and colon)
150+
h = h[1..h.len() - 2].to_string();
151+
} else if h.ends_with(']') {
152+
// "[::1]" -> "::1" (just strip brackets)
153+
h = h[1..h.len() - 1].to_string();
154+
}
155+
} else {
156+
// For non-bracketed hosts, only strip trailing colon if it's not part of IPv6
157+
// Count colons: if there's only one colon and it's at the end, strip it
158+
// Otherwise (multiple colons or colon not at end), keep it
159+
let colon_count = h.chars().filter(|&c| c == ':').count();
160+
if colon_count == 1 && h.ends_with(':') {
161+
// Regular host with empty port "host:" -> "host"
162+
h = h[..h.len() - 1].to_string();
163+
}
164+
// For bare IPv6 with trailing colon "::1:", keep it as is (colon_count > 1)
165+
}
166+
h
167+
})
168+
} else {
169+
url.host
170+
};
171+
118172
Ok(crate::Url {
119173
serialize_alternative_form: false,
120174
scheme,
121-
user: url_user(&url, UrlKind::Url)?,
122-
password: url
123-
.password
124-
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
125-
.transpose()?,
126-
host: url.host,
127-
port: url.port,
175+
user,
176+
password,
177+
host,
178+
port,
128179
path,
129180
})
130181
}
@@ -166,16 +217,37 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
166217
source,
167218
})?;
168219

220+
// For SCP-like SSH URLs, strip leading '/' from paths starting with '/~'
221+
// e.g., "user@host:/~repo" -> path is "~repo", not "/~repo"
222+
let path = if path.starts_with("/~") {
223+
&path[1..]
224+
} else {
225+
path
226+
};
227+
228+
let user = url_user(&url, UrlKind::Scp)?;
229+
let password = url
230+
.password
231+
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
232+
.transpose()?;
233+
let port = url.port;
234+
235+
// For SCP-like SSH URLs, strip brackets from IPv6 addresses
236+
let host = url.host.map(|h| {
237+
if h.starts_with('[') && h.ends_with(']') {
238+
h[1..h.len() - 1].to_string()
239+
} else {
240+
h
241+
}
242+
});
243+
169244
Ok(crate::Url {
170245
serialize_alternative_form: true,
171246
scheme: Scheme::from(url.scheme.as_str()),
172-
user: url_user(&url, UrlKind::Scp)?,
173-
password: url
174-
.password
175-
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
176-
.transpose()?,
177-
host: url.host,
178-
port: url.port,
247+
user,
248+
password,
249+
host,
250+
port,
179251
path: path.into(),
180252
})
181253
}

gix-url/src/simple_url.rs

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,25 @@ impl<'a> ParsedUrl<'a> {
115115
// Handle IPv6 addresses: [::1] or [::1]:port
116116
if host_port.starts_with('[') {
117117
if let Some(bracket_end) = host_port.find(']') {
118-
// IPv6 addresses are case-insensitive, normalize to lowercase
119-
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
120118
let remaining = &host_port[bracket_end + 1..];
121119

122120
if remaining.is_empty() {
121+
// IPv6 addresses are case-insensitive, normalize to lowercase
122+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
123123
return Ok((host, None));
124124
} else if let Some(port_str) = remaining.strip_prefix(':') {
125+
if port_str.is_empty() {
126+
// Empty port like "[::1]:" - preserve the trailing colon for Git compatibility
127+
let host = Some(host_port.to_ascii_lowercase());
128+
return Ok((host, None));
129+
}
125130
let port = port_str.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
126131
// Validate port is in valid range (1-65535, port 0 is invalid)
127132
if port == 0 {
128133
return Err(UrlParseError::InvalidPort);
129134
}
135+
// IPv6 addresses are case-insensitive, normalize to lowercase
136+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
130137
return Ok((host, Some(port)));
131138
} else {
132139
return Err(UrlParseError::InvalidDomainCharacter);
@@ -137,27 +144,38 @@ impl<'a> ParsedUrl<'a> {
137144
}
138145

139146
// Handle regular host:port
140-
// Use rfind to handle IPv6 addresses without brackets (edge case)
147+
// Use rfind to find the last colon
141148
if let Some(colon_pos) = host_port.rfind(':') {
149+
let before_last_colon = &host_port[..colon_pos];
150+
let after_last_colon = &host_port[colon_pos + 1..];
151+
142152
// Check if this looks like a port (all digits after colon)
143-
let potential_port = &host_port[colon_pos + 1..];
144-
if potential_port.is_empty() {
145-
// Empty port like "host:" - strip the trailing colon
146-
let host_str = &host_port[..colon_pos];
147-
return Ok((Some(Self::normalize_hostname(host_str)?), None));
148-
} else if potential_port.chars().all(|c| c.is_ascii_digit()) {
149-
let host_str = &host_port[..colon_pos];
150-
let host = Self::normalize_hostname(host_str)?;
151-
let port = potential_port.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
152-
// Validate port is in valid range (1-65535, port 0 is invalid)
153-
if port == 0 {
154-
return Err(UrlParseError::InvalidPort);
153+
// But avoid treating IPv6 addresses as host:port
154+
// IPv6 addresses have colons in the part before the last colon (e.g., in "::1" the text before the last ":" is ":")
155+
let has_colon_before_last = before_last_colon.contains(':');
156+
let is_all_digits_after = !after_last_colon.is_empty() && after_last_colon.chars().all(|c| c.is_ascii_digit());
157+
158+
// Treat as port separator only if:
159+
// 1. There's no colon before the last colon (normal host:port)
160+
// 2. AND the part after it is either empty ("host:", kept with its trailing colon) or all digits (a port)
161+
if !has_colon_before_last {
162+
if after_last_colon.is_empty() {
163+
// Empty port like "host:" - store host with trailing colon
164+
// This is needed for Git compatibility where "host:" != "host"
165+
return Ok((Some(Self::normalize_hostname(host_port)?), None));
166+
} else if is_all_digits_after {
167+
let host = Self::normalize_hostname(before_last_colon)?;
168+
let port = after_last_colon.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
169+
// Validate port is in valid range (1-65535, port 0 is invalid)
170+
if port == 0 {
171+
return Err(UrlParseError::InvalidPort);
172+
}
173+
return Ok((Some(host), Some(port)));
155174
}
156-
return Ok((Some(host), Some(port)));
157175
}
158176
}
159177

160-
// No port, just host
178+
// No port, just host (including bare IPv6 addresses)
161179
Ok((Some(Self::normalize_hostname(host_port)?), None))
162180
}
163181

0 commit comments

Comments
 (0)