Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions gix-url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,39 @@ pub struct Url {
/// The URL scheme.
pub scheme: Scheme,
/// The user to impersonate on the remote.
///
/// Stored in decoded form: percent-encoded characters are decoded during parsing.
/// Re-encoded during canonical serialization, but written as-is in alternative form.
pub user: Option<String>,
/// The password associated with a user.
///
/// Stored in decoded form: percent-encoded characters are decoded during parsing.
/// Re-encoded during canonical serialization. Cannot be serialized in alternative form (will panic in debug builds).
pub password: Option<String>,
/// The host to which to connect. Localhost is implied if `None`.
///
/// IPv6 addresses are stored *without* brackets for SSH schemes, but *with* brackets for other schemes.
/// Brackets are automatically added during serialization when needed (e.g., when a port is specified with an IPv6 host).
pub host: Option<String>,
/// When serializing, use the alternative form, because the URL was originally parsed from one.
///
/// Alternative forms include SCP-like syntax (`user@host:path`) and bare file paths.
/// When `true`, password and port cannot be serialized (will panic in debug builds).
pub serialize_alternative_form: bool,
/// The port to use when connecting to a host. If `None`, standard ports depending on `scheme` will be used.
pub port: Option<u16>,
/// The path portion of the URL, usually the location of the git repository.
///
/// Unlike `user` and `password`, paths are stored and serialized in their original form
/// without percent-decoding or re-encoding (e.g., `%20` remains `%20`, not converted to space).
///
/// Path normalization during parsing:
/// - SSH/Git schemes: Leading `/~` is stripped (e.g., `/~repo` becomes `~repo`)
/// - SSH/Git schemes: Empty paths are rejected as errors
/// - HTTP/HTTPS schemes: Empty paths are normalized to `/`
///
/// During serialization, SSH/Git URLs prepend `/` to paths not starting with `/`.
///
/// # Security Warning
///
/// URLs allow paths to start with `-` which makes it possible to mask command-line arguments as path which then leads to
Expand Down Expand Up @@ -380,6 +402,9 @@ impl Url {

out.write_all(self.scheme.as_str().as_bytes())?;
out.write_all(b"://")?;

let needs_brackets = self.port.is_some() && self.host_needs_brackets();

match (&self.user, &self.host) {
(Some(user), Some(host)) => {
out.write_all(percent_encode(user).as_bytes())?;
Expand All @@ -388,10 +413,22 @@ impl Url {
out.write_all(percent_encode(password).as_bytes())?;
}
out.write_all(b"@")?;
if needs_brackets {
out.write_all(b"[")?;
}
out.write_all(host.as_bytes())?;
if needs_brackets {
out.write_all(b"]")?;
}
}
(None, Some(host)) => {
if needs_brackets {
out.write_all(b"[")?;
}
out.write_all(host.as_bytes())?;
if needs_brackets {
out.write_all(b"]")?;
}
}
(None, None) => {}
(Some(_user), None) => {
Expand All @@ -403,23 +440,45 @@ impl Url {
if let Some(port) = &self.port {
write!(out, ":{port}")?;
}
// For SSH and Git URLs, add leading '/' if path doesn't start with '/'
// This handles paths like "~repo" which serialize as "/~repo" in URL form
if matches!(self.scheme, Scheme::Ssh | Scheme::Git) && !self.path.starts_with(b"/") {
out.write_all(b"/")?;
}
out.write_all(&self.path)?;
Ok(())
}

/// Returns `true` if the host has to be wrapped in `[` and `]` when it is written out.
///
/// A host qualifies when it is set, contains a `:` (which is treated as the mark of an
/// IPv6 address) and does not already start with `[`. A missing host never needs brackets.
fn host_needs_brackets(&self) -> bool {
    match self.host.as_deref() {
        // A colon inside the host marks it as IPv6; a leading `[` means it is bracketed already.
        Some(host) => host.contains(':') && !host.starts_with('['),
        None => false,
    }
}

fn write_alternative_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
let needs_brackets = self.host_needs_brackets();

match (&self.user, &self.host) {
(Some(user), Some(host)) => {
out.write_all(user.as_bytes())?;
assert!(
self.password.is_none(),
"BUG: cannot serialize password in alternative form"
);
out.write_all(b"@")?;
if needs_brackets {
out.write_all(b"[")?;
}
out.write_all(host.as_bytes())?;
if needs_brackets {
out.write_all(b"]")?;
}
}
(None, Some(host)) => {
if needs_brackets {
out.write_all(b"[")?;
}
out.write_all(host.as_bytes())?;
if needs_brackets {
out.write_all(b"]")?;
}
}
(None, None) => {}
(Some(_user), None) => {
Expand All @@ -428,7 +487,6 @@ impl Url {
));
}
}
assert!(self.port.is_none(), "BUG: cannot serialize port in alternative form");
if self.scheme == Scheme::Ssh {
out.write_all(b":")?;
}
Expand Down
97 changes: 82 additions & 15 deletions gix-url/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,23 @@ pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
return InputScheme::Url { protocol_end };
}

if let Some(colon) = input.find_byte(b':') {
// Find colon, but skip over IPv6 brackets if present
let colon = if input.starts_with(b"[") {
// IPv6 address, find the closing bracket first
if let Some(bracket_end) = input.find_byte(b']') {
// Look for colon after the bracket
input[bracket_end + 1..]
.find_byte(b':')
.map(|pos| bracket_end + 1 + pos)
} else {
// No closing bracket, treat as regular search
input.find_byte(b':')
}
} else {
input.find_byte(b':')
};

if let Some(colon) = colon {
// allow user to select files containing a `:` by passing them as absolute or relative path
// this is behavior explicitly mentioned by the scp and git manuals
let explicitly_local = &input[..colon].contains(&b'/');
Expand Down Expand Up @@ -111,20 +127,54 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
// Normalize empty path to "/" for http/https URLs only
let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
"/".into()
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
// For SSH and Git protocols, strip leading '/' from paths starting with '~'
// e.g., "ssh://host/~repo" -> path is "~repo", not "/~repo"
url.path[1..].into()
} else {
url.path.into()
};

let user = url_user(&url, UrlKind::Url)?;
let password = url
.password
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
.transpose()?;
let port = url.port;

// For SSH URLs, strip brackets from IPv6 addresses
let host = if scheme == Scheme::Ssh {
url.host.map(|mut h| {
// Bracketed IPv6 forms
if let Some(h2) = h.strip_prefix('[') {
if let Some(inner) = h2.strip_suffix("]:") {
// "[::1]:" → "::1"
h = inner.to_string();
} else if let Some(inner) = h2.strip_suffix(']') {
// "[::1]" → "::1"
h = inner.to_string();
}
} else {
// Non-bracketed host: strip a single trailing colon
let colon_count = h.chars().filter(|&c| c == ':').take(2).count();
if colon_count == 1 {
if let Some(inner) = h.strip_suffix(':') {
h = inner.to_string();
}
}
}
h
})
} else {
url.host
};
Ok(crate::Url {
serialize_alternative_form: false,
scheme,
user: url_user(&url, UrlKind::Url)?,
password: url
.password
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
.transpose()?,
host: url.host,
port: url.port,
user,
password,
host,
port,
path,
})
}
Expand Down Expand Up @@ -166,16 +216,33 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
source,
})?;

// For SCP-like SSH URLs, strip leading '/' from paths starting with '/~'
// e.g., "user@host:/~repo" -> path is "~repo", not "/~repo"
let path = if path.starts_with("/~") { &path[1..] } else { path };

let user = url_user(&url, UrlKind::Scp)?;
let password = url
.password
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
.transpose()?;
let port = url.port;

// For SCP-like SSH URLs, strip brackets from IPv6 addresses
let host = url.host.map(|h| {
if let Some(h) = h.strip_prefix("[").and_then(|h| h.strip_suffix("]")) {
h.to_string()
} else {
h
}
});

Ok(crate::Url {
serialize_alternative_form: true,
scheme: Scheme::from(url.scheme.as_str()),
user: url_user(&url, UrlKind::Scp)?,
password: url
.password
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
.transpose()?,
host: url.host,
port: url.port,
user,
password,
host,
port,
path: path.into(),
})
}
Expand Down
64 changes: 39 additions & 25 deletions gix-url/src/simple_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,12 @@ impl<'a> ParsedUrl<'a> {
};

// Parse authority: [user[:password]@]host[:port]
let (username, password, host, port) = if let Some(at_pos) = authority.rfind('@') {
let (username, password, host, port) = if let Some((user_info, host_port)) = authority.rsplit_once('@') {
// Has user info
let user_info = &authority[..at_pos];
let host_port = &authority[at_pos + 1..];

let (user, pass) = if let Some(colon_pos) = user_info.find(':') {
let pass_str = &user_info[colon_pos + 1..];
let (user, pass) = if let Some((user, pass_str)) = user_info.split_once(':') {
// Treat empty password as None
let pass = if pass_str.is_empty() { None } else { Some(pass_str) };
(&user_info[..colon_pos], pass)
(user, pass)
} else {
(user_info, None)
};
Expand Down Expand Up @@ -115,18 +111,25 @@ impl<'a> ParsedUrl<'a> {
// Handle IPv6 addresses: [::1] or [::1]:port
if host_port.starts_with('[') {
if let Some(bracket_end) = host_port.find(']') {
// IPv6 addresses are case-insensitive, normalize to lowercase
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
let remaining = &host_port[bracket_end + 1..];

if remaining.is_empty() {
// IPv6 addresses are case-insensitive, normalize to lowercase
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
return Ok((host, None));
} else if let Some(port_str) = remaining.strip_prefix(':') {
if port_str.is_empty() {
// Empty port like "[::1]:" - preserve the trailing colon for Git compatibility
let host = Some(host_port.to_ascii_lowercase());
return Ok((host, None));
}
let port = port_str.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
// Validate port is in valid range (1-65535, port 0 is invalid)
if port == 0 {
return Err(UrlParseError::InvalidPort);
}
// IPv6 addresses are case-insensitive, normalize to lowercase
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
return Ok((host, Some(port)));
} else {
return Err(UrlParseError::InvalidDomainCharacter);
Expand All @@ -137,27 +140,38 @@ impl<'a> ParsedUrl<'a> {
}

// Handle regular host:port
// Use rfind to handle IPv6 addresses without brackets (edge case)
if let Some(colon_pos) = host_port.rfind(':') {
// Split at the last colon to separate a potential port from the host
if let Some((before_last_colon, after_last_colon)) = host_port.rsplit_once(':') {
// Check if this looks like a port (all digits after colon)
let potential_port = &host_port[colon_pos + 1..];
if potential_port.is_empty() {
// Empty port like "host:" - strip the trailing colon
let host_str = &host_port[..colon_pos];
return Ok((Some(Self::normalize_hostname(host_str)?), None));
} else if potential_port.chars().all(|c| c.is_ascii_digit()) {
let host_str = &host_port[..colon_pos];
let host = Self::normalize_hostname(host_str)?;
let port = potential_port.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
// Validate port is in valid range (1-65535, port 0 is invalid)
if port == 0 {
return Err(UrlParseError::InvalidPort);
// But avoid treating IPv6 addresses as host:port
// IPv6 addresses have colons in the part before the last colon (e.g., "::1" has "::" before the last ":")
let has_colon_before_last = before_last_colon.contains(':');
let is_all_digits_after =
!after_last_colon.is_empty() && after_last_colon.chars().all(|c| c.is_ascii_digit());

// Treat as port separator only if:
// 1. There's no colon before the last colon (normal host:port)
// 2. OR it's explicitly empty (host: with trailing colon)
if !has_colon_before_last {
if after_last_colon.is_empty() {
// Empty port like "host:" - store host with trailing colon
// This is needed for Git compatibility where "host:" != "host"
return Ok((Some(Self::normalize_hostname(host_port)?), None));
} else if is_all_digits_after {
let host = Self::normalize_hostname(before_last_colon)?;
let port = after_last_colon
.parse::<u16>()
.map_err(|_| UrlParseError::InvalidPort)?;
// Validate port is in valid range (1-65535, port 0 is invalid)
if port == 0 {
return Err(UrlParseError::InvalidPort);
}
return Ok((Some(host), Some(port)));
}
return Ok((Some(host), Some(port)));
}
}

// No port, just host
// No port, just host (including bare IPv6 addresses)
Ok((Some(Self::normalize_hostname(host_port)?), None))
}

Expand Down
Loading
Loading