Skip to content

Commit

Permalink
Expand environment variables prior to detecting scheme (#2394)
Browse files Browse the repository at this point in the history
## Summary

This PR ensures that we expand environment variables _before_ sniffing
for the URL scheme (e.g., `file://` vs. `https://` vs. something else).

Closes #2375.
  • Loading branch information
charliermarsh authored Mar 12, 2024
1 parent 3bf20f9 commit 7220894
Show file tree
Hide file tree
Showing 7 changed files with 286 additions and 111 deletions.
14 changes: 9 additions & 5 deletions crates/distribution-types/src/index_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use url::Url;

use pep508_rs::{split_scheme, Scheme, VerbatimUrl};
use pep508_rs::{expand_env_vars, split_scheme, Scheme, VerbatimUrl};
use uv_fs::normalize_url_path;

use crate::Verbatim;
Expand Down Expand Up @@ -108,7 +108,11 @@ impl FromStr for FlatIndexLocation {
/// - `../ferris/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Some((scheme, path)) = split_scheme(s) {
// Expand environment variables.
let expanded = expand_env_vars(s);

// Parse the expanded path.
if let Some((scheme, path)) = split_scheme(&expanded) {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../ferris/`
Some(Scheme::File) => {
Expand All @@ -123,19 +127,19 @@ impl FromStr for FlatIndexLocation {

// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Some(_) => {
let url = Url::parse(s)?;
let url = Url::parse(expanded.as_ref())?;
Ok(Self::Url(url))
}

// Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz`
None => {
let path = PathBuf::from(s);
let path = PathBuf::from(expanded.as_ref());
Ok(Self::Path(path))
}
}
} else {
// Ex) `../ferris/`
let path = PathBuf::from(s);
let path = PathBuf::from(expanded.as_ref());
Ok(Self::Path(path))
}
}
Expand Down
42 changes: 24 additions & 18 deletions crates/pep508-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ pub use marker::{
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use uv_fs::normalize_url_path;
// Parity with the crates.io version of pep508_rs
use crate::verbatim_url::VerbatimUrlError;
pub use uv_normalize::{ExtraName, InvalidNameError, PackageName};
pub use verbatim_url::{expand_path_vars, split_scheme, Scheme, VerbatimUrl};
pub use verbatim_url::{expand_env_vars, split_scheme, Scheme, VerbatimUrl};

mod marker;
mod verbatim_url;
Expand Down Expand Up @@ -803,7 +804,10 @@ fn preprocess_url(
start: usize,
len: usize,
) -> Result<VerbatimUrl, Pep508Error> {
if let Some((scheme, path)) = split_scheme(url) {
// Expand environment variables in the URL.
let expanded = expand_env_vars(url);

if let Some((scheme, path)) = split_scheme(&expanded) {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/`.
Some(Scheme::File) => {
Expand All @@ -814,12 +818,11 @@ fn preprocess_url(

#[cfg(feature = "non-pep508-extensions")]
if let Some(working_dir) = working_dir {
return Ok(
VerbatimUrl::parse_path(path, working_dir).with_given(url.to_string())
);
return Ok(VerbatimUrl::parse_path(path.as_ref(), working_dir)
.with_given(url.to_string()));
}

Ok(VerbatimUrl::parse_absolute_path(path)
Ok(VerbatimUrl::parse_absolute_path(path.as_ref())
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
Expand All @@ -831,24 +834,25 @@ fn preprocess_url(
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Some(_) => {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Ok(VerbatimUrl::from_str(url).map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?)
Ok(VerbatimUrl::parse_url(expanded.as_ref())
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(VerbatimUrlError::Url(err)),
start,
len,
input: cursor.to_string(),
})?
.with_given(url.to_string()))
}

// Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz`
_ => {
#[cfg(feature = "non-pep508-extensions")]
if let Some(working_dir) = working_dir {
return Ok(
VerbatimUrl::parse_path(url, working_dir).with_given(url.to_string())
);
return Ok(VerbatimUrl::parse_path(expanded.as_ref(), working_dir)
.with_given(url.to_string()));
}

Ok(VerbatimUrl::parse_absolute_path(url)
Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref())
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
Expand All @@ -862,10 +866,12 @@ fn preprocess_url(
// Ex) `../editable/`
#[cfg(feature = "non-pep508-extensions")]
if let Some(working_dir) = working_dir {
return Ok(VerbatimUrl::parse_path(url, working_dir).with_given(url.to_string()));
return Ok(
VerbatimUrl::parse_path(expanded.as_ref(), working_dir).with_given(url.to_string())
);
}

Ok(VerbatimUrl::parse_absolute_path(url)
Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref())
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
Expand Down
62 changes: 18 additions & 44 deletions crates/pep508-rs/src/verbatim_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ pub struct VerbatimUrl {
}

impl VerbatimUrl {
/// Parse a URL from a string, expanding any environment variables.
pub fn parse(given: impl AsRef<str>) -> Result<Self, ParseError> {
let url = Url::parse(&expand_env_vars(given.as_ref(), Escape::Url))?;
Ok(Self { url, given: None })
}

/// Create a [`VerbatimUrl`] from a [`Url`].
pub fn from_url(url: Url) -> Self {
Self { url, given: None }
Expand All @@ -48,15 +42,18 @@ impl VerbatimUrl {
Self { url, given: None }
}

/// Parse a URL from a string, expanding any environment variables.
pub fn parse_url(given: impl AsRef<str>) -> Result<Self, ParseError> {
let url = Url::parse(given.as_ref())?;
Ok(Self { url, given: None })
}

/// Parse a URL from an absolute or relative path.
#[cfg(feature = "non-pep508-extensions")] // PEP 508 arguably only allows absolute file URLs.
pub fn parse_path(path: impl AsRef<str>, working_dir: impl AsRef<Path>) -> Self {
// Expand any environment variables.
let path = PathBuf::from(expand_env_vars(path.as_ref(), Escape::Path).as_ref());

pub fn parse_path(path: impl AsRef<Path>, working_dir: impl AsRef<Path>) -> Self {
// Convert the path to an absolute path, if necessary.
let path = if path.is_absolute() {
path
let path = if path.as_ref().is_absolute() {
path.as_ref().to_path_buf()
} else {
working_dir.as_ref().join(path)
};
Expand All @@ -71,15 +68,12 @@ impl VerbatimUrl {
}

/// Parse a URL from an absolute path.
pub fn parse_absolute_path(path: impl AsRef<str>) -> Result<Self, VerbatimUrlError> {
// Expand any environment variables.
let path = PathBuf::from(expand_env_vars(path.as_ref(), Escape::Path).as_ref());

pub fn parse_absolute_path(path: impl AsRef<Path>) -> Result<Self, VerbatimUrlError> {
// Convert the path to an absolute path, if necessary.
let path = if path.is_absolute() {
path
let path = if path.as_ref().is_absolute() {
path.as_ref().to_path_buf()
} else {
return Err(VerbatimUrlError::RelativePath(path));
return Err(VerbatimUrlError::RelativePath(path.as_ref().to_path_buf()));
};

// Normalize the path.
Expand Down Expand Up @@ -128,9 +122,7 @@ impl std::str::FromStr for VerbatimUrl {
type Err = VerbatimUrlError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
.map(|url| url.with_given(s.to_owned()))
.map_err(|e| VerbatimUrlError::Url(s.to_owned(), e))
Ok(Self::parse_url(s).map(|url| url.with_given(s.to_owned()))?)
}
}

Expand All @@ -152,23 +144,14 @@ impl Deref for VerbatimUrl {
#[derive(thiserror::Error, Debug)]
pub enum VerbatimUrlError {
/// Failed to parse a URL.
#[error("{0}")]
Url(String, #[source] ParseError),
#[error(transparent)]
Url(#[from] ParseError),

/// Received a relative path, but no working directory was provided.
#[error("relative path without a working directory: {0}")]
RelativePath(PathBuf),
}

/// Whether to apply percent-encoding when expanding environment variables.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Escape {
/// Apply percent-encoding.
Url,
/// Do not apply percent-encoding.
Path,
}

/// Expand all available environment variables.
///
/// This is modeled off of pip's environment variable expansion, which states:
Expand All @@ -184,7 +167,7 @@ enum Escape {
/// Valid characters in variable names follow the `POSIX standard
/// <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
/// to uppercase letter, digits and the `_` (underscore).
fn expand_env_vars(s: &str, escape: Escape) -> Cow<'_, str> {
pub fn expand_env_vars(s: &str) -> Cow<'_, str> {
// Generate the project root, to be used via the `${PROJECT_ROOT}`
// environment variable.
static PROJECT_ROOT_FRAGMENT: Lazy<String> = Lazy::new(|| {
Expand All @@ -198,21 +181,12 @@ fn expand_env_vars(s: &str, escape: Escape) -> Cow<'_, str> {
RE.replace_all(s, |caps: &regex::Captures<'_>| {
let name = caps.name("name").unwrap().as_str();
std::env::var(name).unwrap_or_else(|_| match name {
// Ensure that the variable is URL-escaped, if necessary.
"PROJECT_ROOT" => match escape {
Escape::Url => PROJECT_ROOT_FRAGMENT.replace(' ', "%20"),
Escape::Path => PROJECT_ROOT_FRAGMENT.to_string(),
},
"PROJECT_ROOT" => PROJECT_ROOT_FRAGMENT.to_string(),
_ => caps["var"].to_owned(),
})
})
}

/// Expand all available environment variables in a path-like string.
pub fn expand_path_vars(path: &str) -> Cow<'_, str> {
expand_env_vars(path, Escape::Path)
}

/// Like [`Url::parse`], but only splits the scheme. Derived from the `url` crate.
pub fn split_scheme(s: &str) -> Option<(&str, &str)> {
/// <https://url.spec.whatwg.org/#c0-controls-and-space>
Expand Down
Loading

0 comments on commit 7220894

Please sign in to comment.