diff --git a/Cargo.lock b/Cargo.lock index 8db76edbbd..6759fe84f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -451,9 +451,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.4" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" dependencies = [ "jobserver", "libc", @@ -976,14 +976,15 @@ checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" [[package]] name = "demand" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32fc891cc4764e78af29352555c5b837c62a863ebde041f97bd232de926b552c" +checksum = "6baac05ea4b0164abd840ea135c0156ad7025d0920cd720ee4e654b822d94454" dependencies = [ "console", "fuzzy-matcher", "itertools 0.13.0", "once_cell", + "signal-hook", "termcolor", ] @@ -1698,11 +1699,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2326,9 +2327,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libgit2-sys" diff --git a/Cargo.toml b/Cargo.toml index 8da2acc38e..f4a1c6c408 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,8 +25,8 @@ include = [ "/src/plugins/core/assets/**", 
"/aqua-registry/pkgs/**/registry.yaml", ] -rust-version = "1.79" build = "build.rs" +rust-version = "1.82" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -78,7 +78,7 @@ git2 = "<1" glob = "0.3" globset = "0.4" heck = "0.5" -home = "= 0.5.9" # TODO: bump this when we can bump msrv +home = "0.5" humantime = "2" indenter = "0.3" indexmap = { version = "2", features = ["serde"] } diff --git a/docs/cli/sync/python.md b/docs/cli/sync/python.md index 767d3822e4..e5f3636835 100644 --- a/docs/cli/sync/python.md +++ b/docs/cli/sync/python.md @@ -17,7 +17,7 @@ Get tool versions from pyenv ### `--uv` -Sync tool versions from uv +Sync tool versions with uv (2-way sync) Examples: diff --git a/e2e/cli/test_sync_nvm b/e2e/sync/test_sync_nvm similarity index 100% rename from e2e/cli/test_sync_nvm rename to e2e/sync/test_sync_nvm diff --git a/e2e/cli/test_sync_nvm_slow b/e2e/sync/test_sync_nvm_slow similarity index 100% rename from e2e/cli/test_sync_nvm_slow rename to e2e/sync/test_sync_nvm_slow diff --git a/e2e/sync/test_sync_python_uv b/e2e/sync/test_sync_python_uv new file mode 100644 index 0000000000..4d0b7320d4 --- /dev/null +++ b/e2e/sync/test_sync_python_uv @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +assert "mise use -g uv python@3.11.3" +assert "mise x -- uv python install 3.11.1" +export UV_PYTHON_DOWNLOADS=never +assert "mise sync python --uv" +assert "mise x python@3.11.1 -- python -V" "Python 3.11.1" +assert "mise x -- uv run -p 3.11.3 -- python -V" "Python 3.11.3" diff --git a/mise.usage.kdl b/mise.usage.kdl index 2c44927cda..54fa2be6df 100644 --- a/mise.usage.kdl +++ b/mise.usage.kdl @@ -1268,7 +1268,7 @@ This won't overwrite any existing installs but will overwrite any existing symli $ uv run -p 3.10.0 -- python -V - uses mise-provided python " flag "--pyenv" help="Get tool versions from pyenv" - flag "--uv" help="Sync tool versions from uv" + flag "--uv" help="Sync tool versions with uv (2-way sync)" } cmd "ruby" 
help="Symlinks all ruby tool versions from an external tool into mise" { after_long_help r"Examples: diff --git a/src/backend/cargo.rs b/src/backend/cargo.rs index a6bc22b815..42774a4aac 100644 --- a/src/backend/cargo.rs +++ b/src/backend/cargo.rs @@ -92,9 +92,9 @@ impl Backend for CargoBackend { if let Some(bin) = opts.get("bin") { cmd = cmd.arg(format!("--bin={bin}")); } - if !opts + if opts .get("locked") - .is_some_and(|v| v.to_lowercase() == "false") + .is_none_or(|v| v.to_lowercase() != "false") { cmd = cmd.arg("--locked"); } diff --git a/src/cli/sync/python.rs b/src/cli/sync/python.rs index 9ae7406852..d44d63bcb9 100644 --- a/src/cli/sync/python.rs +++ b/src/cli/sync/python.rs @@ -1,5 +1,6 @@ use eyre::Result; use itertools::sorted; +use std::env::consts::{ARCH, OS}; use crate::env::PYENV_ROOT; use crate::{backend, config, dirs, env, file}; @@ -16,7 +17,7 @@ pub struct SyncPython { #[clap(long)] pyenv: bool, - /// Sync tool versions from uv + /// Sync tool versions with uv (2-way sync) #[clap(long)] uv: bool, } @@ -79,36 +80,33 @@ impl SyncPython { } } - // TODO: disable reverse syncing until there is a way to deal with these 2 files that uv needs: - // ❯ diff -rq uv mise - // Only in uv/lib/python3.11: EXTERNALLY-MANAGED - // Files uv/lib/python3.11/_sysconfigdata__darwin_darwin.py and mise/lib/python3.11/_sysconfigdata__darwin_darwin.py differ - // See https://github.com/jdx/mise/issues/3654 - //let subdirs = file::dir_subdirs(&installed_python_versions_path)?; - //for v in sorted(subdirs) { - // if v.starts_with(".") { - // continue; - // } - // let src = installed_python_versions_path.join(&v); - // if src.is_symlink() { - // continue; - // } - // // ~/.local/share/uv/python/cpython-3.10.16-macos-aarch64-none - // // ~/.local/share/uv/python/cpython-3.13.0-linux-x86_64-gnu - // let os = OS; - // let arch = if cfg!(target_arch = "x86_64") { - // "x86_64-gnu" - // } else if cfg!(target_arch = "aarch64") { - // "aarch64-none" - // } else { - // ARCH - // }; 
- // let dst = uv_versions_path.join(format!("cpython-{v}-{os}-{arch}")); - // if !dst.exists() { - // file::make_symlink(&src, &dst)?; - // miseprintln!("Synced python@{v} from mise to uv"); - // } - //} + let subdirs = file::dir_subdirs(&installed_python_versions_path)?; + for v in sorted(subdirs) { + if v.starts_with(".") { + continue; + } + let src = installed_python_versions_path.join(&v); + if src.is_symlink() { + continue; + } + // ~/.local/share/uv/python/cpython-3.10.16-macos-aarch64-none + // ~/.local/share/uv/python/cpython-3.13.0-linux-x86_64-gnu + let os = OS; + let arch = if cfg!(target_arch = "x86_64") { + "x86_64-gnu" + } else if cfg!(target_arch = "aarch64") { + "aarch64-none" + } else { + ARCH + }; + let dst = uv_versions_path.join(format!("cpython-{v}-{os}-{arch}")); + if !dst.exists() { + // TODO: uv doesn't support symlinked dirs + // https://github.com/astral-sh/uv/blob/e65a273f1b6b7c3ab129d902e93adeda4da20636/crates/uv-python/src/managed.rs#L196 + file::clone_dir(&src, &dst)?; + miseprintln!("Synced python@{v} from mise to uv"); + } + } Ok(()) } } diff --git a/src/file.rs b/src/file.rs index 7d62ccdf25..ad13a18f07 100644 --- a/src/file.rs +++ b/src/file.rs @@ -749,6 +749,17 @@ pub fn desymlink_path(p: &Path) -> PathBuf { } } +pub fn clone_dir(from: &PathBuf, to: &PathBuf) -> Result<()> { + if cfg!(macos) { + cmd!("cp", "-cR", from, to).run()?; + } else if cfg!(windows) { + cmd!("robocopy", from, to, "/MIR").run()?; + } else { + cmd!("cp", "--reflink=auto", "-r", from, to).run()?; + } + Ok(()) +} + #[cfg(test)] mod tests { diff --git a/src/http.rs b/src/http.rs index 10c2c6db0b..51162e7acf 100644 --- a/src/http.rs +++ b/src/http.rs @@ -211,10 +211,10 @@ fn with_github_auth(url: &Url, mut req: RequestBuilder) -> RequestBuilder { fn display_github_rate_limit(resp: &Response) { let status = resp.status().as_u16(); if status == 403 || status == 429 { - if !resp + if resp .headers() .get("x-ratelimit-remaining") - .is_some_and(|r| r == "0") + 
.is_none_or(|r| r != "0") { return; } diff --git a/src/main.rs b/src/main.rs index 250f2a5659..b81f2215e7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,6 +61,7 @@ mod shell; mod shims; mod shorthands; mod sops; +mod sysconfig; pub(crate) mod task; pub(crate) mod tera; pub(crate) mod timeout; diff --git a/src/plugins/core/python.rs b/src/plugins/core/python.rs index 14111442db..9138fa2a17 100644 --- a/src/plugins/core/python.rs +++ b/src/plugins/core/python.rs @@ -10,7 +10,7 @@ use crate::http::{HTTP, HTTP_FETCH}; use crate::install_context::InstallContext; use crate::toolset::{ToolRequest, ToolVersion, Toolset}; use crate::ui::progress_report::SingleReport; -use crate::{cmd, dirs, file, plugins}; +use crate::{cmd, dirs, file, plugins, sysconfig}; use eyre::{bail, eyre}; use flate2::read::GzDecoder; use itertools::Itertools; @@ -208,8 +208,33 @@ impl PythonPlugin { file::rename(&entry, install.join(filename))?; } } - #[cfg(unix)] - file::make_symlink(&install.join("bin/python3"), &install.join("bin/python"))?; + + let re_digits = regex!(r"\d+"); + let version_parts = tv.version.split('.').collect_vec(); + let major = re_digits + .find(version_parts[0]) + .and_then(|m| m.as_str().parse().ok()); + let minor = re_digits + .find(version_parts[1]) + .and_then(|m| m.as_str().parse().ok()); + let suffix = version_parts + .get(2) + .map(|s| re_digits.replace(s, "").to_string()); + if cfg!(unix) { + if let (Some(major), Some(minor), Some(suffix)) = (major, minor, suffix) { + if tv.request.options().get("patch_sysconfig") != Some(&"false".to_string()) { + sysconfig::update_sysconfig(&install, major, minor, &suffix)?; + } + } else { + debug!("failed to update sysconfig with version {}", tv.version); + } + } + + if !install.join("bin").join("python").exists() { + #[cfg(unix)] + file::make_symlink(&install.join("bin/python3"), &install.join("bin/python"))?; + } + Ok(()) } diff --git a/src/sysconfig/LICENSE-MIT b/src/sysconfig/LICENSE-MIT new file mode 100644 index 
0000000000..ec2236bb89 --- /dev/null +++ b/src/sysconfig/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Astral Software Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/sysconfig/cursor.rs b/src/sysconfig/cursor.rs new file mode 100644 index 0000000000..d6abb9fc4a --- /dev/null +++ b/src/sysconfig/cursor.rs @@ -0,0 +1,148 @@ +#![allow(dead_code)] + +use std::str::Chars; + +pub(super) const EOF_CHAR: char = '\0'; + +/// A cursor represents a pointer in the source code. +/// +/// Based on [`rustc`'s `Cursor`](https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs) +#[derive(Clone, Debug)] +pub(super) struct Cursor<'src> { + /// An iterator over the [`char`]'s of the source code. + chars: Chars<'src>, + + /// Stores the previous character for debug assertions. 
+ #[cfg(debug_assertions)] + prev_char: char, +} + +impl<'src> Cursor<'src> { + pub(super) fn new(source: &'src str) -> Self { + Self { + chars: source.chars(), + #[cfg(debug_assertions)] + prev_char: EOF_CHAR, + } + } + + /// Returns the previous character. Useful for debug assertions. + #[cfg(debug_assertions)] + pub(super) const fn previous(&self) -> char { + self.prev_char + } + + /// Peeks the next character from the input stream without consuming it. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. + pub(super) fn first(&self) -> char { + self.chars.clone().next().unwrap_or(EOF_CHAR) + } + + /// Peeks the second character from the input stream without consuming it. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. + pub(super) fn second(&self) -> char { + let mut chars = self.chars.clone(); + chars.next(); + chars.next().unwrap_or(EOF_CHAR) + } + + /// Returns the remaining text to lex. + /// + /// Use [`Cursor::text_len`] to get the length of the remaining text. + pub(super) fn rest(&self) -> &'src str { + self.chars.as_str() + } + + /// Returns `true` if the cursor is at the end of file. + pub(super) fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + + /// Moves the cursor to the next character, returning the previous character. + /// Returns [`None`] if there is no next character. 
+ pub(super) fn bump(&mut self) -> Option { + let prev = self.chars.next()?; + + #[cfg(debug_assertions)] + { + self.prev_char = prev; + } + + Some(prev) + } + + pub(super) fn eat_char(&mut self, c: char) -> bool { + if self.first() == c { + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_char2(&mut self, c1: char, c2: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) { + self.bump(); + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) { + self.bump(); + self.bump(); + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_if(&mut self, mut predicate: F) -> Option + where + F: FnMut(char) -> bool, + { + if predicate(self.first()) && !self.is_eof() { + self.bump() + } else { + None + } + } + + /// Eats symbols while predicate returns true or until the end of file is reached. + #[inline] + pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + // It was tried making optimized version of this for eg. line comments, but + // LLVM can inline all of this and compile it down to fast iteration over bytes. + while predicate(self.first()) && !self.is_eof() { + self.bump(); + } + } + + /// Skips the next `count` bytes. + /// + /// ## Panics + /// - If `count` is larger than the remaining bytes in the input stream. + /// - If `count` indexes into a multi-byte character. + pub(super) fn skip_bytes(&mut self, count: usize) { + #[cfg(debug_assertions)] + { + self.prev_char = self.chars.as_str()[..count] + .chars() + .next_back() + .unwrap_or('\0'); + } + + self.chars = self.chars.as_str()[count..].chars(); + } + + /// Skips to the end of the input stream. 
/// Replacement mode for sysconfig values.
#[derive(Debug)]
enum ReplacementMode {
    /// Replace only the whitespace-separated words that are exactly `from`.
    Partial { from: String },
    /// Replace the whole value.
    Full,
}

/// A replacement entry to patch in sysconfig data.
#[derive(Debug)]
struct ReplacementEntry {
    /// How the replacement is applied to a value.
    mode: ReplacementMode,
    /// The text that is substituted in.
    to: String,
}

impl ReplacementEntry {
    /// Patches a sysconfig value either partially (replacing a specific word) or fully.
    ///
    /// In partial mode `entry` is split on whitespace, every word equal to `from` is
    /// swapped for `to`, and the words are re-joined with single spaces. Words that
    /// merely contain `from` (e.g. `clang++` when `from` is `clang`) are kept.
    /// In full mode the result is `to`, regardless of `entry`.
    fn patch(&self, entry: &str) -> String {
        match &self.mode {
            ReplacementMode::Partial { from } => entry
                .split_whitespace()
                .map(|word| {
                    if word == from.as_str() {
                        self.to.as_str()
                    } else {
                        word
                    }
                })
                .collect::<Vec<_>>()
                .join(" "),
            ReplacementMode::Full => self.to.clone(),
        }
    }
}
+ let real_prefix = std::path::absolute(install_root)?; + let sysconfigdata = find_sysconfigdata(&real_prefix, major, minor, suffix)?; + trace!( + "Discovered `sysconfig` data at: {}", + sysconfigdata.display() + ); + + // Update the `_sysconfigdata_` file in-memory. + let contents = std::fs::read_to_string(&sysconfigdata)?; + let data = SysconfigData::from_str(&contents)?; + let data = patch_sysconfigdata(data, &real_prefix); + let contents = data.to_string_pretty()?; + + // Write the updated `_sysconfigdata_` file. + let mut file = std::fs::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(&sysconfigdata)?; + file.write_all(contents.as_bytes())?; + file.sync_data()?; + + Ok(()) +} + +/// Find the `_sysconfigdata_` file in a Python installation. +/// +/// For example, on macOS, returns `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"`. +fn find_sysconfigdata( + real_prefix: &Path, + major: u8, + minor: u8, + suffix: &str, +) -> Result { + // Find the `lib` directory in the Python installation. + let lib = real_prefix + .join("lib") + .join(format!("python{major}.{minor}{suffix}")); + if !lib.exists() { + return Err(Error::MissingLib); + } + + // Probe the `lib` directory for `_sysconfigdata_`. + for entry in lib.read_dir()? { + let entry = entry?; + + if entry.path().extension().is_none_or(|ext| ext != "py") { + continue; + } + + if !entry + .path() + .file_stem() + .and_then(|stem| stem.to_str()) + .is_some_and(|stem| stem.starts_with("_sysconfigdata_")) + { + continue; + } + + let metadata = entry.metadata()?; + if metadata.is_symlink() { + continue; + }; + + if metadata.is_file() { + return Ok(entry.path()); + } + } + + Err(Error::MissingSysconfigdata) +} + +/// Patch the given `_sysconfigdata_` contents. +fn patch_sysconfigdata(mut data: SysconfigData, real_prefix: &Path) -> SysconfigData { + /// Update the `/install` prefix in a whitespace-separated string. 
+ fn update_prefix(s: &str, real_prefix: &Path) -> String { + s.split_whitespace() + .map(|part| { + if let Some(rest) = part.strip_prefix("/install") { + if rest.is_empty() { + real_prefix.display().to_string() + } else { + real_prefix.join(&rest[1..]).display().to_string() + } + } else { + part.to_string() + } + }) + .collect::>() + .join(" ") + } + + /// Remove any references to `-isysroot` in a whitespace-separated string. + fn remove_isysroot(s: &str) -> String { + // If we see `-isysroot`, drop it and the next part. + let mut parts = s.split_whitespace().peekable(); + let mut result = Vec::with_capacity(parts.size_hint().0); + while let Some(part) = parts.next() { + if part == "-isysroot" { + parts.next(); + } else { + result.push(part); + } + } + result.join(" ") + } + + // Patch each value, as needed. + let mut count = 0; + for (key, value) in data.iter_mut() { + let Value::String(value) = value else { + continue; + }; + let patched = update_prefix(value, real_prefix); + let mut patched = remove_isysroot(&patched); + + if let Some(replacement_entry) = DEFAULT_VARIABLE_UPDATES.get(key) { + patched = replacement_entry.patch(&patched); + } + + if *value != patched { + trace!("Updated `{key}` from `{value}` to `{patched}`"); + count += 1; + *value = patched; + } + } + + match count { + 0 => trace!("No updates required"), + 1 => trace!("Updated 1 value"), + n => trace!("Updated {n} values"), + } + + // Mark the Python installation as standalone. 
+ data.insert("PYTHON_BUILD_STANDALONE".to_string(), Value::Int(1)); + + data +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Python installation is missing a `lib` directory")] + MissingLib, + #[error("Python installation is missing a `_sysconfigdata_` file")] + MissingSysconfigdata, + #[error(transparent)] + Parse(#[from] ParseError), + #[error(transparent)] + Json(#[from] serde_json::Error), +} + +#[cfg(test)] +#[cfg(unix)] +mod tests { + use super::*; + + #[test] + fn update_real_prefix() -> Result<(), Error> { + let sysconfigdata = [ + ("BASEMODLIBS", ""), + ("BINDIR", "/install/bin"), + ("BINLIBDEST", "/install/lib/python3.10"), + ("BLDLIBRARY", "-L. -lpython3.10"), + ("BUILDPYTHON", "python.exe"), + ("prefix", "/install/prefix"), + ("exec_prefix", "/install/exec_prefix"), + ("base", "/install/base"), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), Value::String(v.to_string()))) + .collect::(); + + let real_prefix = Path::new("/real/prefix"); + let data = patch_sysconfigdata(sysconfigdata, real_prefix); + + insta::assert_snapshot!(data.to_string_pretty()?, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "BASEMODLIBS": "", + "BINDIR": "/real/prefix/bin", + "BINLIBDEST": "/real/prefix/lib/python3.10", + "BLDLIBRARY": "-L. 
-lpython3.10", + "BUILDPYTHON": "python.exe", + "PYTHON_BUILD_STANDALONE": 1, + "base": "/real/prefix/base", + "exec_prefix": "/real/prefix/exec_prefix", + "prefix": "/real/prefix/prefix" + } + "###); + + Ok(()) + } + + #[test] + fn test_replacements() -> Result<(), Error> { + let sysconfigdata = [ + ("CC", "clang -pthread"), + ("CXX", "clang++ -pthread"), + ("AR", "/tools/llvm/bin/llvm-ar"), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), Value::String(v.to_string()))) + .collect::(); + + let real_prefix = Path::new("/real/prefix"); + let data = patch_sysconfigdata(sysconfigdata, real_prefix); + + insta::assert_snapshot!(data.to_string_pretty()?, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "AR": "ar", + "CC": "cc -pthread", + "CXX": "c++ -pthread", + "PYTHON_BUILD_STANDALONE": 1 + } + "###); + + Ok(()) + } + + #[test] + fn remove_isysroot() -> Result<(), Error> { + let sysconfigdata = [ + ("BLDSHARED", "clang -bundle -undefined dynamic_lookup -arch arm64 -isysroot /Applications/MacOSX14.2.sdk"), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), Value::String(v.to_string()))) + .collect::(); + + let real_prefix = Path::new("/real/prefix"); + let data = patch_sysconfigdata(sysconfigdata, real_prefix); + + insta::assert_snapshot!(data.to_string_pretty()?, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "BLDSHARED": "cc -bundle -undefined dynamic_lookup -arch arm64", + "PYTHON_BUILD_STANDALONE": 1 + } + "###); + + Ok(()) + } +} diff --git a/src/sysconfig/parser.rs b/src/sysconfig/parser.rs new file mode 100644 index 0000000000..427bf327af --- /dev/null +++ b/src/sysconfig/parser.rs @@ -0,0 +1,475 @@ +use std::collections::BTreeMap; +use std::str::FromStr; + +use serde::Serialize; +use serde_json::ser::PrettyFormatter; + +use crate::sysconfig::cursor::Cursor; + +/// A value in the [`SysconfigData`] map. 
+/// +/// Values are assumed to be either strings or integers. +#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)] +#[serde(untagged)] +pub(super) enum Value { + String(String), + Int(i32), +} + +/// The data extracted from a `_sysconfigdata_` file. +#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)] +pub(super) struct SysconfigData(BTreeMap); + +impl SysconfigData { + /// Returns an iterator over the key-value pairs in the map. + pub(super) fn iter_mut(&mut self) -> std::collections::btree_map::IterMut { + self.0.iter_mut() + } + + /// Inserts a key-value pair into the map. + pub(super) fn insert(&mut self, key: String, value: Value) -> Option { + self.0.insert(key, value) + } + + /// Formats the `sysconfig` data as a pretty-printed string. + pub(super) fn to_string_pretty(&self) -> Result { + let output = { + let mut buf = Vec::new(); + let mut serializer = serde_json::Serializer::with_formatter( + &mut buf, + PrettyFormatter::with_indent(b" "), + ); + self.0.serialize(&mut serializer)?; + String::from_utf8(buf).unwrap() + }; + Ok(format!( + "# system configuration generated and used by the sysconfig module\nbuild_time_vars = {output}\n", + )) + } +} + +impl std::fmt::Display for SysconfigData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let output = { + let mut buf = Vec::new(); + let mut serializer = serde_json::Serializer::new(&mut buf); + self.0.serialize(&mut serializer).unwrap(); + String::from_utf8(buf).unwrap() + }; + write!(f, "{output}",) + } +} + +impl FromIterator<(String, Value)> for SysconfigData { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +/// Parse the `_sysconfigdata_` file (e.g., `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"` +/// on macOS). +/// +/// `_sysconfigdata_` is structured as follows: +/// +/// 1. A comment on the first line (e.g., `# system configuration generated and used by the sysconfig module`). +/// 2. 
An assignment to `build_time_vars` (e.g., `build_time_vars = { ... }`). +/// +/// The right-hand side of the assignment is a JSON object. The keys are strings, and the values +/// are strings or numbers. +impl FromStr for SysconfigData { + type Err = Error; + + fn from_str(s: &str) -> Result { + // Read the first line of the file. + let Some(s) = + s.strip_prefix("# system configuration generated and used by the sysconfig module\n") + else { + return Err(Error::MissingHeader); + }; + + // Read the assignment to `build_time_vars`. + let Some(s) = s.strip_prefix("build_time_vars") else { + return Err(Error::MissingAssignment); + }; + + let mut cursor = Cursor::new(s); + + cursor.eat_while(is_python_whitespace); + if !cursor.eat_char('=') { + return Err(Error::MissingAssignment); + } + cursor.eat_while(is_python_whitespace); + + if !cursor.eat_char('{') { + return Err(Error::MissingOpenBrace); + } + + let mut map = BTreeMap::new(); + loop { + let Some(next) = cursor.bump() else { + return Err(Error::UnexpectedEof); + }; + + match next { + '\'' | '"' => { + // Parse key. + let key = parse_string(&mut cursor, next)?; + + cursor.eat_while(is_python_whitespace); + cursor.eat_char(':'); + cursor.eat_while(is_python_whitespace); + + // Parse value + let value = match cursor.first() { + '\'' | '"' => Value::String(parse_concatenated_string(&mut cursor)?), + '-' => { + cursor.bump(); + Value::Int(-parse_int(&mut cursor)?) + } + c if c.is_ascii_digit() => Value::Int(parse_int(&mut cursor)?), + c => return Err(Error::UnexpectedCharacter(c)), + }; + + // Insert into map. + map.insert(key, value); + + // Skip optional comma. + cursor.eat_while(is_python_whitespace); + cursor.eat_char(','); + cursor.eat_while(is_python_whitespace); + } + + // Skip whitespace. + ' ' | '\n' | '\r' | '\t' => {} + + // When we see a closing brace, we're done. 
+ '}' => { + break; + } + + c => return Err(Error::UnexpectedCharacter(c)), + } + } + + Ok(Self(map)) + } +} + +/// Parse a Python string literal. +/// +/// Expects the previous character to be the opening quote character. +fn parse_string(cursor: &mut Cursor, quote: char) -> Result { + let mut result = String::new(); + loop { + let Some(c) = cursor.bump() else { + return Err(Error::UnexpectedEof); + }; + match c { + '\\' => { + // Handle escaped quotes. + if cursor.first() == quote { + // Consume the backslash. + cursor.bump(); + result.push(quote); + continue; + } + + // Keep the backslash and following character. + result.push('\\'); + result.push(cursor.first()); + cursor.bump(); + } + + // Consume closing quote. + c if c == quote => { + break; + } + + c => { + result.push(c); + } + } + } + Ok(result) +} + +/// Parse a Python string, which may be a concatenation of multiple string literals. +/// +/// Expects the cursor to start at an opening quote character. +fn parse_concatenated_string(cursor: &mut Cursor) -> Result { + let mut result = String::new(); + loop { + let Some(c) = cursor.bump() else { + return Err(Error::UnexpectedEof); + }; + match c { + '\'' | '"' => { + // Parse a new string fragment and append it. + result.push_str(&parse_string(cursor, c)?); + } + c if is_python_whitespace(c) => { + // Skip whitespace between fragments + } + c => return Err(Error::UnexpectedCharacter(c)), + } + + // Lookahead to the end of the string. + if matches!(cursor.first(), ',' | '}') { + break; + } + } + Ok(result) +} + +/// Parse an integer literal. +/// +/// Expects the cursor to start at the first digit of the integer. 
/// Reports whether `c` counts as
/// [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
/// for this parser: space, tab, form-feed, newline, or carriage return.
const fn is_python_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\x0C' || c == '\n' || c == '\r'
}
result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value1", + "key2": 42, + "key3": "multi-part string" + } + "###); + } + + #[test] + fn test_parse_integer_values() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 12345, + "key2": -15 + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 12345, + "key2": -15 + } + "###); + } + + #[test] + fn test_parse_escaped_quotes() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value with \"escaped quotes\"", + "key2": 'single-quoted \'escaped\'' + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value with \"escaped quotes\"", + "key2": "single-quoted 'escaped'" + } + "###); + } + + #[test] + fn test_parse_concatenated_strings() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "multi-" + "line " + "string" + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "multi-line string" + } + "###); + } + + #[test] + fn test_missing_header_error() { + let input = indoc::indoc!( + r#" + build_time_vars 
= { + "key1": "value1" + } + "# + ); + + let result = input.parse::(); + assert!(matches!(result, Err(Error::MissingHeader))); + } + + #[test] + fn test_missing_assignment_error() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + { + "key1": "value1" + } + "# + ); + + let result = input.parse::(); + assert!(matches!(result, Err(Error::MissingAssignment))); + } + + #[test] + fn test_unexpected_character_error() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": &123 + } + "# + ); + + let result = input.parse::(); + assert!( + result.is_err(), + "Expected parsing to fail due to unexpected character" + ); + } + + #[test] + fn test_unexpected_eof() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 123 + "# + ); + + let result = input.parse::(); + assert!( + result.is_err(), + "Expected parsing to fail due to unexpected character" + ); + } + + #[test] + fn test_unexpected_comma() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 123,, + } + "# + ); + + let result = input.parse::(); + assert!( + result.is_err(), + "Expected parsing to fail due to unexpected character" + ); + } +} diff --git a/xtasks/fig/src/mise.ts b/xtasks/fig/src/mise.ts index 27e0ba8efb..c9004810fb 100644 --- a/xtasks/fig/src/mise.ts +++ b/xtasks/fig/src/mise.ts @@ -2398,7 +2398,7 @@ const completionSpec: Fig.Spec = { "name": [ "--uv" ], - "description": "Sync tool versions from uv", + "description": "Sync tool versions with uv (2-way sync)", "isRepeatable": false } ]