From f6b47f725f425faed2cec9b2af10de6e42137fa4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 10 Dec 2024 12:33:44 -0500 Subject: [PATCH] uv-resolver: deduplicate resolution markers Since we don't (currently) include conflict markers with our `resolution-markers` in the lock file, it's possible that we end up with duplicate markers. This happens when the resolver creates more than one fork with the same PEP 508 markers but different conflict markers, _and_ where those PEP 508 markers don't simplify to "always true" after accounting for `requires-python`. This change should be a strict improvement on the status quo. We aren't removing any information. It is possible that we should be writing conflict markers here (like we do for dependency edges), but I haven't been able to come up with a case or think through a scenario where they are necessary. Fixes #9296 --- crates/uv-resolver/src/lock/mod.rs | 62 ++++++++---- crates/uv/tests/it/lock_conflict.rs | 152 ++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 18 deletions(-) diff --git a/crates/uv-resolver/src/lock/mod.rs b/crates/uv-resolver/src/lock/mod.rs index 934333be4c51..c9941ce790a7 100644 --- a/crates/uv-resolver/src/lock/mod.rs +++ b/crates/uv-resolver/src/lock/mod.rs @@ -618,16 +618,8 @@ impl Lock { if !self.fork_markers.is_empty() { let fork_markers = each_element_on_its_line_array( - self.fork_markers - .iter() - .map(|marker| { - // TODO(ag): Consider whether `resolution-markers` should actually - // include conflicting marker info. In which case, we should serialize - // the entire `UniversalMarker` (taking care to still make the PEP 508 - // simplified). - SimplifiedMarkerTree::new(&self.requires_python, marker.pep508()) - }) - .filter_map(super::requires_python::SimplifiedMarkerTree::try_to_string), + deduplicated_simplified_pep508_markers(&self.fork_markers, &self.requires_python) + .into_iter(), ); if !fork_markers.is_empty() { doc.insert("resolution-markers", value(fork_markers)); @@ -1976,14 +1968,8 @@ impl Package { if !self.fork_markers.is_empty() { let fork_markers = each_element_on_its_line_array( - self.fork_markers - .iter() - // TODO(ag): Consider whether `resolution-markers` should actually - // include conflicting marker info. In which case, we should serialize - // the entire `UniversalMarker` (taking care to still make the PEP 508 - // simplified). - .map(|marker| SimplifiedMarkerTree::new(requires_python, marker.pep508())) - .filter_map(super::requires_python::SimplifiedMarkerTree::try_to_string), + deduplicated_simplified_pep508_markers(&self.fork_markers, requires_python) + .into_iter(), ); if !fork_markers.is_empty() { table.insert("resolution-markers", value(fork_markers)); @@ -4214,6 +4200,46 @@ fn each_element_on_its_line_array(elements: impl Iterator Vec { + // NOTE(ag): It's possible that `resolution-markers` should actually + // include conflicting marker info. In which case, we should serialize + // the entire `UniversalMarker` (taking care to still make the PEP 508 + // simplified). At present, we don't include that info. And as a result, + // this can lead to duplicate markers, since each represents a fork with + // the same PEP 508 marker but a different conflict marker. We strip the + // conflict marker, which can leave duplicate PEP 508 markers. + // + // So if we did include the conflict marker, then we wouldn't need to do + // deduplication. + // + // Why don't we include conflict markers though? At present, it's just + // not clear that they are necessary. So by the principle of being + // conservative, we don't write them. In particular, I believe the original + // reason for `resolution-markers` is to prevent non-deterministic locking. + // But it's not clear that that can occur for conflict markers. + let mut simplified = vec![]; + // Deduplicate without changing order. + let mut seen = FxHashSet::default(); + for marker in markers { + let simplified_marker = SimplifiedMarkerTree::new(requires_python, marker.pep508()); + let Some(simplified_string) = simplified_marker.try_to_string() else { + continue; + }; + if seen.insert(simplified_string.clone()) { + simplified.push(simplified_string); + } + } + simplified +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/uv/tests/it/lock_conflict.rs b/crates/uv/tests/it/lock_conflict.rs index 9c3067785ad1..de649109ef6a 100644 --- a/crates/uv/tests/it/lock_conflict.rs +++ b/crates/uv/tests/it/lock_conflict.rs @@ -6902,3 +6902,155 @@ fn extra_inferences() -> Result<()> { Ok(()) } + +/// This is a regression test[1] for repeated resolution markers in the +/// lock file. +/// +/// Before this test was fixed, the `resolution-markers` in the lock file +/// below looked like this: +/// +/// ```text +/// resolution-markers = [ +/// "sys_platform != 'linux'", +/// "sys_platform == 'linux'", +/// "sys_platform != 'linux'", +/// "sys_platform == 'linux'", +/// ] +/// ``` +/// +/// [1]: +#[test] +fn deduplicate_resolution_markers() -> Result<()> { + let context = TestContext::new("3.12"); + + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "pkg" + version = "0.1.0" + description = "Add your description here" + readme = "README.md" + requires-python = ">=3.12" + dependencies = [] + + [project.optional-dependencies] + x1 = [ + "idna==3.5 ; sys_platform != 'linux'", + "idna==3.6 ; sys_platform == 'linux'", + ] + x2 = [ + "markupsafe==2.0.0 ; sys_platform != 'linux'", + "markupsafe==2.1.0 ; sys_platform == 'linux'", + ] + + [tool.uv] + conflicts = [ + [ + { extra = "x1" }, + { extra = "x2" }, + ], + ] + "#, + )?; + + uv_snapshot!(context.filters(), context.lock(), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 5 packages in [TIME] + "###); + + let lock = context.read("uv.lock"); + insta::with_settings!({ + filters => context.filters(), + }, { + assert_snapshot!( + lock, + @r###" + version = 1 + requires-python = ">=3.12" + resolution-markers = [ + "sys_platform != 'linux'", + "sys_platform == 'linux'", + ] + conflicts = [[ + { package = "pkg", extra = "x1" }, + { package = "pkg", extra = "x2" }, + ]] + + [options] + exclude-newer = "2024-03-25T00:00:00Z" + + [[package]] + name = "idna" + version = "3.5" + source = { registry = "https://pypi.org/simple" } + resolution-markers = [ + "sys_platform != 'linux'", + ] + sdist = { url = "https://files.pythonhosted.org/packages/9b/c4/db3e4b22ebc18ee797dae8e14b5db68e5826ae6337334c276f1cb4ff84fb/idna-3.5.tar.gz", hash = "sha256:27009fe2735bf8723353582d48575b23c533cc2c2de7b5a68908d91b5eb18d08", size = 64640 } + wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/65/9c7a31be86861d43da3d4f8661f677b38120320540773a04979ad6fa9ecd/idna-3.5-py3-none-any.whl", hash = "sha256:79b8f0ac92d2351be5f6122356c9a592c96d81c9a79e4b488bf2a6a15f88057a", size = 61566 }, + ] + + [[package]] + name = "idna" + version = "3.6" + source = { registry = "https://pypi.org/simple" } + resolution-markers = [ + "sys_platform == 'linux'", + ] + sdist = { url = "https://files.pythonhosted.org/packages/bf/3f/ea4b9117521a1e9c50344b909be7886dd00a519552724809bb1f486986c2/idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", size = 175426 } + wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/e7/a82b05cf63a603df6e68d59ae6a68bf5064484a0718ea5033660af4b54a9/idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f", size = 61567 }, + ] + + [[package]] + name = "markupsafe" + version = "2.0.0" + source = { registry = "https://pypi.org/simple" } + resolution-markers = [ + "sys_platform != 'linux'", + ] + sdist = { url = "https://files.pythonhosted.org/packages/67/6a/5b3ed5c122e20c33d2562df06faf895a6b91b0a6b96a4626440ffe1d5c8e/MarkupSafe-2.0.0.tar.gz", hash = "sha256:4fae0677f712ee090721d8b17f412f1cbceefbf0dc180fe91bab3232f38b4527", size = 18466 } + + [[package]] + name = "markupsafe" + version = "2.1.0" + source = { registry = "https://pypi.org/simple" } + resolution-markers = [ + "sys_platform == 'linux'", + ] + sdist = { url = "https://files.pythonhosted.org/packages/62/0f/52c009332fdadd484e898dc8f2acca0663c1031b3517070fd34ad9c1b64e/MarkupSafe-2.1.0.tar.gz", hash = "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f", size = 18546 } + + [[package]] + name = "pkg" + version = "0.1.0" + source = { virtual = "." } + + [package.optional-dependencies] + x1 = [ + { name = "idna", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "idna", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + ] + x2 = [ + { name = "markupsafe", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "markupsafe", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + ] + + [package.metadata] + requires-dist = [ + { name = "idna", marker = "sys_platform == 'linux' and extra == 'x1'", specifier = "==3.6" }, + { name = "idna", marker = "sys_platform != 'linux' and extra == 'x1'", specifier = "==3.5" }, + { name = "markupsafe", marker = "sys_platform == 'linux' and extra == 'x2'", specifier = "==2.1.0" }, + { name = "markupsafe", marker = "sys_platform != 'linux' and extra == 'x2'", specifier = "==2.0.0" }, + ] + "### + ); + }); + + Ok(()) +}