Skip to content

Commit

Permalink
store all known releases for Python Build Standalone known versions (#…
Browse files Browse the repository at this point in the history
…21776)

As first mentioned by @huonw in
#21710 (review)
and more fully discussed in
#21748, the addition of
filtering PBS releases by the PBS release tag means that users may now
see a breaking change if Pants scrapes newer versions of PBS releases,
replaces some of the existing "known versions" data with the
newly-scraped release metadata, and the user selects an older release
via a PBS release constraint.

In that case, the user would see an error because Pants would be unable
to select that specific PBS version anymore due to no longer having any
metadata regarding that PBS release tag.

The solution is for Pants to store metadata for _all_ PBS release tags.
Newer versions will only add to the PBS "known versions" data and will
neither replace nor delete any metadata. With all metadata available, a user who
selects a particular PBS release via the release constraints will still
continue to see that release be selected. And users who want the latest
matching PBS release will continue to get the latest PBS release (known
to Pants).

Closes #21748.
  • Loading branch information
tdyas authored Dec 18, 2024
1 parent 9c7441f commit 627d533
Show file tree
Hide file tree
Showing 5 changed files with 2,527 additions and 1,346 deletions.
2 changes: 1 addition & 1 deletion docs/notes/2.25.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ The `helm_infer.external_docker_images` glob syntax has been generalized. In ad

The AWS Lambda backend now provides built-in complete platforms for the Python 3.13 runtime.

The Python Build Standalone backend (`pants.backend.python.providers.experimental.python_build_standalone`) now supports filtering PBS releases via their "release tag" via [the new `--python-build-standalone-release-constraints` option](https://www.pantsbuild.org/2.25/reference/subsystems/python-build-standalone-python-provider#release_constraints).
The Python Build Standalone backend (`pants.backend.python.providers.experimental.python_build_standalone`) now supports filtering PBS releases via their "release tag" via [the new `--python-build-standalone-release-constraints` option](https://www.pantsbuild.org/2.25/reference/subsystems/python-build-standalone-python-provider#release_constraints). The PBS "known versions" database now contains metadata on all known PBS versions, and not just the latest PBS release tag per Python patchlevel.

Also, the PBS "release tag" will be inferred for PBS releases supplied via the `--python-build-standalone-known-python-versions` option from the given URLs if those URLs conform to the naming convention used by the PBS project. The new advanced option `--python-build-standalone-require-inferrable-release-tag` controls whether Pants requires the tag to be inferrable. This option currently defaults to `False`, but will be migrated to `True` in a future Pants release. (The release tag cannot currently be specified via `--python-build-standalone-known-python-versions` since changing that option would not be a backward compatible change.)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,16 @@ class PBSPythonInfo(TypedDict):
url: str
sha256: str
size: int
tag: str


PBSVersionsT = dict[str, dict[str, dict[str, PBSPythonInfo]]]


@functools.cache
def load_pbs_pythons() -> dict[str, dict[str, PBSPythonInfo]]:
def load_pbs_pythons() -> PBSVersionsT:
versions_info = json.loads(read_sibling_resource(__name__, "versions_info.json"))
pbs_release_metadata = versions_info["pythons"]

# Filter out any PBS releases for which we do not have `tag` metadata.
py_versions_to_delete: set[str] = set()
for py_version, platforms_for_ver in pbs_release_metadata.items():
all_have_tag = all(
platform_data.get("tag") is not None
for platform_name, platform_data in platforms_for_ver.items()
)
if not all_have_tag:
py_versions_to_delete.add(py_version)

for py_version_to_delete in py_versions_to_delete:
del pbs_release_metadata[py_version_to_delete]

return cast("dict[str, dict[str, PBSPythonInfo]]", pbs_release_metadata)
return cast("PBSVersionsT", pbs_release_metadata)


class PBSPythonProviderSubsystem(Subsystem):
Expand Down Expand Up @@ -178,9 +166,7 @@ def release_constraints(self) -> ConstraintsList:
f"The `[{PBSPythonProviderSubsystem.options_scope}].release_constraints option` is not valid: {e}"
) from None

def get_user_supplied_pbs_pythons(
self, require_tag: bool
) -> dict[str, dict[str, PBSPythonInfo]]:
def get_user_supplied_pbs_pythons(self, require_tag: bool) -> PBSVersionsT:
extract_re = re.compile(r"^cpython-([0-9.]+)\+([0-9]+)-.*\.tar\.\w+$")

def extract_version_and_tag(url: str) -> tuple[str, str] | None:
Expand All @@ -193,7 +179,7 @@ def extract_version_and_tag(url: str) -> tuple[str, str] | None:

return None

user_supplied_pythons: dict[str, dict[str, PBSPythonInfo]] = {}
user_supplied_pythons: dict[str, dict[str, dict[str, PBSPythonInfo]]] = {}

for version_info in self.known_python_versions or []:
version_parts = version_info.split("|")
Expand All @@ -218,7 +204,7 @@ def extract_version_and_tag(url: str) -> tuple[str, str] | None:
)
tag = inferred_tag

if require_tag and tag is None:
if tag is None:
raise ExternalToolError(
f"While parsing the `[{PBSPythonProviderSubsystem.options_scope}].known_python_versions` option, "
f'the PBS release "tag" could not be inferred from the supplied URL: {url}'
Expand All @@ -228,25 +214,31 @@ def extract_version_and_tag(url: str) -> tuple[str, str] | None:

if py_version not in user_supplied_pythons:
user_supplied_pythons[py_version] = {}
if tag not in user_supplied_pythons[py_version]:
user_supplied_pythons[py_version][tag] = {}

pbs_python_info = PBSPythonInfo(
url=url, sha256=sha256, size=int(filesize), tag=tag or ""
)
user_supplied_pythons[py_version][platform] = pbs_python_info
pbs_python_info = PBSPythonInfo(url=url, sha256=sha256, size=int(filesize))
user_supplied_pythons[py_version][tag][platform] = pbs_python_info

return user_supplied_pythons

def get_all_pbs_pythons(self) -> dict[str, dict[str, PBSPythonInfo]]:
def get_all_pbs_pythons(self) -> PBSVersionsT:
all_pythons = load_pbs_pythons().copy()

user_supplied_pythons = self.get_user_supplied_pbs_pythons(
user_supplied_pythons: PBSVersionsT = self.get_user_supplied_pbs_pythons(
require_tag=self.require_inferrable_release_tag
)
for py_version, platform_metadatas_for_py_version in user_supplied_pythons.items():
for platform_name, platform_metadata in platform_metadatas_for_py_version.items():
if py_version not in all_pythons:
all_pythons[py_version] = {}
all_pythons[py_version][platform_name] = platform_metadata
for py_version, release_metadatas_for_py_version in user_supplied_pythons.items():
for (
release_tag,
platform_metadata_for_releases,
) in release_metadatas_for_py_version.items():
for platform_name, platform_metadata in platform_metadata_for_releases.items():
if py_version not in all_pythons:
all_pythons[py_version] = {}
if release_tag not in all_pythons[py_version]:
all_pythons[py_version][release_tag] = {}
all_pythons[py_version][release_tag][platform_name] = platform_metadata

return all_pythons

Expand All @@ -258,31 +250,31 @@ class PBSPythonProvider(PythonProvider):
def _choose_python(
interpreter_constraints: InterpreterConstraints,
universe: Iterable[str],
pbs_versions: Mapping[str, Mapping[str, PBSPythonInfo]],
pbs_versions: Mapping[str, Mapping[str, Mapping[str, PBSPythonInfo]]],
platform: Platform,
release_constraints: ConstraintSatisfied,
) -> tuple[str, PBSPythonInfo]:
) -> tuple[str, Version, PBSPythonInfo]:
"""Choose the highest supported patchlevel of the lowest supported major/minor version
consistent with any PBS release constraint."""

# Construct a list of candidate PBS releases.
candidate_pbs_releases: list[tuple[tuple[int, int, int], PBSPythonInfo]] = []
candidate_pbs_releases: list[tuple[tuple[int, int, int], Version, PBSPythonInfo]] = []
supported_python_triplets = interpreter_constraints.enumerate_python_versions(universe)
for triplet in supported_python_triplets:
triplet_str = ".".join(map(str, triplet))
pbs_version_metadata = pbs_versions.get(triplet_str)
if not pbs_version_metadata:
continue

pbs_version_platform_metadata = pbs_version_metadata.get(platform.value)
if not pbs_version_platform_metadata:
continue
for tag, platform_metadata in pbs_version_metadata.items():
if not release_constraints.is_satisified(Version(tag)):
continue

tag = pbs_version_platform_metadata.get("tag")
if tag and not release_constraints.is_satisified(Version(tag)):
continue
pbs_version_platform_metadata = platform_metadata.get(platform.value)
if not pbs_version_platform_metadata:
continue

candidate_pbs_releases.append((triplet, pbs_version_platform_metadata))
candidate_pbs_releases.append((triplet, Version(tag), pbs_version_platform_metadata))

if not candidate_pbs_releases:
raise Exception(
Expand All @@ -303,15 +295,18 @@ def _choose_python(
# Choose the highest supported patchlevel of the lowest supported major/minor version
# by searching until the major/minor version increases or the search ends (in which case the
# last candidate is the one).
candidate_pbs_releases.sort(key=lambda x: x[0])
for i, (version_triplet, metadata) in enumerate(candidate_pbs_releases):
#
# This also sorts by release tag in ascending order. So it chooses the highest available PBS
# release for that chosen Python version.
candidate_pbs_releases.sort(key=lambda x: (x[0], x[1]))
for i, (py_version_triplet, pbs_version, metadata) in enumerate(candidate_pbs_releases):
if (
# Last candidate, we're good!
i == len(candidate_pbs_releases) - 1
# Next candidate is the next major/minor version, so this is the highest patchlevel.
or candidate_pbs_releases[i + 1][0][0:2] != version_triplet[0:2]
or candidate_pbs_releases[i + 1][0][0:2] != py_version_triplet[0:2]
):
return (".".join(map(str, version_triplet)), metadata)
return (".".join(map(str, py_version_triplet)), pbs_version, metadata)

raise AssertionError("The loop should have returned the final item.")

Expand All @@ -332,7 +327,7 @@ async def get_python(
) -> PythonExecutable:
versions_info = pbs_subsystem.get_all_pbs_pythons()

python_version, pbs_py_info = _choose_python(
python_version, _pbs_version, pbs_py_info = _choose_python(
request.interpreter_constraints,
python_setup.interpreter_versions_universe,
versions_info,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from pants.engine.target import Target
from pants.testutil.option_util import create_subsystem
from pants.testutil.rule_runner import RuleRunner, mock_console
from pants.version import Version


@pytest.fixture
Expand Down Expand Up @@ -161,11 +162,10 @@ def test_tag_inference_from_url() -> None:
)

user_supplied_pbs_versions = subsystem.get_user_supplied_pbs_pythons(require_tag=False)
assert user_supplied_pbs_versions["3.10.13"]["linux_arm"] == pbs.PBSPythonInfo(
assert user_supplied_pbs_versions["3.10.13"]["20240224"]["linux_arm"] == pbs.PBSPythonInfo(
url="https://github.com/indygreg/python-build-standalone/releases/download/20240224/cpython-3.10.13%2B20240224-aarch64-unknown-linux-gnu-install_only.tar.gz",
sha256="abc123",
size=123,
tag="20240224",
)

# Confirm whether requiring tag inference results in an error.
Expand Down Expand Up @@ -218,53 +218,52 @@ def test_release_constraint_evaluation(rule_runner: RuleRunner) -> None:
ics = InterpreterConstraints(["cpython==3.9.*"])
universe = ["3.9"]

def make_version(tag: str):
def make_platform_metadata():
return {
"linux_arm64": {
"tag": tag,
"sha256": "abc123",
"size": 1,
"url": "foobar",
},
"linux_x86_64": {
"tag": tag,
"sha256": "abc123",
"size": 1,
"url": "https://example.com/foo.zip",
},
"macos_arm64": {
"tag": tag,
"sha256": "abc123",
"size": 1,
"url": "https://example.com/foo.zip",
},
"macos_x86_64": {
"tag": tag,
"sha256": "abc123",
"size": 1,
"url": "https://example.com/foo.zip",
},
}

pbs_versions = {
"3.9.18": make_version("20241001"),
"3.9.19": make_version("20241101"),
"3.9.20": make_version("20241201"),
"3.9.18": {"20241001": make_platform_metadata()},
"3.9.19": {"20241101": make_platform_metadata()},
"3.9.20": {"20241201": make_platform_metadata()},
}

platform = rule_runner.request(Platform, [])

rc = ConstraintsList.parse(">=20241001,<20241201")
version, _info = pbs._choose_python(ics, universe, pbs_versions, platform, rc)
version, pbs_version, _info = pbs._choose_python(ics, universe, pbs_versions, platform, rc)
assert version == "3.9.19"
assert pbs_version == Version("20241101")

# Ensure that exception occurs if no version matches.
rc = ConstraintsList.parse("==20250101")
with pytest.raises(
Exception,
match="Failed to find a supported Python Build Standalone for Interpreter Constraint",
):
_version, _info = pbs._choose_python(ics, universe, pbs_versions, platform, rc)
_version, _pbs_version, _info = pbs._choose_python(
ics, universe, pbs_versions, platform, rc
)

# Ensure that PBS versions with no tag metadata are filtered out so there is no "match".
actual_pbs_versions = pbs.load_pbs_pythons()
Expand All @@ -273,4 +272,17 @@ def make_version(tag: str):
Exception,
match="Failed to find a supported Python Build Standalone for Interpreter Constraint",
):
_version, _info = pbs._choose_python(ics, universe, actual_pbs_versions, platform, rc)
_version, _pbs_version, _info = pbs._choose_python(
ics, universe, actual_pbs_versions, platform, rc
)

# Ensure that the highest release for a particular version is chosen.
pbs_versions = {
"3.9.18": {"20241001": make_platform_metadata()},
"3.9.19": {"20241101": make_platform_metadata(), "20241115": make_platform_metadata()},
"3.10.15": {"20241115": make_platform_metadata()},
}
rc = ConstraintsList.parse(">=20241001,<=20241201")
version, pbs_version, _info = pbs._choose_python(ics, universe, pbs_versions, platform, rc)
assert version == "3.9.19"
assert pbs_version == Version("20241115")
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,37 @@ def _compute_sha256(url):
return sha256_hash.hexdigest()


def scrape_release(release, scraped_releases, asset_map, sha256_map):
    """Record one PBS release as scraped and collect its applicable assets.

    Args:
        release: Object exposing ``tag_name`` and ``get_assets()``; each asset
            must expose ``name`` and ``browser_download_url``.
        scraped_releases: Mutable set of release tags; ``release.tag_name`` is
            added to it.
        asset_map: Mutable mapping, updated in place with
            ``asset.name -> asset`` for every applicable non-checksum asset.
        sha256_map: Mutable mapping, updated in place with
            ``archive name -> checksum text`` for every applicable ``.sha256``
            asset (the key is the asset name with the ``.sha256`` suffix
            removed).

    Raises:
        requests.HTTPError: If downloading a checksum file returns an HTTP
            error status.
    """
    scraped_releases.add(release.tag_name)

    # NB: From https://python-build-standalone.readthedocs.io/en/latest/running.html#obtaining-distributions
    # > Casual users will likely want to use the install_only archive,
    # > as most users do not need the build artifacts present in the full archive.
    #
    # The set of accepted name fragments never changes, so build it once
    # instead of once per asset.
    applicable_markers = tuple(
        f"{machine}-{osname}-install_only"
        for machine, osname in itertools.product(
            ["aarch64", "x86_64"], ["apple-darwin", "unknown-linux-gnu"]
        )
    )

    for asset in release.get_assets():
        if not any(marker in asset.name for marker in applicable_markers):
            continue

        if asset.name.endswith(".sha256"):
            # A timeout keeps the scraper from hanging forever on a stalled
            # connection.
            response = requests.get(asset.browser_download_url, timeout=60)
            # Fail loudly instead of recording the body of an HTTP error page
            # (e.g. 404 or rate limiting) as if it were a checksum.
            response.raise_for_status()
            sha256_map[asset.name.removesuffix(".sha256")] = response.text.strip()
        else:
            asset_map[asset.name] = asset


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--scrape-all-releases", dest="scrape_all_releases", action="store_true")
parser.add_argument(
"--scrape-release", metavar="RELEASE", dest="scrape_releases", action="append"
)
parser.add_argument("-v", "--verbose", action="store_true")
options = parser.parse_args()

print("Starting to scrape GitHub PBS releases.")
Expand All @@ -69,32 +94,20 @@ def main() -> None:
tag_name = release.tag_name

if (
tag_name not in scraped_releases
or options.scrape_all_releases
or tag_name in options.scrape_releases
options.scrape_all_releases
or (options.scrape_releases and tag_name in options.scrape_releases)
or (not options.scrape_releases and tag_name not in scraped_releases)
):
print(f"Scraping release tag `{tag_name}`.")
scraped_releases.add(release.tag_name)
assets = release.get_assets()
for asset in assets:
# NB: From https://python-build-standalone.readthedocs.io/en/latest/running.html#obtaining-distributions
# > Casual users will likely want to use the install_only archive,
# > as most users do not need the build artifacts present in the full archive.
is_applicable = any(
f"{machine}-{osname}-install_only" in asset.name
for machine, osname in itertools.product(
["aarch64", "x86_64"], ["apple-darwin", "unknown-linux-gnu"]
)
)
if not is_applicable:
continue

is_checksum = asset.name.endswith(".sha256")
if is_checksum:
shasum = requests.get(asset.browser_download_url).text.strip()
sha256_map[asset.name.removesuffix(".sha256")] = shasum
else:
asset_map[asset.name] = asset
scrape_release(
release=release,
scraped_releases=scraped_releases,
asset_map=asset_map,
sha256_map=sha256_map,
)
else:
if options.verbose:
print(f"Skipped release tag `{tag_name}.")

print("Finished scraping releases.")

Expand All @@ -110,18 +123,19 @@ def main() -> None:
python_version, pbs_release_tag = matched_versions.groups()[1:3]
if python_version not in pythons_dict:
pythons_dict[python_version] = {}
if pbs_release_tag not in pythons_dict[python_version]:
pythons_dict[python_version][pbs_release_tag] = {}

name_parts = asset.name.replace("darwin", "macos").replace("aarch64", "arm64").split("-")
pants_platform_tag = f"{name_parts[4]}_{name_parts[2]}"
sha256sum = sha256_map.get(asset.name)
if sha256sum is None:
sha256sum = _compute_sha256(asset.browser_download_url)

pythons_dict[python_version][pants_platform_tag] = {
pythons_dict[python_version][pbs_release_tag][pants_platform_tag] = {
"url": asset.browser_download_url,
"sha256": sha256sum,
"size": asset.size,
"tag": pbs_release_tag,
}

VERSIONS_PATH.write_text(json.dumps(versions_info, sort_keys=True, indent=2))
Expand Down
Loading

0 comments on commit 627d533

Please sign in to comment.