diff --git a/dev/constraints-updated-version-check.py b/dev/constraints-updated-version-check.py index 767df85743aa7..1b128f18a50d7 100644 --- a/dev/constraints-updated-version-check.py +++ b/dev/constraints-updated-version-check.py @@ -28,14 +28,17 @@ import os import re import urllib.request +from contextlib import contextmanager from datetime import datetime +from pathlib import Path from urllib.error import HTTPError, URLError import rich_click as click from packaging import version from rich.console import Console +from rich.syntax import Syntax -console = Console(width=400, color_system="standard") +console = Console(color_system="standard") def parse_constraints_generation_date(lines): @@ -157,9 +160,14 @@ def get_first_newer_release_date_str(releases, current_version): return datetime.fromisoformat(upload_time_str.replace("Z", "+00:00")).strftime("%Y-%m-%d") -def main(python_version, mode, selected_packages=None, explain_why=False): +def main( + python_version: str, + mode: str, + selected_packages: set[str] | None = None, + explain_why: bool = False, + verbose: bool = False, +): lines = get_constraints_file(python_version) - constraints_date = parse_constraints_generation_date(lines) if constraints_date: console.print( @@ -167,6 +175,20 @@ def main(python_version, mode, selected_packages=None, explain_why=False): ) console.print() + packages = parse_packages_from_lines(lines, selected_packages) + col_widths, format_str, headers, total_width = get_table_format(packages) + print_table_header(format_str, headers, total_width) + + outdated_count, skipped_count, explanations = process_packages( + packages, constraints_date, mode, explain_why, verbose, col_widths, format_str + ) + + print_table_footer(total_width, len(packages), outdated_count, skipped_count, mode) + if explain_why and explanations: + print_explanations(explanations) + + +def parse_packages_from_lines(lines: list[str], selected_packages: set[str] | None) -> list[tuple[str, str]]: packages = [] for line in lines: line = line.strip() @@ -176,9 +198,11 @@ def main(python_version, mode, selected_packages=None, explain_why=False): pkg_name = match.group(1) if not selected_packages or (pkg_name in selected_packages): packages.append((pkg_name, match.group(2))) + return packages - max_pkg_length = get_max_package_length(packages) +def get_table_format(packages: list[tuple[str, str]]): + max_pkg_length = get_max_package_length(packages) col_widths = { "Library Name": max(35, max_pkg_length), "Constraint Version": 18, @@ -189,7 +213,6 @@ def main(python_version, mode, selected_packages=None, explain_why=False): "# Versions Behind": 19, "PyPI Link": 60, } - format_str = ( f"{{:<{col_widths['Library Name']}}} | " f"{{:<{col_widths['Constraint Version']}}} | " @@ -200,7 +223,6 @@ def main(python_version, mode, selected_packages=None, explain_why=False): f"{{:<{col_widths['# Versions Behind']}}} | " f"{{:<{col_widths['PyPI Link']}}}" ) - headers = [ "Library Name", "Constraint Version", @@ -211,167 +233,252 @@ def main(python_version, mode, selected_packages=None, explain_why=False): "# Versions Behind", "PyPI Link", ] + total_width = sum(col_widths.values()) + (len(col_widths) - 1) * 3 + return col_widths, format_str, headers, total_width - if not explain_why: - console.print(f"[bold magenta]{format_str.format(*headers)}[/]") - total_width = sum(col_widths.values()) + (len(col_widths) - 1) * 3 - console.print(f"[magenta]{'=' * total_width}[/]") - else: - total_width = sum(col_widths.values()) + (len(col_widths) - 1) * 3 - outdated_count = 0 - skipped_count = 0 +def print_table_header(format_str: str, headers: list[str], total_width: int): + console.print(f"[bold magenta]{format_str.format(*headers)}[/]") + console.print(f"[magenta]{'=' * total_width}[/]") - if explain_why and packages: - import shutil - import subprocess - from contextlib import contextmanager - from pathlib import Path - @contextmanager - def pyproject_backup_restore(pyproject_path): - backup_path = pyproject_path.with_suffix(pyproject_path.suffix + ".bak") - shutil.copyfile(pyproject_path, backup_path) - try: - yield - finally: - if backup_path.exists(): - shutil.copyfile(backup_path, pyproject_path) - backup_path.unlink() - - def run_uv_sync(cmd, cwd): - env = os.environ.copy() - env.pop("VIRTUAL_ENV", None) - result = subprocess.run(cmd, cwd=cwd, env=env, capture_output=True, text=True) - return result.stdout + result.stderr - - airflow_pyproject = Path(__file__).parent.parent / "pyproject.toml" - airflow_pyproject = airflow_pyproject.resolve() - repo_root = airflow_pyproject.parent - with pyproject_backup_restore(airflow_pyproject): - for pkg, _ in packages: - console.print(f"[bold blue]\n--- Explaining for {pkg} ---[/]") - # 1. Run uv sync --all-packages - before_output = run_uv_sync( - ["uv", "sync", "--all-packages", "--resolution", "highest"], cwd=repo_root - ) - with open("/tmp/uv_sync_before.txt", "w") as f: - f.write(before_output) - # 2. Get latest version from PyPI - pypi_url = f"https://pypi.org/pypi/{pkg}/json" - with urllib.request.urlopen(pypi_url) as resp: - data = json.loads(resp.read().decode("utf-8")) - latest_version = data["info"]["version"] - # 3. Update pyproject.toml (append dependency line) - lines = airflow_pyproject.read_text().splitlines() - new_lines = [] - in_deps = False - dep_added = False - for line in lines: - new_lines.append(line) - if line.strip() == "dependencies = [": - in_deps = True - elif in_deps and line.strip().startswith("]") and not dep_added: - # Add the new dependency just before closing bracket - new_lines.insert(-1, f' "{pkg}=={latest_version}",') - dep_added = True - in_deps = False - if not dep_added: - # fallback: just append at the end - new_lines.append(f' "{pkg}=={latest_version}",') - airflow_pyproject.write_text("\n".join(new_lines) + "\n") - # 4. Run uv sync --all-packages and capture output - after_output = run_uv_sync( - ["uv", "sync", "--all-packages", "--resolution", "highest"], cwd=repo_root - ) - with open("/tmp/uv_sync_after.txt", "w") as f: - f.write(after_output) - console.print(f"[yellow]uv sync output for {pkg}=={latest_version}:[/]") - console.print(after_output) - return +def print_table_footer(total_width: int, total_pkgs: int, outdated_count: int, skipped_count: int, mode: str): + console.print(f"[magenta]{'=' * total_width}[/]") + console.print(f"[bold cyan]\nTotal packages checked:[/] [white]{total_pkgs}[/]") + console.print(f"[bold yellow]Outdated packages found:[/] [white]{outdated_count}[/]") + if mode == "diff-constraints": + console.print( + f"[bold blue]Skipped packages (updated after constraints generation):[/] [white]{skipped_count}[/]" + ) - for pkg, pinned_version in packages: - try: - pypi_url = f"https://pypi.org/pypi/{pkg}/json" - with urllib.request.urlopen(pypi_url) as resp: - data = json.loads(resp.read().decode("utf-8")) - latest_version = data["info"]["version"] - releases = data["releases"] - latest_release_date = "N/A" - constraint_release_date = "N/A" +def print_explanations(explanations: list[str]): + console.print("\n[bold magenta]Upgrade Explanations:[/]") + for explanation in explanations: + console.print(explanation) + + +def process_packages( + packages: list[tuple[str, str]], + constraints_date: datetime | None, + mode: str, + explain_why: bool, + verbose: bool, + col_widths: dict, + format_str: str, +) -> tuple[int, int, list[str]]: + import subprocess + import tempfile + from contextlib import contextmanager + from pathlib import Path + + @contextmanager + def preserve_pyproject_file(pyproject_path: Path): + original_content = pyproject_path.read_text() + try: + yield + finally: + pyproject_path.write_text(original_content) + + def run_uv_sync(cmd: list[str], cwd: Path) -> subprocess.CompletedProcess: + env = os.environ.copy() + env.pop("VIRTUAL_ENV", None) + if verbose: + console.print(f"[cyan]Running command:[/] [white]{' '.join(cmd)}[/] [dim]in {cwd}[/]") + result = subprocess.run(cmd, cwd=cwd, env=env, capture_output=True, text=True) + return result + + def fetch_pypi_data(pkg: str) -> dict: + pypi_url = f"https://pypi.org/pypi/{pkg}/json" + with urllib.request.urlopen(pypi_url) as resp: + return json.loads(resp.read().decode("utf-8")) + + def get_release_dates(releases: dict, version: str) -> str: + if releases.get(version): + return ( + datetime.fromisoformat(releases[version][0]["upload_time_iso_8601"].replace("Z", "+00:00")) + .replace(tzinfo=None) + .strftime("%Y-%m-%d") + ) + return "N/A" + + def update_pyproject_dependency(pyproject_path: Path, pkg: str, latest_version: str): + lines = pyproject_path.read_text().splitlines() + new_lines = [] + in_deps = False + dep_added = False + for line in lines: + new_lines.append(line) + if line.strip() == "dependencies = [": + in_deps = True + elif in_deps and line.strip().startswith("]") and not dep_added: + new_lines.insert(-1, f' "{pkg}=={latest_version}",') + dep_added = True + in_deps = False + if not dep_added: + new_lines.append(f' "{pkg}=={latest_version}",') + pyproject_path.write_text("\n".join(new_lines) + "\n") - if releases.get(latest_version): - latest_release_date = ( - datetime.fromisoformat( - releases[latest_version][0]["upload_time_iso_8601"].replace("Z", "+00:00") - ) - .replace(tzinfo=None) - .strftime("%Y-%m-%d") - ) + airflow_pyproject = Path(__file__).parent.parent / "pyproject.toml" + airflow_pyproject = airflow_pyproject.resolve() + repo_root = airflow_pyproject.parent - if releases.get(pinned_version): - constraint_release_date = ( - datetime.fromisoformat( - releases[pinned_version][0]["upload_time_iso_8601"].replace("Z", "+00:00") - ) - .replace(tzinfo=None) - .strftime("%Y-%m-%d") - ) + outdated_count = 0 + skipped_count = 0 + explanations = [] + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir_path = Path(temp_dir) + for pkg, pinned_version in packages: + try: + data = fetch_pypi_data(pkg) + latest_version = data["info"]["version"] + releases = data["releases"] + latest_release_date = get_release_dates(releases, latest_version) + constraint_release_date = get_release_dates(releases, pinned_version) is_latest_version = pinned_version == latest_version versions_behind = count_versions_between(releases, pinned_version, latest_version) versions_behind_str = str(versions_behind) if versions_behind > 0 else "" - if should_show_package(releases, latest_version, constraints_date, mode, is_latest_version): - # Use the first newer release date instead of latest version date - first_newer_date_str = get_first_newer_release_date_str(releases, pinned_version) - status = get_status_emoji( - first_newer_date_str or constraint_release_date, - datetime.now().strftime("%Y-%m-%d"), - is_latest_version, - ) - pypi_link = f"https://pypi.org/project/{pkg}/{latest_version}" - - color = ( - "green" - if is_latest_version - else ("yellow" if status.startswith("📢") or status.startswith("⚠") else "red") + print_package_table_row( + pkg=pkg, + pinned_version=pinned_version, + constraint_release_date=constraint_release_date, + latest_version=latest_version, + latest_release_date=latest_release_date, + releases=releases, + col_widths=col_widths, + format_str=format_str, + is_latest_version=is_latest_version, + versions_behind_str=versions_behind_str, ) - string_to_print = format_str.format( - pkg, - pinned_version[: col_widths["Constraint Version"]], - constraint_release_date[: col_widths["Constraint Date"]], - latest_version[: col_widths["Latest Version"]], - latest_release_date[: col_widths["Latest Date"]], - # The utf character takes 2 bytes so we need to subtract 1 - status[: col_widths["📢 Status"]], - versions_behind_str, - pypi_link, - ) - console.print(f"[{color}]{string_to_print}[/]") if not is_latest_version: outdated_count += 1 else: skipped_count += 1 - except HTTPError as e: - console.print(f"[bold red]Error fetching {pkg} from PyPI: HTTP {e.code}[/]") - continue - except URLError as e: - console.print(f"[bold red]Error fetching {pkg} from PyPI: {e.reason}[/]") - continue - except Exception as e: - console.print(f"[bold red]Error processing {pkg}: {str(e)}[/]") - continue + if explain_why and not is_latest_version: + explanation = explain_package_upgrade( + pkg, + pinned_version, + latest_version, + airflow_pyproject, + repo_root, + temp_dir_path, + run_uv_sync, + update_pyproject_dependency, + verbose, + ) + explanations.append(explanation) + except HTTPError as e: + console.print(f"[bold red]Error fetching {pkg} from PyPI: HTTP {e.code}[/]") + continue + except URLError as e: + console.print(f"[bold red]Error fetching {pkg} from PyPI: {e.reason}[/]") + continue + except Exception as e: + console.print(f"[bold red]Error processing {pkg}: {str(e)}[/]") + continue + return outdated_count, skipped_count, explanations + + +def print_package_table_row( + pkg: str, + pinned_version: str, + constraint_release_date: str, + latest_version: str, + latest_release_date: str, + releases: dict, + col_widths: dict, + format_str: str, + is_latest_version: bool, + versions_behind_str: str, +): + first_newer_date_str = get_first_newer_release_date_str(releases, pinned_version) + status = get_status_emoji( + first_newer_date_str or constraint_release_date, + datetime.now().strftime("%Y-%m-%d"), + is_latest_version, + ) + pypi_link = f"https://pypi.org/project/{pkg}/{latest_version}" + color = ( + "green" + if is_latest_version + else ("yellow" if status.startswith("📢") or status.startswith("⚠") else "red") + ) + string_to_print = format_str.format( + pkg, + pinned_version[: col_widths["Constraint Version"]], + constraint_release_date[: col_widths["Constraint Date"]], + latest_version[: col_widths["Latest Version"]], + latest_release_date[: col_widths["Latest Date"]], + status[: col_widths["📢 Status"]], + versions_behind_str, + pypi_link, + ) + console.print(f"[{color}]{string_to_print}[/]") + + +def explain_package_upgrade( + pkg: str, + pinned_version: str, + latest_version: str, + airflow_pyproject: Path, + repo_root: Path, + temp_dir_path: Path, + run_uv_sync, + update_pyproject_dependency, + verbose: bool, +) -> str: + explanation = ( + f"[bold blue]\n--- Explaining for {pkg} (current: {pinned_version}, latest: {latest_version}) ---[/]" + ) - console.print(f"[magenta]{'=' * total_width}[/]") - console.print(f"[bold cyan]\nTotal packages checked:[/] [white]{len(packages)}[/]") - console.print(f"[bold yellow]Outdated packages found:[/] [white]{outdated_count}[/]") - if mode == "diff-constraints": - console.print( - f"[bold blue]Skipped packages (updated after constraints generation):[/] [white]{skipped_count}[/]" + @contextmanager + def preserve_pyproject_file(pyproject_path: Path): + original_content = pyproject_path.read_text() + try: + yield + finally: + pyproject_path.write_text(original_content) + + with preserve_pyproject_file(airflow_pyproject): + before_result = run_uv_sync( + [ + "uv", + "sync", + "--all-packages", + "--resolution", + "highest", + "--refresh", + ], + cwd=repo_root, + ) + (temp_dir_path / "uv_sync_before.txt").write_text(before_result.stdout + before_result.stderr) + update_pyproject_dependency(airflow_pyproject, pkg, latest_version) + if verbose: + syntax = Syntax( + airflow_pyproject.read_text(), "toml", theme="monokai", line_numbers=True, word_wrap=False + ) + explanation += "\n" + str(syntax) + after_result = run_uv_sync( + [ + "uv", + "sync", + "--all-packages", + "--resolution", + "highest", + "--refresh", + ], + cwd=repo_root, ) + (temp_dir_path / "uv_sync_after.txt").write_text(after_result.stdout + after_result.stderr) + if after_result.returncode == 0: + explanation += f"\n[bold yellow]Package {pkg} can be upgraded from {pinned_version} to {latest_version} without conflicts.[/]." + else: + explanation += f"\n[yellow]uv sync output for {pkg}=={latest_version}:[/]\n" + explanation += after_result.stdout + after_result.stderr + return explanation @click.command() @@ -400,11 +507,23 @@ def run_uv_sync(cmd, cwd): default=False, help="For each selected package, attempts to upgrade to the latest version and explains why it cannot be upgraded.", ) -def cli(python_version, mode, selected_packages, explain_why): +@click.option( + "--verbose", + is_flag=True, + default=False, + help="Print the temporary pyproject.toml file used for each package when running --explain-why.", +) +def cli( + python_version: str, + mode: str, + selected_packages: str | None, + explain_why: bool, + verbose: bool, +): selected_packages_set = None if selected_packages: selected_packages_set = set(pkg.strip() for pkg in selected_packages.split(",") if pkg.strip()) - main(python_version, mode, selected_packages_set, explain_why) + main(python_version, mode, selected_packages_set, explain_why, verbose) if __name__ == "__main__":