diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml new file mode 100644 index 0000000000..4898d21f7f --- /dev/null +++ b/.github/workflows/test-integration.yml @@ -0,0 +1,36 @@ +name: Integration Tests +on: + push: + branches: + - master + pull_request: null + merge_group: null + + +# Integration tests interact with GitHub resources in the integration test infrastructure and therefore +# cannot run concurrently with other integration tests. +concurrency: + group: cf-scripts-integration-tests + cancel-in-progress: false + +jobs: + setup-repositories: + name: Set up Integration Test Repositories + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 + + - name: setup-micromamba + uses: mamba-org/setup-micromamba@f8b8a1e23a26f60a44c853292711bacfd3eac822 # v1 + with: + environment-file: conda-lock.yml + environment-name: cf-scripts + condarc-file: autotick-bot/condarc + + - name: Set up Integration Test Repositories + run: python -m tests_integration.setup_repositories + env: + GH_TOKEN_STAGING_CONDA_FORGE: ${{ secrets.GH_TOKEN_STAGING_CONDA_FORGE }} + GH_TOKEN_STAGING_BOT_USER: ${{ secrets.GH_TOKEN_STAGING_BOT_USER }} + GH_TOKEN_STAGING_REGRO: ${{ secrets.GH_TOKEN_STAGING_REGRO }} diff --git a/tests_integration/README.md b/tests_integration/README.md new file mode 100644 index 0000000000..2da49cc331 --- /dev/null +++ b/tests_integration/README.md @@ -0,0 +1,35 @@ +# Integration Tests +This directory contains integration tests for the autotick-bot. +The tests are run against actual GitHub repositories, and are used to verify that the +bot works as expected in an environment closely resembling production. 
# tests_integration/collect_test_scenarios.py
#
# Discovery of integration test cases and their pseudo-random grouping into
# "scenarios". A scenario assigns exactly one test case to every feedstock so
# that all feedstocks can be exercised within a single bot run.

# File stems below ``definitions/<feedstock>/`` that are not test cases.
SKIP_TEST_CASES = {"__init__"}


def collect_integration_test_cases() -> dict[str, list[str]]:
    """
    For each feedstock, return a list of all test cases that should be run for it.
    The test cases do not include the feedstock name or the .py extension.

    Example return value:
    {
        "feedstock1": ["aarch_migration", "version_update"],
        "feedstock2": ["version_update"],
    }

    The return value of this function is deterministic (sorted by feedstock name and test case name).
    Feedstock directories that contain no test case modules do not appear in the result.
    """
    cases_by_feedstock: dict[str, list[str]] = collections.defaultdict(list)

    for test_case_path in DEFINITIONS_DIR.glob("*/*.py"):
        test_case_name = test_case_path.stem
        if test_case_name in SKIP_TEST_CASES:
            continue
        cases_by_feedstock[test_case_path.parent.name].append(test_case_name)

    # Feedstock names are unique, so sorting the items orders by feedstock name only.
    return {
        feedstock: sorted(case_names)
        for feedstock, case_names in sorted(cases_by_feedstock.items())
    }


def get_number_of_test_scenarios(integration_test_cases: dict[str, list[str]]) -> int:
    """
    Return the number of scenarios needed to run every test case at least once.

    This is the largest number of test cases any single feedstock has,
    or 0 if there are no test cases at all.
    """
    return max(
        (len(case_names) for case_names in integration_test_cases.values()),
        default=0,
    )


def get_all_test_scenario_ids(
    integration_test_cases: dict[str, list[str]],
) -> list[int]:
    """
    Return all valid scenario IDs, i.e. ``[0, 1, ..., n_scenarios - 1]``.
    """
    return list(range(get_number_of_test_scenarios(integration_test_cases)))


def init_random():
    """
    Seed the global ``random`` module with the GitHub run ID.

    :raises KeyError: If the run ID environment variable is not set.
    :raises ValueError: If the run ID is not an integer.
    """
    random.seed(int(os.environ[ENV_GITHUB_RUN_ID]))


def get_test_scenario(scenario_id: int) -> dict[str, str]:
    """
    Get the test scenario for the given ID.
    The scenario is a dictionary with the feedstock name as key and the test case name as value.

    Test scenarios are pseudo-randomly generated with the GitHub run ID as seed.

    :param scenario_id: ID of the scenario, between 0 and the number of scenarios minus one.
    :raises ValueError: If the scenario ID is out of range.
    """
    init_random()
    integration_test_cases = collect_integration_test_cases()

    n_scenarios = get_number_of_test_scenarios(integration_test_cases)

    # Negative IDs must be rejected explicitly: they would otherwise silently select a scenario.
    if scenario_id < 0 or scenario_id >= n_scenarios:
        raise ValueError(
            f"Invalid scenario ID: {scenario_id}. Must be between 0 and {n_scenarios - 1}."
        )

    # Make sure that each feedstock has exactly n_scenarios test cases: the list of test cases
    # is repeated until it is long enough and the surplus is cut off again.
    test_cases_extended: dict[str, list[str]] = {}
    for feedstock, test_cases in integration_test_cases.items():
        repetitions = -(-n_scenarios // len(test_cases))  # ceiling division
        test_cases_extended[feedstock] = (test_cases * repetitions)[:n_scenarios]

    # The shuffle order (sorted feedstock names) is part of the seeded, reproducible result.
    for test_cases in test_cases_extended.values():
        random.shuffle(test_cases)

    # Scenario i takes the i-th element counted from the end of each shuffled list. This is
    # what popping from the end i times and returning the next popped element would yield.
    return {
        feedstock: test_cases[-(scenario_id + 1)]
        for feedstock, test_cases in test_cases_extended.items()
    }
+++ b/tests_integration/setup_repositories.py @@ -0,0 +1,215 @@ +""" +This module is used by the integration tests to set up the GitHub repositories +that are needed for running the tests. + +We do not *create* any repositories within the bot's user account here. This is handled in the prepare function of the +test cases themselves because tests could purposefully rely on the actual bot itself to create repositories. + +However, we do delete unnecessary feedstocks from the bot's user account. + +After the repositories are set up, we write a list of all test scenario ids to be run to $GITHUB_OUTPUT. +""" + +import logging +from collections.abc import Iterable +from dataclasses import dataclass +from typing import Protocol + +from github import Github +from github.Repository import Repository + +from tests_integration.collect_test_scenarios import ( + collect_integration_test_cases, + get_all_test_scenario_ids, +) +from tests_integration.shared import ( + DEFINITIONS_DIR, + FEEDSTOCK_SUFFIX, + GITHUB_OUTPUT_KEY_SCENARIO_IDS, + REGRO_ACCOUNT_REPOS, + GitHubAccount, + get_github_token, + is_user_account, + write_github_output, +) + +LOGGER = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class GitHubAccountSetup: + """ + Information about the setup of a GitHub account for the integration tests. + """ + + account: GitHubAccount + """ + The GitHub account for which the setup is done. + """ + + target_names: set[str] + """ + The names of the repositories that should exist after the preparation (excluding the suffix). + """ + + suffix: str | None = None + """ + If given, only repositories with the given suffix are considered for deletion and the target names + are extended with the suffix. + """ + + delete_only: bool = False + """ + If True, only delete unnecessary repositories and do not create any new ones. 
+ """ + + +class RepositoryOwner(Protocol): + def create_repo(self, name: str) -> Repository: + pass + + def get_repo(self, name: str) -> Repository: + pass + + def get_repos(self) -> Iterable[Repository]: + pass + + +def setup_logging(level: int | str): + logging.basicConfig(level=level) + + +def get_test_feedstock_names() -> set[str]: + """ + Returns the list of feedstock names that are needed for the integration tests. + The names do not include the "-feedstock" suffix. + """ + # note: the trailing "/" is needed to only get the directories, + # because of a Python bug this only works in Python 3.11 or later (which is fine) + # https://bugs.python.org/issue22276 + return {path.name for path in DEFINITIONS_DIR.glob("*/")} + + +def _or_empty_set(value: set[str]) -> set[str] | str: + """ + Logging helper function that returns "{}" if the given set is empty. + """ + return value or "{}" + + +def prepare_repositories( + owner: RepositoryOwner, + owner_name: str, + target_names: Iterable[str], + delete_only: bool, + suffix: str | None = None, +): + """ + Prepares the repositories of a certain owner for the integration tests. + Unnecessary repositories are deleted and missing repositories are created. + + :param owner: The owner of the repositories. + :param owner_name: The name of the owner (for logging). + :param target_names: The names of the repositories that should exist after the preparation (excluding the suffix). + :param suffix: If given, only repositories with the given suffix are considered for deletion and the target names + are extended with the suffix. + :param delete_only: If True, only delete unnecessary repositories and do not create any new ones. 
+ """ + existing_names = {repo.name for repo in owner.get_repos()} + target_names = set(target_names) + + if suffix: + existing_names = {name for name in existing_names if name.endswith(suffix)} + target_names = {name + suffix for name in target_names} + + to_delete = existing_names - target_names + to_create = target_names - existing_names + + LOGGER.info( + "Deleting the following repositories for %s: %s", + owner_name, + _or_empty_set(to_delete), + ) + for name in to_delete: + owner.get_repo(name).delete() + + if delete_only: + return + + LOGGER.info( + "Creating the following repositories for %s: %s", + owner_name, + _or_empty_set(to_create), + ) + for name in to_create: + owner.create_repo(name) + + +def prepare_accounts(setup_infos: Iterable[GitHubAccountSetup]): + """ + Prepares the repositories of all GitHub accounts for the integration tests. + """ + for setup_info in setup_infos: + # for each account, we need to create a separate GitHub instance because different tokens are needed + github = Github(get_github_token(setup_info.account)) + + owner: RepositoryOwner + if is_user_account(setup_info.account): + current_user = github.get_user() + if current_user.login != setup_info.account: + raise ValueError("The token is not for the expected user") + owner = current_user + else: + owner = github.get_organization(setup_info.account) + + prepare_repositories( + owner=owner, + owner_name=setup_info.account, + target_names=setup_info.target_names, + delete_only=setup_info.delete_only, + suffix=setup_info.suffix, + ) + + +def prepare_all_accounts(): + test_feedstock_names = get_test_feedstock_names() + logging.info("Test feedstock names: %s", _or_empty_set(test_feedstock_names)) + + setup_infos: list[GitHubAccountSetup] = [ + GitHubAccountSetup( + GitHubAccount.CONDA_FORGE_ORG, + test_feedstock_names, + FEEDSTOCK_SUFFIX, + ), + GitHubAccountSetup( + GitHubAccount.BOT_USER, + test_feedstock_names, + FEEDSTOCK_SUFFIX, + delete_only=True, # see the top-level comment 
for the reason + ), + GitHubAccountSetup( + GitHubAccount.REGRO_ORG, + REGRO_ACCOUNT_REPOS, + ), + ] + + prepare_accounts(setup_infos) + + +def _format_scenario_ids(scenario_ids: list[int]) -> str: + return "[" + ", ".join(str(s_id) for s_id in scenario_ids) + "]" + + +def write_scenario_ids(): + ids = get_all_test_scenario_ids(collect_integration_test_cases()) + write_github_output(GITHUB_OUTPUT_KEY_SCENARIO_IDS, _format_scenario_ids(ids)) + + +def main(): + setup_logging(logging.INFO) + prepare_all_accounts() + write_scenario_ids() + + +if __name__ == "__main__": + main() diff --git a/tests_integration/shared.py b/tests_integration/shared.py new file mode 100644 index 0000000000..32b0fb4108 --- /dev/null +++ b/tests_integration/shared.py @@ -0,0 +1,53 @@ +import os +from enum import StrEnum +from pathlib import Path + + +class GitHubAccount(StrEnum): + CONDA_FORGE_ORG = "conda-forge-bot-staging" + BOT_USER = "regro-cf-autotick-bot-staging" + REGRO_ORG = "regro-staging" + + +GITHUB_TOKEN_ENV_VARS: dict[GitHubAccount, str] = { + GitHubAccount.CONDA_FORGE_ORG: "GH_TOKEN_STAGING_CONDA_FORGE", + GitHubAccount.BOT_USER: "GH_TOKEN_STAGING_BOT_USER", + GitHubAccount.REGRO_ORG: "GH_TOKEN_STAGING_REGRO", +} + +IS_USER_ACCOUNT: dict[GitHubAccount, bool] = { + GitHubAccount.CONDA_FORGE_ORG: False, + GitHubAccount.BOT_USER: True, + GitHubAccount.REGRO_ORG: False, +} + +REGRO_ACCOUNT_REPOS = { + "cf-graph-countyfair", +} + +ENV_GITHUB_OUTPUT = "GITHUB_OUTPUT" +ENV_GITHUB_RUN_ID = "GITHUB_RUN_ID" +""" +Used as a random seed for the integration tests. 
+""" + +GITHUB_OUTPUT_KEY_SCENARIO_IDS = "scenario_ids" + + +DEFINITIONS_DIR = Path(__file__).parent / "definitions" + + +def get_github_token(account: GitHubAccount) -> str: + return os.environ[GITHUB_TOKEN_ENV_VARS[account]] + + +def is_user_account(account: GitHubAccount) -> bool: + return IS_USER_ACCOUNT[account] + + +def write_github_output(key: str, value: str): + with open(os.environ[ENV_GITHUB_OUTPUT], "a") as f: + f.write(f"{key}={value}\n") + + +FEEDSTOCK_SUFFIX = "-feedstock"