From 2ce4eb4f9798964035dda51f612a19c3f9e960c6 Mon Sep 17 00:00:00 2001 From: David Riazati <9407960+driazati@users.noreply.github.com> Date: Mon, 21 Feb 2022 02:28:49 -0800 Subject: [PATCH] [ci] Add auto-updating `last-successful` branch (#10056) This adds a script that runs on a cron to discover the last commit where CI all passed (every job was successful and `tvm-ci/branch` is included) and updates the `last-successful` branch to point to this commit on `main`. This can be used for checking out the latest unbroken TVM, which can be useful for developers wanting a good changeset to base their changes on or for infra needing a clean, up-to-date TVM. --- .../update_last_successful_branch.yml | 43 ++++ tests/python/unittest/test_ci.py | 101 +++++++++ tests/scripts/git_utils.py | 8 +- tests/scripts/update_branch.py | 192 ++++++++++++++++++ 4 files changed, 341 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/update_last_successful_branch.yml create mode 100755 tests/scripts/update_branch.py diff --git a/.github/workflows/update_last_successful_branch.yml b/.github/workflows/update_last_successful_branch.yml new file mode 100644 index 0000000000000..1e8def4040aea --- /dev/null +++ b/.github/workflows/update_last_successful_branch.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# GH actions. +# We use it to cover windows and mac builds +# Jenkins is still the primary CI + +name: Update last-successful branch + +on: + schedule: + - cron: "0/15 * * * *" + workflow_dispatch: + +concurrency: + group: update-last-successful-branch + cancel-in-progress: true + +jobs: + update-last-successful-branch: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: Update last-successful branch + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + python tests/scripts/update_branch.py || echo step failed diff --git a/tests/python/unittest/test_ci.py b/tests/python/unittest/test_ci.py index f5183d79b768b..d3e7c79b88ed2 100644 --- a/tests/python/unittest/test_ci.py +++ b/tests/python/unittest/test_ci.py @@ -68,6 +68,107 @@ def run(pr_body, expected_reviewers): ) +def test_update_branch(tmpdir_factory): + update_script = REPO_ROOT / "tests" / "scripts" / "update_branch.py" + + def run(statuses, expected_rc, expected_output): + git = TempGit(tmpdir_factory.mktemp("tmp_git_dir")) + git.run("init") + git.run("checkout", "-b", "main") + git.run("remote", "add", "origin", "https://github.com/apache/tvm.git") + commit = { + "statusCheckRollup": {"contexts": {"nodes": statuses}}, + "oid": "123", + "messageHeadline": "hello", + } + data = { + "data": { + "repository": { + "defaultBranchRef": {"target": {"history": {"edges": [], "nodes": [commit]}}} + } + } + } + proc = subprocess.run( + [str(update_script), "--dry-run", "--testonly-json", json.dumps(data)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + cwd=git.cwd, + ) + + if proc.returncode != expected_rc: + raise RuntimeError( + f"Wrong return code:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}" + ) + + if expected_output not in proc.stdout: + raise RuntimeError( + f"Missing 
{expected_output}:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}" + ) + + # Missing expected tvm-ci/branch test + run( + statuses=[ + { + "context": "test", + "state": "SUCCESS", + } + ], + expected_rc=1, + expected_output="No good commits found in the last 1 commits", + ) + + # Only has the right passing test + run( + statuses=[ + { + "context": "tvm-ci/branch", + "state": "SUCCESS", + } + ], + expected_rc=0, + expected_output="Found last good commit: 123: hello", + ) + + # Check with many statuses + run( + statuses=[ + { + "context": "tvm-ci/branch", + "state": "SUCCESS", + }, + { + "context": "tvm-ci/branch2", + "state": "SUCCESS", + }, + { + "context": "tvm-ci/branch3", + "state": "FAILED", + }, + ], + expected_rc=1, + expected_output="No good commits found in the last 1 commits", + ) + run( + statuses=[ + { + "context": "tvm-ci/branch", + "state": "SUCCESS", + }, + { + "context": "tvm-ci/branch2", + "state": "SUCCESS", + }, + { + "context": "tvm-ci/branch3", + "state": "SUCCESS", + }, + ], + expected_rc=0, + expected_output="Found last good commit: 123: hello", + ) + + def test_skip_ci(tmpdir_factory): skip_ci_script = REPO_ROOT / "tests" / "scripts" / "git_skip_ci.py" diff --git a/tests/scripts/git_utils.py b/tests/scripts/git_utils.py index 530abe8029a6e..0885907130013 100644 --- a/tests/scripts/git_utils.py +++ b/tests/scripts/git_utils.py @@ -88,8 +88,10 @@ def parse_remote(remote: str) -> Tuple[str, str]: return m.groups() -def git(command): +def git(command, **kwargs): command = ["git"] + command print("Running", command) - proc = subprocess.run(command, stdout=subprocess.PIPE, check=True) - return proc.stdout.decode().strip() + proc = subprocess.run(command, stdout=subprocess.PIPE, encoding="utf-8", **kwargs) + if proc.returncode != 0: + raise RuntimeError(f"Command failed {command}:\nstdout:\n{proc.stdout}") + return proc.stdout.strip() diff --git a/tests/scripts/update_branch.py b/tests/scripts/update_branch.py new file mode 100755 index 
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Find the most recent commit on the default branch whose CI statuses all
succeeded and force-push a branch (by default 'last-successful') to it.
"""

import os
import sys
import json
import argparse
import tempfile
from typing import Any, Dict, Optional

from git_utils import git, GitHubRepo, parse_remote


# Upper bound on how many commits are inspected before giving up. Whole pages
# are always inspected, so slightly more than this many commits may be checked.
MAX_COMMITS_TO_CHECK = 50

# Statuses that must be present on a commit for it to count as passing
# (i.e. don't approve if CI was broken and didn't schedule a job)
EXPECTED_JOBS = {"tvm-ci/branch"}


_commit_query_fields = """
    messageHeadline
    oid
    statusCheckRollup {
        contexts(last:100) {
            nodes {
                ... on CheckRun {
                    conclusion
                    status
                    name
                    checkSuite {
                        workflowRun {
                            workflow {
                                name
                            }
                        }
                    }
                }
                ... on StatusContext {
                    context
                    state
                }
            }
        }
    }
"""


def commits_query(user: str, repo: str, cursor: Optional[str] = None) -> str:
    """
    Create a GraphQL query to find the last N commits along with their statuses
    and some metadata (paginated after 'cursor')

    Parameters
    ----------
    user : owner of the repository
    repo : name of the repository
    cursor : history cursor to resume after, or None for the first page

    Returns
    -------
    The GraphQL query text requesting one page of 15 commits.
    """
    after = "" if cursor is None else f', after:"{cursor}"'

    return f"""
    {{
        repository(name: "{repo}", owner: "{user}") {{
            defaultBranchRef {{
                target {{
                    ... on Commit {{
                        history(first: 15{after}) {{
                            edges {{ cursor }}
                            nodes {{
                                {_commit_query_fields}
                            }}
                        }}
                    }}
                }}
            }}
        }}
    }}
    """


def _check_run_name(status: Dict[str, Any]) -> str:
    """
    Build the display name of a CheckRun node as '<workflow> / <job>'. Falls
    back to the bare job name when no workflow is attached ('workflowRun' is
    nullable, e.g. for check runs not created by a GitHub Actions workflow).
    """
    check_suite = status.get("checkSuite") or {}
    workflow_run = check_suite.get("workflowRun") or {}
    workflow = (workflow_run.get("workflow") or {}).get("name")
    if workflow is None:
        return status["name"]
    return f"{workflow} / {status['name']}"


def commit_passed_ci(commit: Dict[str, Any]) -> bool:
    """
    Returns true if all of a commit's statuses are SUCCESS and every job in
    EXPECTED_JOBS reported a status.

    Parameters
    ----------
    commit : one 'nodes' entry of the history query, carrying 'oid' and
        'statusCheckRollup'

    Returns
    -------
    False when the commit has no statuses at all, when an expected job is
    missing, or when any status is not SUCCESS; True otherwise.
    """
    rollup = commit.get("statusCheckRollup")
    if rollup is None:
        # 'statusCheckRollup' is null when nothing has reported on the commit
        # yet; there is no evidence that CI passed, so it is not a good commit
        print(f"No statuses reported on {commit['oid']}")
        return False

    # GitHub Actions statuses are different from external GitHub statuses, so
    # unify them into 1 representation
    # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads
    unified_statuses = []
    for status in rollup["contexts"]["nodes"]:
        if "context" in status:
            # Parse non-GHA status
            unified_statuses.append((status["context"], status["state"] == "SUCCESS"))
        else:
            # Parse GitHub Actions item
            unified_statuses.append((_check_run_name(status), status["conclusion"] == "SUCCESS"))

    print(f"Statuses on {commit['oid']}:", json.dumps(unified_statuses, indent=2))

    job_names = {name for name, _ in unified_statuses}
    if not EXPECTED_JOBS.issubset(job_names):
        # Did not find expected job name
        return False

    return all(passed for _, passed in unified_statuses)


def update_branch(
    user: str, repo: str, sha: str, branch_name: str, remote: str = "origin"
) -> None:
    """
    Point 'branch_name' at 'sha' and force-push it to 'remote'.

    This is destructive for the local checkout: it hard-resets the currently
    checked out branch to 'sha' before (re)creating 'branch_name'.

    Parameters
    ----------
    user, repo : unused here; kept so existing callers keep working
    sha : commit to move the branch to (fetched from 'remote')
    branch_name : branch to create or reset
    remote : git remote to fetch from and push to
    """
    git(["fetch", remote, sha])
    git(["reset", "--hard", "FETCH_HEAD"])
    # -B creates the branch or resets it if it already exists. Unlike
    # 'branch -D' + 'checkout -b' this also works when branch_name is the
    # branch that is currently checked out.
    git(["checkout", "-B", branch_name])

    # Create and push the branch
    git(["push", remote, "--force", branch_name])
    print(f"Pushed branch {branch_name} with commit {sha}")


def _history(response: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract the default branch's commit history page from a query response.
    """
    return response["data"]["repository"]["defaultBranchRef"]["target"]["history"]


def main() -> int:
    """
    Parse arguments, walk the default branch's history page by page and update
    the branch to the first (most recent) commit that passed CI.

    Returns
    -------
    0 if a passing commit was found, 1 otherwise (used as the exit code).
    """
    description = "Push the a branch to the last commit that passed all CI runs"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
    parser.add_argument("--dry-run", action="store_true", help="don't submit to GitHub")
    parser.add_argument("--branch", default="last-successful", help="branch name")
    parser.add_argument(
        "--testonly-json", help="(testing) data to use instead of fetching from GitHub"
    )
    args = parser.parse_args()

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)
    # TODO: Remove this before landing
    # NOTE(review): this override makes the parsed remote irrelevant for the
    # query; confirm whether forks are meant to query apache/tvm before
    # dropping it.
    user, repo = ("apache", "tvm")

    if args.testonly_json:
        # No GitHub client in test mode, so only the supplied page is checked
        github = None
        r = json.loads(args.testonly_json)
    else:
        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
        r = github.graphql(commits_query(user, repo))

    checked = 0
    while True:
        history = _history(r)

        # Check each commit
        for commit in history["nodes"]:
            checked += 1
            if commit_passed_ci(commit):
                print(f"Found last good commit: {commit['oid']}: {commit['messageHeadline']}")
                if not args.dry_run:
                    update_branch(
                        user=user,
                        repo=repo,
                        sha=commit["oid"],
                        branch_name=args.branch,
                        remote=args.remote,
                    )
                # Nothing to do after updating the branch, exit early
                return 0

        # No good commit found, proceed to next page of results unless there
        # is no next page, no client to fetch it with, or the backstop that
        # prevents looking through all the past commits has been reached
        edges = history["edges"]
        if not edges or github is None or checked >= MAX_COMMITS_TO_CHECK:
            break
        r = github.graphql(commits_query(user, repo, cursor=edges[-1]["cursor"]))

    print(f"No good commits found in the last {checked} commits")
    return 1


if __name__ == "__main__":
    sys.exit(main())