From b0ef2fc0171dbc83d039ca4efcd0305dce15f1dc Mon Sep 17 00:00:00 2001
From: Florin Blanaru <florin.blanaru96@gmail.com>
Date: Thu, 25 Aug 2022 16:43:06 +0100
Subject: [PATCH] [CI] Assert some unittests are not skipped in CI (#12436)

This PR adds a script that diffs the skipped tests between the latest successful build on main and the current branch, and then posts a comment with the report on the open PR.

#11670
---
 .github/workflows/tests_bot.yml               |  21 ++
 tests/python/ci/test_ci.py                    | 179 ++++++++++++
 tests/scripts/github_skipped_tests_comment.py | 256 ++++++++++++++++++
 3 files changed, 456 insertions(+)
 create mode 100644 .github/workflows/tests_bot.yml
 create mode 100755 tests/scripts/github_skipped_tests_comment.py
diff --git a/.github/workflows/tests_bot.yml b/.github/workflows/tests_bot.yml
new file mode 100644
index 000000000000..e9d7d81375e4
--- /dev/null
+++ b/.github/workflows/tests_bot.yml
@@ -0,0 +1,21 @@
+# Runs the skipped-tests bot script when a successful 'tvm-ci/pr-head' commit status is reported.
+name: tests-bot
+on:
+  status
+jobs:
+  run-tests-bot:
+    if: ${{ github.repository == 'apache/tvm' && github.event.state == 'success' && github.event.context == 'tvm-ci/pr-head' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Comment skipped tests
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_RESOURCES_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_RESOURCES_AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-west-2
+          COMMIT_SHA: ${{ github.event.sha }}
+          TARGET_URL: ${{ github.event.target_url }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -eux
+          python tests/scripts/github_skipped_tests_comment.py
\ No newline at end of file
diff --git a/tests/python/ci/test_ci.py b/tests/python/ci/test_ci.py
index 1e2008fdd7ba..c45a0d8d8ee0 100644
--- a/tests/python/ci/test_ci.py
+++ b/tests/python/ci/test_ci.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """Test various CI scripts and GitHub Actions workflows"""
+import shutil
 import subprocess
 import json
 import textwrap
@@ -33,6 +34,184 @@ def parameterize_named(*values):
     return pytest.mark.parametrize(",".join(keys), [tuple(d.values()) for d in values])
 
 
+# pylint: disable=line-too-long
+TEST_DATA_SKIPPED_BOT = {
+    "found-diff": {
+        "main_xml_file": "unittest/file1.xml",
+        "main_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                <testsuites>
+                    <testsuite errors="0" failures="0" hostname="13e7c5f749d8" name="python-unittest-gpu-0-shard-1-ctypes" skipped="102"
+                               tests="165" time="79.312" timestamp="2022-08-10T22:39:36.673781">
+                        <testcase classname="ctypes.tests.python.unittest.test_auto_scheduler_search_policy"
+                                  name="test_sketch_search_policy_cuda_rpc_runner" time="9.679">
+                        </testcase>
+                    </testsuite>
+                </testsuites>
+                """,
+        "pr_xml_file": "unittest/file2.xml",
+        "pr_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                <testsuites>
+                    <testsuite errors="0" failures="0" hostname="13e7c5f749d8" name="python-unittest-gpu-0-shard-1-ctypes" skipped="102"
+                               tests="165" time="79.312" timestamp="2022-08-10T22:39:36.673781">
+                        <testcase classname="ctypes.tests.python.unittest.test_auto_scheduler_search_policy"
+                                  name="test_sketch_search_policy_cuda_rpc_runner" time="9.679">
+                            <skipped message="This test is skipped" type="pytest.skip">
+                                Skipped
+                            </skipped>
+                        </testcase>
+                        <testcase classname="ctypes.tests.python.unittest.test_roofline"
+                                  name="test_estimate_peak_bandwidth[cuda]" time="4.679">
+                            <skipped message="This is another skippe test" type="pytest.skip">
+                                Skipped
+                            </skipped>
+                        </testcase>
+                    </testsuite>
+                </testsuites>
+                """,
+        "target_url": "https://ci.tlcpack.ai/job/tvm/job/PR-11594/3/display/redirect",
+        "s3_prefix": "tvm-jenkins-artifacts-prod",
+        "jenkins_prefix": "ci.tlcpack.ai",
+        "common_main_build": """{"build_number": "4115", "state": "success"}""",
+        "commit_sha": "SHA",
+        "expected_url": "issues/11594/comments",
+        "expected_body": """<!---skipped-tests-comment-->\n\nThe list below shows some tests that ran in main SHA but were skipped in the CI build of SHA:\n```\nunittest -> ctypes.tests.python.unittest.test_auto_scheduler_search_policy#test_sketch_search_policy_cuda_rpc_runner\nunittest -> ctypes.tests.python.unittest.test_roofline#test_estimate_peak_bandwidth[cuda]\n```\nA detailed report of ran tests is [here](https://ci.tlcpack.ai/job/tvm/job/PR-11594/3/testReport/).""",
+    },
+    "no-diff": {
+        "main_xml_file": "unittest/file1.xml",
+        "main_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                <testsuites>
+                    <testsuite errors="0" failures="0" hostname="13e7c5f749d8" name="python-unittest-gpu-0-shard-1-ctypes" skipped="102"
+                               tests="165" time="79.312" timestamp="2022-08-10T22:39:36.673781">
+                        <testcase classname="ctypes.tests.python.unittest.test_auto_scheduler_search_policy"
+                                  name="test_sketch_search_policy_cuda_rpc_runner" time="9.679">
+                            <skipped message="This test is skipped" type="pytest.skip">
+                                Skipped
+                            </skipped>
+                        </testcase>
+                    </testsuite>
+                </testsuites>
+                """,
+        "pr_xml_file": "unittest/file2.xml",
+        "pr_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                <testsuites>
+                    <testsuite errors="0" failures="0" hostname="13e7c5f749d8" name="python-unittest-gpu-0-shard-1-ctypes" skipped="102"
+                               tests="165" time="79.312" timestamp="2022-08-10T22:39:36.673781">
+                        <testcase classname="ctypes.tests.python.unittest.test_auto_scheduler_search_policy"
+                                  name="test_sketch_search_policy_cuda_rpc_runner" time="9.679">
+                            <skipped message="This test is skipped" type="pytest.skip">
+                                Skipped
+                            </skipped>
+                        </testcase>
+                    </testsuite>
+                </testsuites>
+                """,
+        "target_url": "https://ci.tlcpack.ai/job/tvm/job/PR-11594/3/display/redirect",
+        "s3_prefix": "tvm-jenkins-artifacts-prod",
+        "jenkins_prefix": "ci.tlcpack.ai",
+        "common_main_build": """{"build_number": "4115", "state": "success"}""",
+        "commit_sha": "SHA",
+        "expected_url": "issues/11594/comments",
+        "expected_body": """<!---skipped-tests-comment-->\n\nNo additional skipped tests found in this branch for commit SHA.""",
+    },
+    "unable-to-run": {
+        "main_xml_file": "unittest/file1.xml",
+        "main_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                    <testsuites>
+                    </testsuites>
+                    """,
+        "pr_xml_file": "unittest/file2.xml",
+        "pr_xml_content": """<?xml version="1.0" encoding="utf-8"?>
+                    <testsuites>
+                    </testsuites>
+                    """,
+        "target_url": "https://ci.tlcpack.ai/job/tvm/job/PR-11594/3/display/redirect",
+        "s3_prefix": "tvm-jenkins-artifacts-prod",
+        "jenkins_prefix": "ci.tlcpack.ai",
+        "common_main_build": """{"build_number": "4115", "state": "failed"}""",
+        "commit_sha": "SHA",
+        "expected_url": "issues/11594/comments",
+        "expected_body": """<!---skipped-tests-comment-->\n\nUnable to run tests bot because main failed to pass CI at SHA.""",
+    },
+}
+# pylint: enable=line-too-long
+
+
+@tvm.testing.skip_if_wheel_test
+@pytest.mark.parametrize(
+    [
+        "main_xml_file",
+        "main_xml_content",
+        "pr_xml_file",
+        "pr_xml_content",
+        "target_url",
+        "s3_prefix",
+        "jenkins_prefix",
+        "common_main_build",
+        "commit_sha",
+        "expected_url",
+        "expected_body",
+    ],
+    [tuple(d.values()) for d in TEST_DATA_SKIPPED_BOT.values()],
+    ids=TEST_DATA_SKIPPED_BOT.keys(),
+)
+# pylint: enable=line-too-long
+def test_skipped_tests_comment(
+    tmpdir_factory,
+    main_xml_file,
+    main_xml_content,
+    pr_xml_file,
+    pr_xml_content,
+    target_url,
+    s3_prefix,
+    jenkins_prefix,
+    common_main_build,
+    commit_sha,
+    expected_url,
+    expected_body,
+):
+    """
+    Test that the skipped-tests bot, in dry-run mode, logs the expected comment URL and body
+    """
+    skipped_tests_script = REPO_ROOT / "tests" / "scripts" / "github_skipped_tests_comment.py"
+
+    def write_xml_file(root_dir, xml_file, xml_content):
+        shutil.rmtree(root_dir, ignore_errors=True)
+        file = root_dir / xml_file
+        file.parent.mkdir(parents=True)
+        with open(file, "w") as f:
+            f.write(textwrap.dedent(xml_content))
+
+    git = TempGit(tmpdir_factory.mktemp("tmp_git_dir"))
+    git.run("init")
+    git.run("checkout", "-b", "main")
+    git.run("remote", "add", "origin", "https://github.com/apache/tvm.git")
+
+    pr_test_report_dir = Path(git.cwd) / "pr-reports"
+    write_xml_file(pr_test_report_dir, pr_xml_file, pr_xml_content)
+    main_test_report_dir = Path(git.cwd) / "main-reports"
+    write_xml_file(main_test_report_dir, main_xml_file, main_xml_content)
+
+    proc = subprocess.run(
+        [
+            str(skipped_tests_script),
+            "--dry-run",
+            f"--s3-prefix={s3_prefix}",
+            f"--jenkins-prefix={jenkins_prefix}",
+            f"--common-main-build={common_main_build}",
+        ],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env={"TARGET_URL": target_url, "COMMIT_SHA": commit_sha},
+        encoding="utf-8",
+        cwd=git.cwd,
+        check=False,
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(f"Process failed:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}")
+
+    assert f"Dry run, would have posted {expected_url} with data {expected_body}." in proc.stderr
+
+
 @tvm.testing.skip_if_wheel_test
 @pytest.mark.parametrize(
     "target_url,base_url,commit_sha,expected_url,expected_body",
diff --git a/tests/scripts/github_skipped_tests_comment.py b/tests/scripts/github_skipped_tests_comment.py
new file mode 100755
index 000000000000..ef0630620b97
--- /dev/null
+++ b/tests/scripts/github_skipped_tests_comment.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+import os
+import logging
+import argparse
+import subprocess
+import sys
+from urllib import error
+from xml.etree import ElementTree
+
+import requests
+
+from git_utils import git, GitHubRepo, parse_remote
+from cmd_utils import init_log
+
+SKIPPED_TESTS_COMMENT_MARKER = "<!---skipped-tests-comment-->\n\n"
+GITHUB_ACTIONS_BOT_LOGIN = "github-actions[bot]"
+
+PR_TEST_REPORT_DIR = "pr-reports"
+MAIN_TEST_REPORT_DIR = "main-reports"
+
+
+def run_subprocess(command):  # Runs `command` through the shell; returns the CompletedProcess.
+    logging.info(f"Running command {command}")
+    proc = subprocess.run(command, shell=True, stdout=subprocess.PIPE, encoding="utf-8")
+    if proc.returncode != 0:  # Only stdout is captured, so stderr is not part of the error text.
+        raise RuntimeError(f"Command failed {command}:\nstdout:\n{proc.stdout}")
+    return proc
+
+
+def retrieve_test_report(s3_url, target_dir):  # Recursive `aws s3 cp` of s3_url into target_dir.
+    command = f"aws s3 cp {s3_url} {target_dir} --recursive"
+    run_subprocess(command)
+
+
+def get_common_commit_sha():  # Returns the merge base of origin/main and HEAD as printed by git.
+    command = "git merge-base origin/main HEAD"
+    proc = run_subprocess(command)
+    return proc.stdout.strip()
+
+
+def get_main_jenkins_build_number(github, common_commit):
+    """Return build number and state of the "tvm-ci/branch" status reported for common_commit."""
+    response = github.get(f"commits/{common_commit}/status")
+    for status in reversed(response["statuses"]):
+        if status["context"] != "tvm-ci/branch":
+            continue
+        state = status["state"]
+        target_url = str(status["target_url"])
+        # Take the path segment after "job/main/" (".../job/main/<build>/display/redirect");
+        # str.strip() drops a set of characters, not a literal prefix/suffix.
+        build_number = target_url.partition("job/main/")[2].split("/")[0]
+        if not build_number.isdigit():
+            raise RuntimeError(f"Could not parse a main build number from {target_url}")
+        return {"build_number": build_number, "state": state}
+    raise RuntimeError(f"Failed to find main build number for commit {common_commit}")
+
+
+def retrieve_test_reports(common_main_build, pr_number, build_number, s3_prefix):
+    """Download the pytest results of the PR build and of the common main build from S3."""
+    pr_results = f"s3://{s3_prefix}/tvm/PR-{pr_number}/{build_number}/pytest-results"
+    retrieve_test_report(pr_results, PR_TEST_REPORT_DIR)
+
+    # Results of the main build that the PR results are compared against.
+    main_results = f"s3://{s3_prefix}/tvm/main/{common_main_build}/pytest-results"
+    retrieve_test_report(main_results, MAIN_TEST_REPORT_DIR)
+
+
+def get_pr_and_build_numbers(target_url):
+    """Parse the PR and build numbers out of a ".../PR-<pr>/<build>/..." Jenkins target URL."""
+    segments = target_url[target_url.find("PR-") :].split("/")
+    # Drop the literal "PR-" prefix; str.strip("PR-") strips a character set and could eat more.
+    pr_number, build_number = segments[0][len("PR-") :], segments[1]
+    return {"pr_number": pr_number, "build_number": build_number}
+
+
+def build_test_set(directory):
+    """Map each immediate sub-directory of directory to its set of skipped "classname#name" tests."""
+    subdir_to_skipped = {}
+    for subdir in os.listdir(directory):
+        if not os.path.isdir(os.path.join(directory, subdir)):
+            continue
+        skipped_tests = subdir_to_skipped[subdir] = set()
+        # Every file below the sub-directory is parsed as an XML test report.
+        for root, _, files in os.walk(directory + "/" + subdir):
+            for file in files:
+                test_report = ElementTree.parse(root + "/" + file)
+                for testcase in test_report.iter("testcase"):
+                    if testcase.find("skipped") is not None:
+                        key = testcase.attrib["classname"] + "#" + testcase.attrib["name"]
+                        skipped_tests.add(key)
+    return subdir_to_skipped
+
+
+def to_node_name(dir_name: str):
+    return dir_name.replace("_", ": ", 1)  # Only the first "_" is replaced with ": ".
+
+
+def build_comment(
+    common_commit_sha,
+    common_main_build,
+    skipped_list,
+    pr_number,
+    build_number,
+    commit_sha,
+    jenkins_prefix,
+):
+    """Build the body of the skipped-tests PR comment; it always starts with the bot marker."""
+    if common_main_build["state"] != "success":
+        return f"{SKIPPED_TESTS_COMMENT_MARKER}Unable to run tests bot because main failed to pass CI at {common_commit_sha}."
+
+    if not skipped_list:
+        return f"{SKIPPED_TESTS_COMMENT_MARKER}No additional skipped tests found in this branch for commit {commit_sha}."
+
+    # Fenced list with one skipped test per line, followed by a link to the Jenkins test report.
+    header = (
+        f"{SKIPPED_TESTS_COMMENT_MARKER}The list below shows some tests that ran in main {common_commit_sha} but were "
+        f"skipped in the CI build of {commit_sha}:\n"
+        f"```\n"
+    )
+    listing = "".join(skip + "\n" for skip in skipped_list)
+    report_url = (
+        f"https://{jenkins_prefix}/job/tvm/job/PR-{str(pr_number)}/{str(build_number)}/testReport/"
+    )
+    return f"{header}{listing}```\nA detailed report of ran tests is [here]({report_url})."
+
+
+def get_pr_comments(github, url):  # Best effort: an HTTPError is logged and [] is returned.
+    try:
+        return github.get(url)
+    except error.HTTPError as e:
+        logging.exception(f"Failed to retrieve PR comments: {url}: {e}")
+        return []
+
+
+def search_for_docs_comment(comments):
+    """Return the first bot comment carrying the skipped-tests marker, or None if there is none."""
+    # The body is only inspected for comments authored by the GitHub Actions bot.
+    for candidate in comments:
+        by_bot = candidate["user"]["login"] == GITHUB_ACTIONS_BOT_LOGIN
+        if by_bot and SKIPPED_TESTS_COMMENT_MARKER in candidate["body"]:
+            return candidate
+    return None
+
+
+if __name__ == "__main__":
+    description = (
+        "Compares the skipped tests of this PR against the last successful build on main. Also comments on the PR "
+        "issue when tests are skipped in this PR and not on main."
+    )
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
+    parser.add_argument("--s3-prefix", default="tvm-jenkins-artifacts-prod")
+    parser.add_argument("--jenkins-prefix", default="ci.tlcpack.ai")
+    parser.add_argument("--common-main-build")
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        default=False,
+        help="run but don't send any request to GitHub",
+    )
+    args = parser.parse_args()
+    init_log()
+
+    remote = git(["config", "--get", f"remote.{args.remote}.url"])
+    user, repo = parse_remote(remote)
+
+    target_url = os.environ["TARGET_URL"]
+    pr_and_build = get_pr_and_build_numbers(target_url)
+
+    commit_sha = os.environ["COMMIT_SHA"]
+
+    if not args.dry_run:
+        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
+        common_commit_sha = get_common_commit_sha()
+        common_main_build = get_main_jenkins_build_number(github, common_commit_sha)
+        retrieve_test_reports(
+            common_main_build=common_main_build["build_number"],
+            pr_number=pr_and_build["pr_number"],
+            build_number=pr_and_build["build_number"],
+            s3_prefix=args.s3_prefix,
+        )
+    else:
+        assert args.common_main_build is not None  # Dry runs take the main build from the CLI.
+        common_main_build = json.loads(args.common_main_build)
+        common_commit_sha = os.environ["COMMIT_SHA"]
+
+    main_tests = build_test_set(MAIN_TEST_REPORT_DIR)
+    build_tests = build_test_set(PR_TEST_REPORT_DIR)
+
+    skipped_list = []
+    for subdir, skipped_set in build_tests.items():
+        skipped_main = main_tests.get(subdir)  # .get(): a missing directory must not raise KeyError.
+        if skipped_main is None:
+            logging.warning(f"Could not find directory {subdir} in main.")
+            continue
+
+        # Tests skipped in this PR build that were not skipped in the main build.
+        skipped_list.extend(
+            f"{to_node_name(subdir)} -> {test}" for test in skipped_set - skipped_main
+        )
+
+    # Sort the list to maintain an order in the output. Helps when validating the output in tests.
+    skipped_list.sort()
+
+    if not skipped_list:
+        logging.info("No skipped tests found.")
+
+    body = build_comment(
+        common_commit_sha,
+        common_main_build,
+        skipped_list,
+        pr_and_build["pr_number"],
+        pr_and_build["build_number"],
+        commit_sha,
+        args.jenkins_prefix,
+    )
+    url = f'issues/{pr_and_build["pr_number"]}/comments'
+    if not args.dry_run:
+        # For now, only comment on PRs opened by driazati, gigiblender and areusch.
+        get_pr_url = f'pulls/{pr_and_build["pr_number"]}'
+        pull_request_body = github.get(get_pr_url)
+        author = pull_request_body["user"]["login"]
+        if author not in ["driazati", "gigiblender", "areusch"]:
+            logging.info(f"Skipping this action for user {author}")
+            sys.exit(0)
+
+        pr_comments = get_pr_comments(github, url)
+        comment = search_for_docs_comment(pr_comments)
+
+        if comment is not None:
+            comment_url = comment["url"]
+            # The id is whatever follows "comments/" in the comment URL; str.strip("comments/")
+            # would strip a set of characters rather than that literal prefix.
+            comment_id = comment_url.partition("comments/")[2]
+            github.patch(f"issues/comments/{comment_id}", {"body": body})
+        else:
+            github.post(url, {"body": body})
+    else:
+        logging.info(f"Dry run, would have posted {url} with data {body}.")