From ff068789246bda540ba2eb39bf2abc5faac40fc6 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross@codeplay.com>
Date: Tue, 15 Oct 2024 14:35:26 +0100
Subject: [PATCH] Rewrite cts_exe.py to handle match logic

cts_exe.py has been rewritten to include the logic for handling
.match files. Specifically, it will divide the test space into
two halves; tests that are expected to pass, and those that are
expected to fail.

For the tests expected to pass, it will run them all in the same
gtest invocation with the assumption that it succeeds. For the
tests expected to fail, they will each be run with individual
gtest invocations. This allows them to freely segfault or abort
without hurting other tests.

In this commit, the match files are (mostly) unchanged, and the
passing and failing tests should be the same. The match file is
treated as a list of failing tests with a few tokens that are
replaced:
* `{{NONDETERMINISTIC}}` ignored, required for compatibility with the
  match checker.
* `{{OPT}}` this test may or may not fail. It's still run separately,
  but doesn't report an error on failure.
* `{{.*}}` replaced with `*`; converts "match" wildcard matches to
  "gtest" test name matches.
* Lines starting with `#` and empty lines are treated as comments and ignored.
* `{{Segmentation` for compatibility, this will cause a failure in
  the "expected success" execution to not count as an error. This
  matches the behaviour of the prior match test logic.

Some .match files have been fixed and empty ones have been removed.

If GTEST_OUTPUT is specified, we assume that we are being run in
ctest_parser.py and don't do anything fancy.
---
 test/conformance/CMakeLists.txt               |  16 +-
 test/conformance/cts_exe.py                   | 215 +++++++++++++-----
 .../exp_command_buffer_adapter_cuda.match     |   0
 .../exp_command_buffer_adapter_hip.match      |   0
 ...xp_command_buffer_adapter_native_cpu.match |   4 +-
 .../queue/queue_adapter_level_zero.match      |   1 -
 6 files changed, 169 insertions(+), 67 deletions(-)
 mode change 100644 => 100755 test/conformance/cts_exe.py
 delete mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match
 delete mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match
 delete mode 100644 test/conformance/queue/queue_adapter_level_zero.match

diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt
index 9a122e36b4..1c717dd3cb 100644
--- a/test/conformance/CMakeLists.txt
+++ b/test/conformance/CMakeLists.txt
@@ -21,13 +21,13 @@ function(add_test_adapter name adapter backend)
     function(do_add_test tname env)
         if(${UR_CONFORMANCE_ENABLE_MATCH_FILES} AND EXISTS ${MATCH_FILE})
             add_test(NAME ${tname}
-                COMMAND ${CMAKE_COMMAND}
-                -D TEST_FILE=${Python3_EXECUTABLE}
-                -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${TEST_COMMAND}"
-                -D MODE=stdout
-                -D MATCH_FILE=${MATCH_FILE}
-                -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake
-                DEPENDS ${TEST_TARGET_NAME}
+                COMMAND ${Python3_EXECUTABLE} ${UR_CONFORMANCE_TEST_DIR}/cts_exe.py
+                    --failslist ${MATCH_FILE}
+                    --test_command ${PROJECT_BINARY_DIR}/bin/${TEST_TARGET_NAME}
+                    --
+                    --backend=${backend}
+                    --devices_count=${UR_TEST_DEVICES_COUNT}
+                    --platforms_count=${UR_TEST_PLATFORMS_COUNT}
                 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
             )
         else()
@@ -40,7 +40,7 @@ function(add_test_adapter name adapter backend)
         endif()
 
         if(UR_CONFORMANCE_ENABLE_MATCH_FILES)
-            list(APPEND env GTEST_COLOR=no)
+            list(APPEND env GTEST_COLOR=yes)
         endif()
         set_tests_properties(${tname} PROPERTIES
             ENVIRONMENT "${env}"
diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py
old mode 100644
new mode 100755
index 8b2e33d03b..b183b55d6e
--- a/test/conformance/cts_exe.py
+++ b/test/conformance/cts_exe.py
@@ -1,6 +1,6 @@
-#! /usr/bin/env python3
+#!/usr/bin/env python3
 """
- Copyright (C) 2023 Intel Corporation
+ Copyright (C) 2024 Intel Corporation
 
  Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
  See LICENSE.TXT
@@ -11,68 +11,171 @@
 # The match files contain tests that are expected to fail.
 
 import os
-import shlex
 import sys
-from argparse import ArgumentParser
+import argparse
 import subprocess  # nosec B404
-import signal
-import re
-from collections import OrderedDict
 
 
-def _print_cmdline(cmd_args, env, cwd, file=sys.stderr):
-    cwd = shlex.quote(cwd)
-    env_args = " ".join(
-        "%s=%s" % (shlex.quote(k), shlex.quote(v)) for k, v in env.items()
+def _ci():
+    return os.environ.get("CI") is not None
+
+
+def _color():
+    return sys.stdout.isatty() or os.environ.get("GTEST_COLOR", "").lower() == "yes"  # unset GTEST_COLOR means no color
+
+
+def _print_header(header, *args):
+    if _ci():
+        # GitHub CI interprets this as a "group header" and will provide buttons to fold/unfold it
+        print("##[group]{}".format(header.format(*args)))
+    elif _color():
+        # Inverse color
+        print("\033[7m{}\033[27m".format(header.format(*args)))
+    else:
+        print("### {}".format(header.format(*args)))
+
+
+def _print_end_header():
+    if _ci():
+        print("##[endgroup]")
+
+
+def _print_error(header, *args):
+    if _color():
+        # "!!!" on a red background
+        print("\033[41m!!!\033[0m {}".format(header.format(*args)))
+    else:
+        print("!!! {}".format(header.format(*args)))
+
+
+def _print_format(msg, *args):
+    print(msg.format(*args))
+
+
+def _print_environ(env):
+    _print_header("Environment")
+    for k, v in env.items():
+        _print_format("> {} = {}", k, v)
+    _print_end_header()
+
+
+def _check_filter(cmd, filter):
+    """
+    Checks that the filter matches at least one test for the given cmd; waits for the listing process to finish
+    """
+    sys.stdout.flush()
+    check = subprocess.run(  # nosec B603
+        cmd + ["--gtest_list_tests"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+        env=(os.environ | {"GTEST_FILTER": filter}),
     )
-    cmd_str = " ".join(map(shlex.quote, cmd_args))
-    print(f"### env -C {cwd} -i {env_args} {cmd_str}", file=file)
+    # Any captured listing output is taken to mean the filter selected a test;
+    # run() has already reaped the child, so no process or pipe is leaked.
+    return bool(check.stdout)
 
 
-if __name__ == "__main__":
+def _run_cmd(cmd, comment, filter):
+    _print_header("Running suite for: {}", comment)
+    _print_format("### {}", " ".join(cmd))
+
+    # Check tests are found
+    if not _check_filter(cmd, filter):
+        _print_end_header()
+        _print_error("Could not find any tests with this filter")
+        return 2
 
-    parser = ArgumentParser()
+    sys.stdout.flush()
+    result = subprocess.call(  # nosec B603
+        cmd,
+        stdout=sys.stdout,
+        stderr=sys.stdout,
+        env=(os.environ | {"GTEST_FILTER": filter}),
+    )
+    _print_end_header()
+    return result
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
     parser.add_argument("--test_command", help="Ctest test case")
-    parser.add_argument("--devices_count", type=str, help="Number of devices on which tests will be run")
-    parser.add_argument("--platforms_count", type=str, help="Number of platforms on which tests will be run")
-    parser.add_argument("--backend", type=str, help="Number of platforms on which tests will be run")
+    parser.add_argument("--failslist", type=str, help="Failure list")
+    parser.add_argument("--", dest="ignored", action="store_true")
+    parser.add_argument("rest", nargs=argparse.REMAINDER)
     args = parser.parse_args()
-    invocation = [
-        args.test_command,
-        "--gtest_brief=1",
-        f"--devices_count={args.devices_count}",
-        f"--platforms_count={args.platforms_count}",
-        f"--backend={args.backend}",
-    ]
-    _print_cmdline(invocation, os.environ, os.getcwd())
-
-    result = subprocess.Popen(  # nosec B603
-        invocation, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
-    )
 
-    pat = re.compile(r'\[( )*FAILED( )*\]')
-    output_list = []
-    test_cases = []
-    for line in result.stdout:
-        output_list.append(line)
-        if pat.search(line):
-            test_case = line.split(" ")[5]
-            test_case = test_case.rstrip(',')
-            test_cases.append(test_case)
-
-    # Every fail has a single corresponding match line but if there are multiple
-    # devices being tested there will be multiple lines with the same failure
-    # message. To avoid matching mismatch, remove lines that differ only by device ID.
-    test_cases = [re.sub(r'ID[0-9]ID', 'X', tc) for tc in test_cases]
-    test_cases = list(OrderedDict.fromkeys(test_cases))
-
-    for tc in test_cases:
-        print(tc)
-
-    rc = result.wait()
-    if rc < 0:
-        print(signal.strsignal(abs(rc)))
-
-    print("#### GTEST_OUTPUT ####", file=sys.stderr)
-    print(''.join(output_list), file=sys.stderr)
-    print("#### GTEST_OUTPUT_END ####", file=sys.stderr)
+    base_invocation = [args.test_command] + args.rest
+
+    if os.environ.get("GTEST_OUTPUT") is not None:
+        # We are being run purely to generate an output file (likely for ctest_parser.py); fall back to a single
+        # test execution
+        sys.exit(
+            subprocess.call(  # nosec B603
+                base_invocation, stdout=sys.stdout, stderr=sys.stderr
+            )
+        )
+
+    _print_environ(os.environ)
+
+    # Parse fails list
+    _print_format("Loading fails from {}", args.failslist)
+    fail_patterns = []
+    expected_fail = False
+    with open(args.failslist) as f:
+        for l in f:
+            optional = "{{OPT}}" in l
+            l = l.replace("{{OPT}}", "")
+            l = l.replace("{{.*}}", "*")
+
+            if l.startswith("{{Segmentation fault"):
+                expected_fail = True
+                continue
+            if l.startswith("#"):
+                continue
+            if l.startswith("{{NONDETERMINISTIC}}"):
+                continue
+            if l.strip() == "":
+                continue
+
+            fail_patterns.append(
+                {
+                    "pattern": l.strip(),
+                    "optional": optional,
+                }
+            )
+
+    _print_header("Known failing tests")
+    for fail in fail_patterns:
+        _print_format("> {}", fail)
+    _print_end_header()
+    if len(fail_patterns) == 0:
+        _print_error(
+            "Fail list is empty, if there are no more failures, please remove the file"
+        )
+        sys.exit(2)
+
+    final_result = 0
+
+    # First, run all the known good tests
+    gtest_filter = "-" + (":".join(map(lambda x: x["pattern"], fail_patterns)))
+    if _check_filter(base_invocation, gtest_filter):
+        result = _run_cmd(base_invocation, "known good tests", gtest_filter)
+        if result != 0 and not expected_fail:
+            _print_error("Tests we expected to pass have failed")
+            final_result = result
+    else:
+        _print_format("Note: No tests in this suite are expected to pass")
+
+    # Then run each known failing test
+    for fail in fail_patterns:
+        result = _run_cmd(
+            base_invocation, "failing test {}".format(fail["pattern"]), fail["pattern"]
+        )
+
+        if result == 0 and not fail["optional"]:
+            _print_error(
+                "Test {} is passing when we expect it to fail!", fail["pattern"]
+            )
+            final_result = 1
+
+    sys.exit(final_result)
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
index e69646e18b..2ccc267535 100644
--- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
+++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
@@ -13,7 +13,7 @@
 {{OPT}}BufferFillCommandTest.OverrideArgList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
-{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_
+{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
@@ -33,6 +33,6 @@
 {{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/SYCL_NATIVE_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
-{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_{{.*}}
+{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
diff --git a/test/conformance/queue/queue_adapter_level_zero.match b/test/conformance/queue/queue_adapter_level_zero.match
deleted file mode 100644
index 8b13789179..0000000000
--- a/test/conformance/queue/queue_adapter_level_zero.match
+++ /dev/null
@@ -1 +0,0 @@
-