Skip to content

Commit

Permalink
Add CI testing for purposeful YAML failures. (#29099)
Browse files Browse the repository at this point in the history
This should catch cases where, for some reason, we are _not_ running the YAML
tests correctly, and tests that should fail do not fail.
  • Loading branch information
bzbarsky-apple authored and pull[bot] committed Apr 24, 2024
1 parent 9760ad1 commit 4643074
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 6 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,21 @@ jobs:
--bridge-app ./out/linux-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
"
- name: Run purposeful failure tests using the python parser sending commands to chip-tool
run: |
./scripts/run_in_build_env.sh \
"./scripts/tests/run_test_suite.py \
--runner chip_tool_python \
--include-tags PURPOSEFUL_FAILURE \
--chip-tool ./out/linux-x64-chip-tool${CHIP_TOOL_VARIANT}-${BUILD_VARIANT}/chip-tool \
run \
--iterations 1 \
--expected-failures 1 \
--keep-going \
--test-timeout-seconds 120 \
--all-clusters-app ./out/linux-x64-all-clusters-${BUILD_VARIANT}/chip-all-clusters-app \
"
- name: Run Tests using chip-repl (skip slow)
if: github.event_name == 'pull_request'
run: |
Expand All @@ -225,6 +240,7 @@ jobs:
--exclude-tags IN_DEVELOPMENT \
--exclude-tags EXTRA_SLOW \
--exclude-tags SLOW \
--exclude-tags PURPOSEFUL_FAILURE \
run \
--iterations 1 \
--test-timeout-seconds 120 \
Expand Down Expand Up @@ -337,6 +353,21 @@ jobs:
--bridge-app ./out/darwin-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
"
- name: Run purposeful failure tests using the python parser sending commands to chip-tool
run: |
./scripts/run_in_build_env.sh \
"./scripts/tests/run_test_suite.py \
--runner chip_tool_python \
--include-tags PURPOSEFUL_FAILURE \
--chip-tool ./out/darwin-x64-chip-tool${CHIP_TOOL_VARIANT}-${BUILD_VARIANT}/chip-tool \
run \
--iterations 1 \
--expected-failures 1 \
--keep-going \
--test-timeout-seconds 120 \
--all-clusters-app ./out/darwin-x64-all-clusters-${BUILD_VARIANT}/chip-all-clusters-app \
"
- name: Uploading core files
uses: actions/upload-artifact@v3
if: ${{ failure() && !env.ACT }}
Expand Down
11 changes: 11 additions & 0 deletions scripts/tests/chiptest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ def _GetChipReplUnsupportedTests() -> Set[str]:
}


def _GetPurposefulFailureTests() -> Set[str]:
"""Tests that fail in YAML on purpose."""
return {
"TestPurposefulFailureEqualities.yaml"
}


def _AllYamlTests():
yaml_test_suite_path = Path(_YAML_TEST_SUITE_PATH)

Expand Down Expand Up @@ -270,6 +277,7 @@ def _AllFoundYamlTests(treat_repl_unsupported_as_in_development: bool, use_short
extra_slow_tests = _GetExtraSlowTests()
in_development_tests = _GetInDevelopmentTests()
chip_repl_unsupported_tests = _GetChipReplUnsupportedTests()
purposeful_failure_tests = _GetPurposefulFailureTests()

for path in _AllYamlTests():
if not _IsValidYamlTest(path.name):
Expand All @@ -291,6 +299,9 @@ def _AllFoundYamlTests(treat_repl_unsupported_as_in_development: bool, use_short
if path.name in in_development_tests:
tags.add(TestTag.IN_DEVELOPMENT)

if path.name in purposeful_failure_tests:
tags.add(TestTag.PURPOSEFUL_FAILURE)

if treat_repl_unsupported_as_in_development and path.name in chip_repl_unsupported_tests:
tags.add(TestTag.IN_DEVELOPMENT)

Expand Down
1 change: 1 addition & 0 deletions scripts/tests/chiptest/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ class TestTag(Enum):
IN_DEVELOPMENT = auto() # test may not pass or undergoes changes
CHIP_TOOL_PYTHON_ONLY = auto() # test uses YAML features only supported by the CHIP_TOOL_PYTHON runner.
EXTRA_SLOW = auto() # test uses Sleep and is generally _very_ slow (>= 60s is a typical threshold)
PURPOSEFUL_FAILURE = auto() # test fails on purpose

def to_s(self):
for (k, v) in TestTag.__members__.items():
Expand Down
33 changes: 27 additions & 6 deletions scripts/tests/run_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ def main(context, dry_run, log_level, target, target_glob, target_skip_glob,
TestTag.MANUAL,
TestTag.IN_DEVELOPMENT,
TestTag.FLAKY,
TestTag.EXTRA_SLOW
TestTag.EXTRA_SLOW,
TestTag.PURPOSEFUL_FAILURE,
}

if runtime != TestRunTime.CHIP_TOOL_PYTHON:
Expand Down Expand Up @@ -273,9 +274,19 @@ def cmd_list(context):
default=None,
type=int,
help='If provided, fail if a test runs for longer than this time')
@click.option(
'--expected-failures',
type=int,
default=0,
show_default=True,
help='Number of tests that are expected to fail in each iteration. Overall test will pass if the number of failures matches this. Nonzero values require --keep-going')
@click.pass_context
def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, ota_requestor_app,
tv_app, bridge_app, chip_repl_yaml_tester, chip_tool_with_python, pics_file, keep_going, test_timeout_seconds):
tv_app, bridge_app, chip_repl_yaml_tester, chip_tool_with_python, pics_file, keep_going, test_timeout_seconds, expected_failures):
if expected_failures != 0 and not keep_going:
logging.exception(f"'--expected-failures {expected_failures}' used without '--keep-going'")
sys.exit(2)

runner = chiptest.runner.Runner()

paths_finder = PathsFinder()
Expand Down Expand Up @@ -327,8 +338,14 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
apps_register = AppsRegister()
apps_register.init()

def cleanup():
apps_register.uninit()
if sys.platform == 'linux':
chiptest.linux.ShutdownNamespaceForTestExecution()

for i in range(iterations):
logging.info("Starting iteration %d" % (i+1))
observed_failures = 0
for test in context.obj.tests:
if context.obj.include_tags:
if not (test.tags & context.obj.include_tags):
Expand Down Expand Up @@ -357,13 +374,17 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
test_end = time.monotonic()
logging.exception('%-30s - FAILED in %0.2f seconds' %
(test.name, (test_end - test_start)))
observed_failures += 1
if not keep_going:
apps_register.uninit()
cleanup()
sys.exit(2)

apps_register.uninit()
if sys.platform == 'linux':
chiptest.linux.ShutdownNamespaceForTestExecution()
if observed_failures != expected_failures:
logging.exception(f'Iteration {i}: expected failure count {expected_failures}, but got {observed_failures}')
cleanup()
sys.exit(2)

cleanup()


# On linux, allow an execution shell to be prepared
Expand Down
44 changes: 44 additions & 0 deletions src/app/tests/suites/TestPurposefulFailureEqualities.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2023 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Test that purposefully fails in EqualityCommands

config:
nodeId: 0x12344321
cluster: "EqualityCommands"
endpoint: 1

tests:
- label: "Wait for the commissioned device to be retrieved"
cluster: "DelayCommands"
command: "WaitForCommissionee"
arguments:
values:
- name: "nodeId"
value: nodeId

- label:
"Compute the result of comparing 0 to 1 and claim that they are equal"
command: "UnsignedNumberEquals"
arguments:
values:
- name: "Value1"
value: 0
- name: "Value2"
value: 1
response:
- values:
- name: "Equals"
# This is the wrong value on purpose, so this test will fail.
value: true

0 comments on commit 4643074

Please sign in to comment.