Commit 8c23580

Testing scripts now depend on external source (#85)
As mentioned in [previous discussions](iree-org/iree#20138 (comment)), it would be ideal and sensible for the test data to be customizable by the user and sourced from within the IREE repo. As of [this commit in IREE](iree-org/iree@5ea6faa), the JSON configs and the MLIR tuner and E2E files now live in IREE. These pytest scripts now generate test cases and resolve external files from an external source specified by command-line arguments. FYI: linting fixed up some files, so a few of the changes are unrelated to this PR, but clean files are always good.
1 parent: 31b131f

36 files changed: +154 −1848 lines
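
The practical effect of this change is that the pytest suites locate their data via two new command-line options instead of files checked into this repo. A minimal sketch of the new invocation, assuming an IREE checkout next to this repo (the paths mirror the README change below):

```sh
# Fetch the test configurations and external files, which now live in the IREE repo
git clone https://github.com/iree-org/iree.git

# Point the benchmark suite at the externally sourced data via the new flags
pytest sharktank_models/benchmarks/ \
  --test-file-directory=iree/tests/external/iree-test-suites/sharktank_models/benchmarks \
  --external-file-directory=iree/build_tools/pkgci/external_test_suite
```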

linalg_ops/attention/generate_e2e_attention_tests.py

+1 −2

```diff
@@ -4,8 +4,7 @@
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""Generator for e2e attention tests.
-"""
+"""Generator for e2e attention tests."""
 
 import argparse
 import enum
```

linalg_ops/convolution/generate_e2e_conv2d_tests.py

+1 −2

```diff
@@ -4,8 +4,7 @@
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""Generator for e2e conv2d tests.
-"""
+"""Generator for e2e conv2d tests."""
 
 from typing import Optional
 import argparse
```

linalg_ops/matmul/generate_e2e_matmul_tests.py

+1 −2

```diff
@@ -4,8 +4,7 @@
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""iree_generated_e2e_matmul_test generator for e2e matmul tests.
-"""
+"""iree_generated_e2e_matmul_test generator for e2e matmul tests."""
 
 import argparse
 import enum
```

sharktank_models/action.yml

+13 −2

```diff
@@ -19,6 +19,12 @@ inputs:
     description: "Type of target to test"
   GPU:
     description: "Type of GPU to test"
+  EXTERNAL_TEST_FILE_DIRECTORY:
+    description: "The directory of test files and configurations"
+  QUALITY_TEST_DIRECTORY:
+    description: "The directory of quality tests"
+  BENCHMARK_TEST_DIRECTORY:
+    description: "The directory of benchmark tests"
 
 runs:
   using: "composite"
@@ -67,7 +73,9 @@ runs:
           --log-cli-level=info \
           --durations=0 \
           --timeout=1200 \
-          --capture=no
+          --capture=no \
+          --test-file-directory=${{ inputs.QUALITY_TEST_DIRECTORY }} \
+          --external-file-directory=${{ inputs.EXTERNAL_TEST_FILE_DIRECTORY }}
       env:
         ROCM_CHIP: ${{ inputs.CHIP }}
         SKU: ${{ inputs.SKU }}
@@ -82,7 +90,10 @@ runs:
           ${GITHUB_ACTION_PATH}/sharktank_models/benchmarks \
           --log-cli-level=info \
           --retries=7 \
-          --timeout=600
+          --timeout=600 \
+          --test-file-directory=${{ inputs.BENCHMARK_TEST_DIRECTORY }} \
+          --external-file-directory=${{ inputs.EXTERNAL_TEST_FILE_DIRECTORY }}
+
           echo "$(<${GITHUB_ACTION_PATH}/job_summary.md )" >> $GITHUB_STEP_SUMMARY
       env:
         ROCM_CHIP: ${{ inputs.CHIP }}
```
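
For context, a hypothetical workflow step invoking this composite action with the new inputs might look like the sketch below. The `uses:` path, step name, and input values are illustrative assumptions, not taken from this commit; the benchmark directory value mirrors the path used in the benchmarks README, and the quality-tests path is likewise a guess.

```yaml
# Hypothetical usage sketch: invoke the composite action and point it at
# externally sourced test data (the uses: path and all values are illustrative).
- name: Run sharktank model tests
  uses: iree-org/iree-test-suites/sharktank_models@main
  with:
    CHIP: gfx942
    SKU: mi300
    EXTERNAL_TEST_FILE_DIRECTORY: iree/build_tools/pkgci/external_test_suite
    QUALITY_TEST_DIRECTORY: iree/tests/external/iree-test-suites/sharktank_models/quality_tests
    BENCHMARK_TEST_DIRECTORY: iree/tests/external/iree-test-suites/sharktank_models/benchmarks
```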

sharktank_models/benchmarks/README.md

+13 −3

````diff
@@ -3,14 +3,24 @@
 ### Adding your own model
 
 - To add your own model, create a directory under `benchmarks` and add JSON files that correspond to the submodels and chip. Please follow the [JSON file schema in this README file](#required-and-optional-fields-for-the-json-model-file)
+- Please refer to the sample file `sdxl/clip_rocm.json`
 
 ### How to run the benchmark tests
 
 ```
+# Retrieving test files and external test files
+git clone https://github.com/iree-org/iree.git
+export PATH_TO_TESTS=iree/tests/external/iree-test-suites/sharktank_models/benchmarks
+export PATH_TO_EXTERNAL_FILES=iree/build_tools/pkgci/external_test_suite
+
+# running benchmark tests
+git clone https://github.com/iree-org/iree-test-suites.git
 pytest sharktank_models/benchmarks/ \
   --log-cli-level=info \
   --timeout=600 \
-  --retries=7
+  --retries=7 \
+  --test-file-directory=${PATH_TO_TESTS} \
+  --external-file-directory=${PATH_TO_EXTERNAL_FILES}
 ```
@@ -21,7 +31,7 @@ pytest sharktank_models/benchmarks/ \
 | compilation_required | optional | boolean | If true, this will let the benchmark test know that it needs to compile a file |
 | compiled_file_name | optional | string | When the compilation occurs, this will be the file name |
 | compile_flags | optional | array | An array of compiler flag options |
-| mlir_file_path | optional | string | Path to where the mlir file to compile is |
+| mlir_file_name | optional | string | The name of the MLIR file |
 | modules | optional | array | Specific to e2e, add modules here to include in the benchmarking test |
 | function_run | required | string | The function that the `iree-benchmark-module` will run and benchmark |
 | benchmark_repetitions | required | float | The number of times the benchmark tests will repeat |
@@ -31,7 +41,7 @@ pytest sharktank_models/benchmarks/ \
 | golden_time_ms | optional | object | An object of golden times, where the key is the sku and the value is the golden time in ms, (ex: `{"mi250": 100}`) |
 | golden_dispatch | optional | object | An object of golden dispatches, where the key is the sku and the value is the golden dispatch count, (ex: `{"mi250": 1602}`) |
 | golden_size | optional | object | An object of golden sizes, where the key is the sku and the value is the golden size in bytes, (ex: `{"mi250": 2000000}`) |
-| specific_chip_to_ignore | optional | array | An array of chip values, where the benchmark tests will ignore the chips specified |
+| specific_chip_to_ignore | optional | array | An array of chip values, where the benchmark tests will ignore the chips specified |
 | real_weights_file_name | optional | string | If real weights is a different file name, specify it here in order to get the correct real weights file |
 
 Please feel free to look at any JSON examples under a model directory (ex: sdxl)
````
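
For reference, a minimal sketch of a benchmark JSON file using the schema above. The field names come from the table (plus `inputs` and `device`, which `model_benchmark_run.py` also reads); every value, including the file names and the compiler flag, is illustrative rather than taken from a real config:

```json
{
  "inputs": ["1x64xi64"],
  "device": "hip",
  "function_run": "encode_prompts",
  "benchmark_repetitions": 3,
  "compilation_required": true,
  "mlir_file_name": "sdxl_clip_pipeline.mlir",
  "compiled_file_name": "clip_compiled_module",
  "compile_flags": ["--iree-hal-target-backends=rocm"],
  "golden_time_ms": { "mi250": 9.0 },
  "golden_dispatch": { "mi250": 794 },
  "golden_size": { "mi250": 460000 },
  "specific_chip_to_ignore": ["gfx90a"]
}
```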

sharktank_models/benchmarks/conftest.py

+45 −14

```diff
@@ -19,14 +19,43 @@
 logger = logging.getLogger(__name__)
 
 
+def pytest_addoption(parser):
+    parser.addoption(
+        "--test-file-directory",
+        action="store",
+        help="The directory of benchmark test JSON files to build and run test cases",
+    )
+
+    parser.addoption(
+        "--external-file-directory",
+        action="store",
+        help="The directory of external test files (ex: E2E MLIR, tuner files)",
+    )
+
+
 def pytest_sessionstart(session):
+    logger.info("Pytest benchmark test session is starting")
     with open("job_summary.md", "a") as job_summary, open(
         "job_summary.json", "w+"
     ) as content:
         print(f"{sku.upper()} Complete Benchmark Summary:\n", file=job_summary)
         json.dump({}, content)
 
-    logger.info("Pytest benchmark test session is starting")
+    # Collect all .json files for benchmark tests
+    session.config.benchmark_test_files = []
+    path_of_benchmark_tests = Path(session.config.getoption("test_file_directory"))
+    test_files = sorted(path_of_benchmark_tests.glob("**/*.json"))
+    session.config.benchmark_test_files.extend(test_files)
+
+    # Keeping track of all external test files and their paths
+    session.config.external_test_files = {}
+    path_of_external_test_files = Path(
+        session.config.getoption("external_file_directory")
+    )
+    external_files = sorted(path_of_external_test_files.glob("*"))
+    for external_file in external_files:
+        file_name = external_file.name
+        session.config.external_test_files[file_name] = external_file
 
 
 def pytest_sessionfinish(session, exitstatus):
@@ -66,30 +95,32 @@ def pytest_sessionfinish(session, exitstatus):
 
 
 def pytest_collect_file(parent, file_path):
-    if (
-        file_path.suffix == ".json"
-        and "job_summary" not in file_path.name
-        and "benchmarks" in str(THIS_DIR)
-    ):
+    # Run only the benchmark test for this directory
+    if "model_benchmark_run" in str(file_path):
         return SharkTankModelBenchmarkTests.from_parent(parent, path=file_path)
 
 
 @dataclass(frozen=True)
 class BenchmarkTestSpec:
     model_name: str
     benchmark_file_name: str
+    file_path: Path
+    external_test_files: dict
 
 
 class SharkTankModelBenchmarkTests(pytest.File):
     def collect(self):
-        path = str(self.path).split("/")
-        benchmark_file_name = path[-1].replace(".json", "")
-        model_name = path[-2]
+        for file_path in self.config.benchmark_test_files:
+            benchmark_file_name = file_path.stem
+            model_name = str(file_path.parent)
 
-        item_name = f"{model_name} :: {benchmark_file_name}"
+            item_name = f"{model_name} :: {benchmark_file_name}"
 
-        spec = BenchmarkTestSpec(
-            model_name=model_name, benchmark_file_name=benchmark_file_name
-        )
+            spec = BenchmarkTestSpec(
+                model_name=model_name,
+                benchmark_file_name=benchmark_file_name,
+                file_path=file_path,
+                external_test_files=self.config.external_test_files,
+            )
 
-        yield ModelBenchmarkRunItem.from_parent(self, name=item_name, spec=spec)
+            yield ModelBenchmarkRunItem.from_parent(self, name=item_name, spec=spec)
```
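
With this collection scheme, every JSON file found under `--test-file-directory` becomes one pytest item named `<parent dir> :: <file stem>`. A small standalone sketch of that naming, mirroring the logic in `collect()` above (the example path is illustrative):

```python
from pathlib import Path

# Each collected JSON path becomes an item named "<parent dir> :: <file stem>".
file_path = Path("sdxl/clip_rocm.json")      # illustrative collected path
benchmark_file_name = file_path.stem          # "clip_rocm"
model_name = str(file_path.parent)            # "sdxl"
item_name = f"{model_name} :: {benchmark_file_name}"
print(item_name)  # sdxl :: clip_rocm
```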

sharktank_models/benchmarks/external_test_files/sdxl_pipeline_bench_f16.mlir

-23
This file was deleted.

sharktank_models/benchmarks/model_benchmark_run.py

+6 −5

```diff
@@ -91,15 +91,13 @@ def __init__(self, spec, **kwargs):
         super().__init__(**kwargs)
         self.spec = spec
         self.model_name = self.spec.model_name
+        self.file_path = self.spec.file_path
         self.benchmark_file_name = self.spec.benchmark_file_name
-        SUBMODEL_FILE_PATH = (
-            THIS_DIR / f"{self.model_name}/{self.benchmark_file_name}.json"
-        )
         split_file_name = self.benchmark_file_name.split("_")
         self.submodel_name = "_".join(split_file_name[:-1])
         type_of_backend = split_file_name[-1]
 
-        with open(SUBMODEL_FILE_PATH, "r") as file:
+        with open(self.file_path, "r") as file:
             data = json.load(file)
 
         self.inputs = data.get("inputs", [])
@@ -124,7 +122,10 @@ def __init__(self, spec, **kwargs):
         # custom configurations related to e2e testing
         self.compilation_required = data.get("compilation_required", False)
         self.compiled_file_name = data.get("compiled_file_name")
-        self.mlir_file_path = data.get("mlir_file_path", "")
+        mlir_file_name = data.get("mlir_file_name", "")
+        external_test_files = self.spec.external_test_files
+        if mlir_file_name:
+            self.mlir_file_path = external_test_files.get(mlir_file_name, "")
         self.modules = data.get("modules", [])
         self.device = data.get("device")
 
```
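The benchmark item now resolves MLIR inputs by name rather than by path: the `mlir_file_name` from the JSON config is looked up in the `external_test_files` map that conftest.py builds from `--external-file-directory`. A hypothetical standalone sketch of that lookup, with made-up file names and paths:

```python
from pathlib import Path

# Hypothetical stand-in for conftest's scan of --external-file-directory:
# maps each external file's name to its full path.
external_test_files = {
    "sdxl_pipeline_bench_f16.mlir": Path("/ext/sdxl_pipeline_bench_f16.mlir"),
}

# Mirrors the lookup in model_benchmark_run.py: name -> full path, "" if absent.
mlir_file_name = "sdxl_pipeline_bench_f16.mlir"
mlir_file_path = external_test_files.get(mlir_file_name, "") if mlir_file_name else ""
print(mlir_file_path)  # /ext/sdxl_pipeline_bench_f16.mlir
```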

sharktank_models/benchmarks/sdxl/clip_rocm.json

+8 −8

```diff
@@ -18,18 +18,18 @@
     "mi308": 1.1
   },
   "golden_time_ms": {
-    "mi250": 14.5,
+    "mi250": 9.0,
     "mi300": 15.0,
-    "mi308": 15.0
+    "mi308": 12.0
   },
   "golden_dispatch": {
-    "mi250": 1139,
-    "mi300": 1139,
-    "mi308": 1139
+    "mi250": 794,
+    "mi300": 794,
+    "mi308": 794
   },
   "golden_size": {
-    "mi250": 860000,
-    "mi300": 860000,
-    "mi308": 860000
+    "mi250": 460000,
+    "mi300": 460000,
+    "mi308": 460000
   }
 }
```

sharktank_models/benchmarks/sdxl/e2e_rocm.json

-44
This file was deleted.

sharktank_models/benchmarks/sdxl/punet_int8_fp16_rocm.json

-35
This file was deleted.

sharktank_models/benchmarks/sdxl/punet_int8_fp8_rocm.json

-35
This file was deleted.
