diff --git a/.github/workflows/run-individual-script-tests.yml b/.github/workflows/run-individual-script-tests.yml
new file mode 100644
index 0000000000..04a709c974
--- /dev/null
+++ b/.github/workflows/run-individual-script-tests.yml
@@ -0,0 +1,33 @@
+# This workflow will run configured tests for any updated CM scripts
+name: Individual CM script Tests
+
+on:
+  pull_request:
+    branches: [ "main", "mlperf-inference", "dev" ]
+    paths:
+      - 'script/**_cm.json'
+      - 'script/**_cm.yml'
+
+jobs:
+  run-script-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+      - name: Get changed files
+        id: getfile
+        run: |
+          git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }}
+          git fetch upstream
+          echo "files=$(git diff upstream/${{ github.event.pull_request.base.ref }} --name-only | xargs)" >> $GITHUB_OUTPUT
+      - name: RUN Script Tests
+        run: |
+          echo ${{ steps.getfile.outputs.files }}
+          for file in ${{ steps.getfile.outputs.files }}; do
+            echo $file
+          done
+          python3 -m pip install cmind
+          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+          python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }}
diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml
new file mode 100644
index 0000000000..4fc6c0bc45
--- /dev/null
+++ b/.github/workflows/test-cm-based-submission-generation.yml
@@ -0,0 +1,42 @@
+# This workflow will test the submission generation capability of CM
+
+name: CM based Submission Generation
+
+on:
+  pull_request:
+    branches: [ "main", "dev", "mlperf-inference" ]
+    paths:
+      - '.github/workflows/test-cm-based-submission-generation.yml'
+      # - '**' # kept on for all the path instead of submission generation CM script so that this could help in trapping any bugs in any recent submission checker modification also
+      # - '!**.md'
+jobs:
+  Case-3:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: [ "3.12" ]
+        division: ["closed", "open"]
+        category: ["datacenter", "edge"]
+        exclude:
+          - os: macos-latest
+          - os: windows-latest
+          - division: "open"
+          - category: "edge"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python3 -m pip install cmind
+          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+      - name: Pull repo where test cases are uploaded
+        run: |
+          cm pull repo anandhu-eng@inference --checkout=submission-generation-tests
+      - name: Submission generation(model_mapping.json not present but model name is matching with the official one in submission checker) - ${{ matrix.category }} ${{ matrix.division }}
+        run: |
+          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$HOME/CM/repos/anandhu-eng@inference/case-3/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --category=${{ matrix.category }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet
diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml
index ed73c7ad03..597121fb0d 100644
--- a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml
+++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml
@@ -5,7 +5,7 @@ name: MLPerf loadgen with HuggingFace bert onnx fp32 squad model
 
 on:
   pull_request:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "dev", "mlperf-inference" ]
    paths:
       - '.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml'
       - '**'
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.12" ]
+        python-version: [ "3.10", "3.12" ]
 
     steps:
     - uses: actions/checkout@v3
@@ -30,7 +30,6 @@ jobs:
       run: |
         python3 -m pip install cmind
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-        cm run script --quiet --tags=get,sys-utils-cm
     - name: Test MLPerf loadgen with HuggingFace bert onnx fp32 squad model
       run: |
-        cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx --quiet
+        cm run script --tags=python,app,loadgen-generic,_onnxruntime,_custom,_huggingface,_model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1 --quiet
diff --git a/.github/workflows/test-nvidia-mlperf-implementation.yml b/.github/workflows/test-nvidia-mlperf-implementation.yml
index e51634c468..0691a58604 100644
--- a/.github/workflows/test-nvidia-mlperf-implementation.yml
+++ b/.github/workflows/test-nvidia-mlperf-implementation.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "04 18 * * *" #to be adjusted
+    - cron: "19 11 * * *" #to be adjusted
 
 jobs:
   build_nvidia:
diff --git a/automation/script/module.py b/automation/script/module.py
index 183f7c48c0..ce509b75cc 100644
--- a/automation/script/module.py
+++ b/automation/script/module.py
@@ -2425,10 +2425,46 @@ def test(self, i):
 
         alias = meta.get('alias','')
         uid = meta.get('uid','')
-
         if console:
             logging.info(path)
-            logging.info('  Test: TBD')
+
+        test_config = meta.get('tests', '')
+        if test_config:
+            logging.info(test_config)
+            test_all_variations = test_config.get('test-all-variations', False)
+            use_docker = test_config.get('use_docker', False)
+            if test_all_variations:
+                variations = meta.get("variations")
+                individual_variations = [ v for v in variations if variations[v].get('group', '') == '' and str(variations[v].get('exclude-in-test', '')).lower() not in [ "1", "true", "yes" ] ]
+                tags_string = ",".join(meta.get("tags"))
+                for variation in individual_variations:
+                    run_tags = f"{tags_string},_{variation}"
+                    if use_docker:
+                        docker_images = test_config.get('docker_images', [ "ubuntu-22.04" ])
+                        for docker_image in docker_images:
+                            ii = {'action':'docker',
+                                  'automation':'script',
+                                  'tags': run_tags,
+                                  'quiet': i.get('quiet'),
+                                  'docker_image': docker_image,
+                                  'docker_image_name': alias
+                                 }
+                            if i.get('docker_cm_repo', '') != '':
+                                ii['docker_cm_repo'] = i['docker_cm_repo']
+                            if i.get('docker_cm_repo_branch', '') != '':
+                                ii['docker_cm_repo_branch'] = i['docker_cm_repo_branch']
+
+                            r = self.cmind.access(ii)
+                            if r['return'] > 0:
+                                return r
+                    else:
+                        r = self.cmind.access({'action':'run',
+                                               'automation':'script',
+                                               'tags': run_tags,
+                                               'quiet': i.get('quiet') })
+                        if r['return'] > 0:
+                            return r
+
+        logging.info(' Test: WIP')
 
         return {'return':0, 'list': lst}
diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py
index 92c8a5f717..c4e508c352 100644
--- a/automation/script/module_misc.py
+++ b/automation/script/module_misc.py
@@ -1692,7 +1692,7 @@ def docker(i):
     env=i.get('env', {})
 
     noregenerate_docker_file = i.get('docker_noregenerate', False)
-    norecreate_docker_image = i.get('docker_norecreate', False)
+    norecreate_docker_image = i.get('docker_norecreate', True)
 
     if i.get('docker_skip_build', False):
         noregenerate_docker_file = True
diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py
index 34d6c97b95..f82893e39a 100644
--- a/script/generate-mlperf-inference-submission/customize.py
+++ b/script/generate-mlperf-inference-submission/customize.py
@@ -15,7 +15,7 @@ def fill_from_json(file_path, keys, sut_info):
     with open(file_path, 'r') as f:
         data = json.load(f)
         for key in keys:
-            if key in data and sut_info[key] is None:
+            if key in data and (sut_info[key] is None or sut_info[key] == "default"):
                 sut_info[key] = data[key]
             elif key in data and sut_info[key] != data[key]:
                 return -1 # error saying there is a mismatch in the value of a key
@@ -149,7 +149,8 @@ def generate_submission(i):
         "implementation": None,
         "device": None,
         "framework": None,
-        "run_config": None
+        "framework_version": "default",
+        "run_config": "default"
     } # variable to store the system meta
     model_mapping_combined = {} # to store all the model mapping related to an SUT
@@ -209,6 +210,7 @@ def generate_submission(i):
             implementation = sut_info["implementation"]
             device = sut_info["device"]
             framework = sut_info["framework"].replace(" ","_")
+            framework_version = sut_info["framework_version"]
             run_config = sut_info["run_config"]
             new_res = f"{system}-{implementation}-{device}-{framework}-{run_config}"
         else:
@@ -234,7 +236,7 @@ def generate_submission(i):
                 system_meta_default['framework'] = framework + " " + framework_version
             else:
                 print(parts)
-                return {'return': 1, 'error': f"The required details for generating the inference submission:\n1.system_name\n2.implementation\n3.framework\n4.run_config\nInclude a cm-sut-info.json file with the above content in {result_path}"}
+                return {'return': 1, 'error': f"The required details for generating the inference submission:\n1.hardware_name\n2.implementation\n3.device\n4.framework\n5.framework_version\n6.run_config\nInclude a cm-sut-info.json or sut-info.json file with the above content in {result_path}"}
 
     platform_prefix = inp.get('platform_prefix', '')
     if platform_prefix:
diff --git a/script/get-generic-sys-util/_cm.json b/script/get-generic-sys-util/_cm.json
index a1f9d09918..08df073dfa 100644
--- a/script/get-generic-sys-util/_cm.json
+++ b/script/get-generic-sys-util/_cm.json
@@ -721,5 +721,12 @@
         }
       }
     }
+  },
+  "tests": {
+    "test-all-variations": "yes",
+    "use_docker": "yes",
+    "docker_images": [
+      "ubuntu-22.04"
+    ]
   }
 }
diff --git a/script/get-generic-sys-util/run.sh b/script/get-generic-sys-util/run.sh
index 27c2f62867..5074e756b8 100644
--- a/script/get-generic-sys-util/run.sh
+++ b/script/get-generic-sys-util/run.sh
@@ -1,6 +1,20 @@
 #!/bin/bash
+# Safe execution of a command stored in a variable
+cmd="${CM_SYS_UTIL_INSTALL_CMD}"
+echo "$cmd"
 
-cmd=${CM_SYS_UTIL_INSTALL_CMD}
-echo $cmd
-eval $cmd
-test $? -eq 0 || exit $?
+# Execute the command and capture the exit status immediately,
+# before any other command can overwrite $?
+if ! eval "$cmd"; then
+  status=$?
+  echo "Command failed with status $status"
+  if [[ "${CM_TMP_FAIL_SAFE}" == 'yes' ]]; then
+    # Exit safely if fail-safe is enabled
+    echo "Fail-safe is enabled, exiting with status 0"
+    exit 0
+  else
+    # Otherwise exit with the actual error status
+    exit $status
+  fi
+else
+  exit 0
+fi
diff --git a/script/get-ml-model-huggingface-zoo/download_model.py b/script/get-ml-model-huggingface-zoo/download_model.py
index 4e6e9c86e8..87f9b25aeb 100644
--- a/script/get-ml-model-huggingface-zoo/download_model.py
+++ b/script/get-ml-model-huggingface-zoo/download_model.py
@@ -28,8 +28,7 @@
 
 model_filenames = model_filename.split(',') if ',' in model_filename else [model_filename]
 
-# First must be model
-base_model_filename = model_filenames[0]
+base_model_filepath = None
 
 files = []
 if full_subfolder!='':
@@ -93,15 +92,17 @@ def list_hf_files(path):
     xrevision = None if revision == '' else revision
     xsubfolder = None if subfolder == '' else subfolder
 
-    hf_hub_download(repo_id=model_stub,
+    downloaded_path = hf_hub_download(repo_id=model_stub,
                     subfolder=xsubfolder,
                     filename=model_filename,
-                    force_filename=model_filename,
                     revision=xrevision,
                     cache_dir=os.getcwd())
+    print(downloaded_path)
+    if not base_model_filepath:
+        base_model_filepath = downloaded_path
 
 print ('')
 
 with open('tmp-run-env.out', 'w') as f:
-    f.write(f"CM_ML_MODEL_FILE_WITH_PATH={os.path.join(os.getcwd(),base_model_filename)}")
+    f.write(f"CM_ML_MODEL_FILE_WITH_PATH={base_model_filepath}")
diff --git a/script/get-platform-details/_cm.json b/script/get-platform-details/_cm.json
index 11c57baa18..b02576ad9e 100644
--- a/script/get-platform-details/_cm.json
+++ b/script/get-platform-details/_cm.json
@@ -66,7 +66,10 @@
         "linux"
       ]
     },
-    "tags": "get,sys-util,generic,_linux-tools"
+    "tags": "get,sys-util,generic,_linux-tools",
+    "env": {
+      "CM_TMP_FAIL_SAFE": "yes"
+    }
   }
 ],
 "tags": [
diff --git a/script/get-platform-details/run.sh b/script/get-platform-details/run.sh
index d3fc7e0d37..ba2194e669 100644
--- a/script/get-platform-details/run.sh
+++ b/script/get-platform-details/run.sh
@@ -79,7 +79,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_FILE
 echo "14. cpupower frequency-info" >> $OUTPUT_FILE
 eval "cpupower frequency-info" >> $OUTPUT_FILE
-test $? -eq 0 || exit $?
+test $? -eq 0 || echo "FAILED: cpupower frequency-info" >> $OUTPUT_FILE
 
 echo "------------------------------------------------------------" >> $OUTPUT_FILE
 echo "15. sysctl" >> $OUTPUT_FILE
@@ -120,7 +120,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_FILE
 echo "21. dmidecode" >> $OUTPUT_FILE
 if [[ ${CM_SUDO_USER} == "yes" ]]; then
     eval "${CM_SUDO} dmidecode" >> $OUTPUT_FILE
-    test $? -eq 0 || exit $?
+    test $? -eq 0 || echo "FAILED: dmidecode" >> $OUTPUT_FILE
 else
     echo "Requires SUDO permission" >> $OUTPUT_FILE
 fi
@@ -129,7 +129,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_FILE
 echo "22. BIOS" >> $OUTPUT_FILE
 if [[ ${CM_SUDO_USER} == "yes" ]]; then
     eval "${CM_SUDO} dmidecode -t bios" >> $OUTPUT_FILE
-    test $? -eq 0 || exit $?
+    test $? -eq 0 || echo "FAILED: dmidecode -t bios" >> $OUTPUT_FILE
 else
     echo "Requires SUDO permission" >> $OUTPUT_FILE
 fi
diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py
index 20f8138bad..db7ef766ab 100644
--- a/script/run-docker-container/customize.py
+++ b/script/run-docker-container/customize.py
@@ -62,6 +62,7 @@ def preprocess(i):
     if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in [ "1", "true", "yes" ]: #container exists
         out = output_split[1].split(" ")
         existing_container_id = out[0]
+        print(f"Reusing existing container {existing_container_id}")
         env['CM_DOCKER_CONTAINER_ID'] = existing_container_id
 
     else:
@@ -302,7 +303,7 @@ def update_docker_info(env):
     if env.get('CM_DOCKER_IMAGE_NAME', '') != '':
         docker_image_name = env['CM_DOCKER_IMAGE_NAME']
     else:
-        docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-')
+        docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-').replace('+','plus')
     env['CM_DOCKER_IMAGE_NAME'] = docker_image_name
 
     docker_image_tag_extra = env.get('CM_DOCKER_IMAGE_TAG_EXTRA', '-latest')
diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py
index 629bc3a254..f0e1c02f5f 100644
--- a/script/run-mlperf-inference-app/customize.py
+++ b/script/run-mlperf-inference-app/customize.py
@@ -202,7 +202,7 @@ def preprocess(i):
                 docker_extra_input[k] = inp[k]
         inp = {}
         if str(docker_dt).lower() in ["yes", "true", "1"]:
-            env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'yes'
+            env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' # turning it off for the first run and after that we turn it on
 
         if env.get('CM_DOCKER_IMAGE_NAME', '') != '':
             docker_extra_input['docker_image_name'] = env['CM_DOCKER_IMAGE_NAME']
@@ -256,6 +256,7 @@ def preprocess(i):
                 print(f"\nStop Running loadgen scenario: {scenario} and mode: {mode}")
                 return {'return': 0} # We run commands interactively inside the docker container
             else:
+                env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'yes'
                 container_id = env_copy['CM_DOCKER_CONTAINER_ID']
                 env['CM_DOCKER_CONTAINER_ID'] = container_id
         if state.get('docker', {}):
diff --git a/tests/script/process_tests.py b/tests/script/process_tests.py
new file mode 100644
index 0000000000..59c3b67694
--- /dev/null
+++ b/tests/script/process_tests.py
@@ -0,0 +1,26 @@
+import sys
+import os
+import cmind as cm
+import check as checks
+import json
+import yaml
+
+files = sys.argv[1:]
+
+for file in files:
+    print(file)
+    if not os.path.isfile(file) or "script" not in file:
+        continue
+    # accept both YAML suffixes; the CI workflow triggers on *_cm.yml as well
+    if not file.endswith(("_cm.json", "_cm.yaml", "_cm.yml")):
+        continue
+    script_path = os.path.dirname(file)
+    with open(file) as f:
+        if file.endswith(".json"):
+            data = json.load(f)
+        else:
+            data = yaml.safe_load(f)
+    uid = data['uid']
+
+    r = cm.access({'action':'test', 'automation':'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con'})
+    checks.check_return(r)