From f576d52a5b576fef7034033da4e141a2cc745cfe Mon Sep 17 00:00:00 2001 From: nvitramble <84538536+nvitramble@users.noreply.github.com> Date: Tue, 12 Jul 2022 09:30:58 -0700 Subject: [PATCH 1/5] Add new BERT calibration dataset (#1171) --- calibration/SQuAD-v1.1/README.md | 7 +- ...tion.txt => bert_calibration_features.txt} | 0 .../SQuAD-v1.1/bert_calibration_qas_ids.txt | 100 ++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) rename calibration/SQuAD-v1.1/{bert-calibration.txt => bert_calibration_features.txt} (100%) create mode 100755 calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt diff --git a/calibration/SQuAD-v1.1/README.md b/calibration/SQuAD-v1.1/README.md index 5f0fad6d3..85b2513d7 100644 --- a/calibration/SQuAD-v1.1/README.md +++ b/calibration/SQuAD-v1.1/README.md @@ -1 +1,6 @@ -The calibration file has 100 randomly selected samples from dev-1.1.json, which contains 10570 samples in total. +The integers in bert_calibration_features.txt correspond to 100 randomly selected indices in the list of features generated from dev-v1.1.json using [convert_examples_to_features()](https://github.com/mlcommons/inference/blob/master/language/bert/create_squad_data.py#L249) with a doc_stride of 128 and a max_seq_len of 384. + +The values in bert_calibration_qas_ids.txt correspond to 100 randomly selected qas ids in the dev-v1.1.json file. + +Please only use at most 1 calibration file from this folder for calibration. + diff --git a/calibration/SQuAD-v1.1/bert-calibration.txt b/calibration/SQuAD-v1.1/bert_calibration_features.txt similarity index 100% rename from calibration/SQuAD-v1.1/bert-calibration.txt rename to calibration/SQuAD-v1.1/bert_calibration_features.txt diff --git a/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt b/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt new file mode 100755 index 000000000..5d6f45989 --- /dev/null +++ b/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt @@ -0,0 +1,100 @@ +573020f7b2c2fd14005688fa +56beb6f23aeaaa14008c92a1 +5737a5931c456719005744e9 +5725d79e89a1e219009abf91 +56e0d9e0231d4119001ac43f +57281ab63acd2414000df496 +57269fab5951b619008f780b +5726400589a1e219009ac5f0 +572fd264b2c2fd14005684aa +56f85e71a6d7ea1400e175c4 +5728804b4b864d1900164a47 +57264cac708984140094c1b4 +5726bf135951b619008f7ceb +5728848cff5b5019007da298 +572fbf21a23a5019007fc93b +5727448b5951b619008f87a1 +5729e1101d04691400779641 +56e11afbcd28a01900c675c9 +5726642f5951b619008f7159 +56e08d32231d4119001ac2b1 +57265d86f1498d1400e8dd50 +56f7eddca6d7ea1400e172d9 +56de1645cffd8e1900b4b5d1 +5726a5525951b619008f78df +56f851b1a6d7ea1400e1755e +572a18a4af94a219006aa7e2 +57286bb84b864d19001649ca +571bb2269499d21900609cab +56d7251d0d65d214001983cc +56f88eafaef2371900626194 +571cde695efbb31900334e16 +57294279af94a219006aa20a +56bec98e3aeaaa14008c9457 +57269656708984140094cb01 +56be54bdacb8001400a50323 +571c9074dd7acb1400e4c100 +56f8b4d79b226e1400dd0e78 +5710f2e2a58dae1900cd6b73 +572683e6f1498d1400e8e24e +56f7f2e0aef2371900625cb3 +572fadcbb2c2fd1400568329 +5725fabc89a1e219009ac12a +5727aa413acd2414000de924 +56e77da237bdd419002c403d +5729e2b76aef0514001550d2 +57265e11708984140094c3bd +5726bf325951b619008f7d01 +57335fcad058e614000b5973 +572663a9f1498d1400e8ddf2 +57299ec43f37b3190047850f +56f80e1daef2371900625d8d +572689b6dd62a815002e8892 +57264a74708984140094c18c +57274d1cdd62a815002e9ab2 +572871bc4b864d1900164a04 +56d7018a0d65d214001982c5 +57111713a58dae1900cd6c02 +56bebbbf3aeaaa14008c9317 +57300e2604bcaa1900d770b7 +56f8074faef2371900625d7a +5727c94bff5b5019007d954b 
+5727ffb5ff5b5019007d9a8d +56e75d5037bdd419002c3ef8 +57273e50dd62a815002e9a05 +5729582b1d046914007792e4 +57290ee2af94a219006aa003 +57286ec63acd2414000df9d4 +572632ceec44d21400f3dc30 +5726f635dd62a815002e9658 +572a1f086aef0514001552c2 +57269344f1498d1400e8e440 +56bec6ac3aeaaa14008c93ff +57283adcff5b5019007d9f96 +5733266d4776f41900660714 +5725d79e89a1e219009abf94 +57280f974b864d1900164372 +570960cf200fba1400367f04 +570d28bdb3d812140066d4a7 +56e1c0f6cd28a01900c67b2e +56bec3153aeaaa14008c938b +57284618ff5b5019007da0ac +571c3e8cdd7acb1400e4c0a7 +5728fb6a1d04691400778ef6 +5726ef12dd62a815002e95a0 +57296f85af94a219006aa404 +572fe288a23a5019007fcadb +5727500f708984140094dbff +572fc659b2c2fd1400568449 +570d3468b3d812140066d545 +572a07c11d046914007796d5 +56e1fc57e3433e140042322c +573098f38ab72b1400f9c5d5 +56e1b355e3433e14004230b2 +57280cac2ca10214002d9cac +57287b4a4b864d1900164a2b +56bf36b93aeaaa14008c9565 +5728202c4b864d19001644ec +5728dab94b864d1900164f99 +57376a1bc3c5551400e51ec5 +57377083c3c5551400e51ee2 From fc02e9b19d1a3dc1b18cc425d6c2d7b25407ea96 Mon Sep 17 00:00:00 2001 From: Bruno Ferreira Date: Mon, 18 Jul 2022 16:31:27 +0100 Subject: [PATCH 2/5] Update CLA bot (#1180) --- .github/workflows/cla.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 8880b7449..c0e1544d4 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -1,3 +1,4 @@ + name: "cla-bot" on: issue_comment: @@ -12,20 +13,19 @@ jobs: - name: "MLCommons CLA bot check" if: (github.event.comment.body == 'recheck') || github.event_name == 'pull_request_target' # Alpha Release - uses: sub-mod/github-action@v3 + uses: mlcommons/cla-bot@master env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # the below token should have repo scope and must be manually added by you in the repository's secret PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }} with: path-to-signatures: 'cla-bot/v1/cla.json' - path-to-document: 'https://github.com/mlcommons/systems/blob/main/mlcommons_cla.txt' # e.g. 
a CLA or a DCO document # branch should not be protected branch: 'main' allowlist: user1,bot* remote-organization-name: mlcommons remote-repository-name: systems - + #below are the optional inputs - If the optional inputs are not given, then default values will be taken #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) From ef8e58e1e612ee991ea537c02e289ae48e8414d2 Mon Sep 17 00:00:00 2001 From: georgelyuan <53881988+georgelyuan@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:40:43 -0700 Subject: [PATCH 3/5] removing obselete audit directory (#1188) --- compliance/audit_v0.5/nvidia/TEST01/README | 61 ------- .../audit_v0.5/nvidia/TEST01/audit.config | 18 --- .../audit_v0.5/nvidia/TEST01/gnmt/README | 25 --- .../TEST01/gnmt/create_accuracy_baseline.sh | 19 --- .../audit_v0.5/nvidia/TEST01/truncate_log.py | 51 ------ .../audit_v0.5/nvidia/TEST01/truncate_log.sh | 11 -- .../nvidia/TEST01/verify_accuracy.py | 103 ------------ .../nvidia/TEST01/verify_performance.py | 126 --------------- compliance/audit_v0.5/nvidia/TEST03/README | 34 ---- .../nvidia/TEST03/download_and_modify_gnmt.sh | 149 ------------------ .../nvidia/TEST03/modify_gnmt_data.py | 147 ----------------- .../nvidia/TEST03/modify_image_data.py | 120 -------------- .../nvidia/TEST03/verify_performance.py | 126 --------------- compliance/audit_v0.5/nvidia/TEST04-A/README | 39 ----- .../audit_v0.5/nvidia/TEST04-A/audit.config | 16 -- .../TEST04-A/verify_test4_performance.py | 135 ---------------- compliance/audit_v0.5/nvidia/TEST04-B/README | 1 - .../audit_v0.5/nvidia/TEST04-B/audit.config | 20 --- compliance/audit_v0.5/nvidia/TEST05/README | 25 --- .../audit_v0.5/nvidia/TEST05/audit.config | 22 --- .../nvidia/TEST05/verify_performance.py | 126 --------------- 21 files changed, 1374 deletions(-) delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/audit.config delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/gnmt/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/gnmt/create_accuracy_baseline.sh delete mode 100755 compliance/audit_v0.5/nvidia/TEST01/truncate_log.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/truncate_log.sh delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/verify_accuracy.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST01/verify_performance.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST03/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST03/download_and_modify_gnmt.sh delete mode 100644 compliance/audit_v0.5/nvidia/TEST03/modify_gnmt_data.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST03/modify_image_data.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST03/verify_performance.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST04-A/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST04-A/audit.config delete mode 100644 compliance/audit_v0.5/nvidia/TEST04-A/verify_test4_performance.py delete mode 100644 compliance/audit_v0.5/nvidia/TEST04-B/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST04-B/audit.config delete mode 100644 compliance/audit_v0.5/nvidia/TEST05/README delete mode 100644 compliance/audit_v0.5/nvidia/TEST05/audit.config delete mode 100644 
compliance/audit_v0.5/nvidia/TEST05/verify_performance.py
diff --git a/compliance/audit_v0.5/nvidia/TEST01/README b/compliance/audit_v0.5/nvidia/TEST01/README
deleted file mode 100644
index 1c6c6739b..000000000
--- a/compliance/audit_v0.5/nvidia/TEST01/README
+++ /dev/null
@@ -1,61 +0,0 @@
-The purpose of this test is to ensure that valid inferences are being performed in performance mode. By default,
-the inference result that is returned from the SUT to Loadgen is not dumped to the accuracy JSON file and thus not
-checked for accuracy. In this test, a portion of the results is dumped to the accuracy JSON at random with some
-chosen probability. This accuracy JSON file can then be checked against the accuracy JSON generated in accuracy
-mode.
-
-Note that under the MLPerf v0.5 inference rules, certain forms of non-determinism are acceptable, which can cause
-inference results to differ across runs. It is foreseeable that the results obtained during the accuracy run
-will differ from those obtained during the performance run, which will cause the accuracy checking script
-to report failure. Test failure will automatically result in an objection, but the objection can be overturned
-by comparing the quality of the results generated in performance mode to that obtained in accuracy mode. This
-can be done by using the accuracy measurement scripts provided as part of the repo to ensure that the
-classification accuracy/mAP/BLEU score meets the target. An example is provided for GNMT in the gnmt folder.
-
-If performance with sampling enabled is lower than the submitted performance score, accuracy_log_probability in
-the config file can be reduced from 10 (%) to check that performance approaches the reported score.
-
-Note that for high-performance machines, a logging probability of 10% can result in a massive number of logged
-results. To keep the size of the accuracy log file reasonable, accuracy_log_probability should be reduced to
-keep the total number of logged inferences on the order of ~1000 samples for SSD-Large, and ~10000 samples
-elsewhere. The probability setting can be calculated as follows:
-    accuracy_log_probability = 100% * (1000 or 10000) / <total number of inferences in the performance run>
-Alternatively, if the accuracy log is too large, it can be truncated using the provided truncate_log.sh script
-before uploading results.
-
-UPDATE: An alternate script has been provided for reducing the accuracy log. Check truncate_log.py
-
-The mode is set assuming that an accuracy JSON generated in accuracy mode already exists (i.e. from the submission results)
-that can be used for verification. If not, the mode should be changed from PerformanceOnly (mode=2) to
-AccuracyOnly (mode=1) so that accuracy mode results can be generated.
-
-This test does not use a custom dataset or weights.
-
-Instructions
-
-Part I
-Run the test with the provided audit.config.
-Note that audit.config must be copied to the directory where the benchmark is being run from.
-Verification that audit.config was properly read can be done by checking that the settings in the summary txt
-file match what is in audit.config.
-
-Part II
-The first check is to ensure that accuracy during the performance portion of the run matches that achieved in
-accuracy mode.
-    python verify_accuracy.py -a <path to accuracy-mode accuracy JSON> -p <path to performance-mode accuracy JSON>
-
-Expected outcome:
-    TEST PASS
-
-Part III
-The second check is to ensure that performance with accuracy logging enabled matches the submission performance score.
-
-    python verify_performance.py -r <path to reference submission summary> -t <path to this test's summary>
-
-Expected outcome:
-    TEST PASS
-    for sufficiently small but non-zero accuracy_log_probability
-
-Part IV
-Truncate logs to reduce accuracy log file size in preparation for uploading
-    bash ./truncate_log.sh <path to accuracy log> <number of samples to keep>
diff --git a/compliance/audit_v0.5/nvidia/TEST01/audit.config b/compliance/audit_v0.5/nvidia/TEST01/audit.config
deleted file mode 100644
index 3947cdf5f..000000000
--- a/compliance/audit_v0.5/nvidia/TEST01/audit.config
+++ /dev/null
@@ -1,18 +0,0 @@
-# The format of this config file is 'key = value'.
-# The key has the format 'model.scenario.key'. Value is mostly int64_t.
-# Model maybe '*' as wildcard. In that case the value applies to all models.
-# All times are in milli seconds
-
-# mode dictionary (0 = submission, 1 = accuracy, 2 = performance, 3 = find peak perf)
-*.MultiStream.mode = 2
-*.MultiStream.accuracy_log_rng_seed = 123
-*.MultiStream.accuracy_log_probability = 10
-*.Offline.mode = 2
-*.Offline.accuracy_log_rng_seed = 456
-*.Offline.accuracy_log_probability = 10
-*.SingleStream.mode = 2
-*.SingleStream.accuracy_log_rng_seed = 789
-*.SingleStream.accuracy_log_probability = 10
-*.Server.mode = 2
-*.Server.accuracy_log_rng_seed = 147
-*.Server.accuracy_log_probability = 10
diff --git a/compliance/audit_v0.5/nvidia/TEST01/gnmt/README b/compliance/audit_v0.5/nvidia/TEST01/gnmt/README
deleted file mode 100644
index 21d20bb8c..000000000
--- a/compliance/audit_v0.5/nvidia/TEST01/gnmt/README
+++ /dev/null
@@ -1,25 +0,0 @@
-In the case where performance mode results differ from accuracy mode results,
-which would automatically result in an objection being raised, the objection
-can be overturned by evaluating the quality of the results in the two modes
-and ensuring that accuracy is maintained.
-create_accuracy_baseline.sh attempts to facilitate this by using the perf
-mode results to create a baseline accuracy log from the accuracy mode results
-that contains the same subset of the full dataset as the provided performance
-mode log. This allows for an apples-to-apples comparison using the GNMT
-accuracy checking script provided in the MLPerf inference repository.
-The scores should be reported to the result committee, which will then review
-and evaluate whether the objection will be permitted to be overturned.
- -Note: You may see a python error about: - module 'tensorflow' has no attribute 'gfile' -in which case, replace tf.gfile.GFile with tf.io.gfile.GFile in process_accuracy.py -Usage: -1) bash ./create_accuracy_baseline.sh -2) python inference/v0.5/translation/gnmt/tensorflow/process_accuracy.py \ - --accuracy_log \ - --reference /gnmt/newstest2014.tok.bpe.32000.de -3) python inference/v0.5/translation/gnmt/tensorflow/process_accuracy.py \ - --accuracy_log \ - --reference /gnmt/newstest2014.tok.bpe.32000.de -4) Upload accuracy logs and report BLEU scores - diff --git a/compliance/audit_v0.5/nvidia/TEST01/gnmt/create_accuracy_baseline.sh b/compliance/audit_v0.5/nvidia/TEST01/gnmt/create_accuracy_baseline.sh deleted file mode 100644 index 7fe622efc..000000000 --- a/compliance/audit_v0.5/nvidia/TEST01/gnmt/create_accuracy_baseline.sh +++ /dev/null @@ -1,19 +0,0 @@ -# Usage: -# 1) bash ./create_accuracy_baseline.sh -# 2) python inference/v0.5/translation/gnmt/tensorflow/process_accuracy.py -# 3) python inference/v0.5/translation/gnmt/tensorflow/process_accuracy.py on generated baseline -# 4) Compare BLEU scores - -#!/bin/bash -accuracy_log=$1 -perf_log=$2 -patterns="unique_patterns.txt" -accuracy_baseline=$(basename -- "$accuracy_log") -accuracy_baseline="${accuracy_baseline%.*}"_baseline.json - -cut -d ':' -f 2,3 ${perf_log} | cut -d ',' -f 2- | sort | uniq | grep qsl > ${patterns} -echo '[' > ${accuracy_baseline} -grep -f ${patterns} ${accuracy_log} >> ${accuracy_baseline} -sed -i '$ s/,$/]/g' ${accuracy_baseline} -rm ${patterns} -echo "Created a baseline accuracy file: ${accuracy_baseline}" diff --git a/compliance/audit_v0.5/nvidia/TEST01/truncate_log.py b/compliance/audit_v0.5/nvidia/TEST01/truncate_log.py deleted file mode 100755 index 3c87b0e20..000000000 --- a/compliance/audit_v0.5/nvidia/TEST01/truncate_log.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2019 The MLPerf Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import os -import sys -import argparse -import json - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument('log') - - return parser.parse_args() - -def main(args): - - print('Load log from {0}'.format(args.log)) - with open(args.log, 'r') as f: - results = json.load(f) - - print('Processing log entries') - rmap = {} - truncated_results = [] - for j in results: - idx = j['qsl_idx'] - if idx in rmap and rmap[idx] == j['data']: - continue - else: - truncated_results.append(j) - if idx not in rmap: - rmap[idx] = j['data'] - print('original: {0} => truncated: {1}'.format(len(results), len(truncated_results))) - - print('Write truncated log to {0}.new'.format(args.log)) - with open(args.log+'.new', 'w') as f: - json.dump(truncated_results, f, indent=4) - -if __name__ == '__main__': - main(parse_args()) diff --git a/compliance/audit_v0.5/nvidia/TEST01/truncate_log.sh b/compliance/audit_v0.5/nvidia/TEST01/truncate_log.sh deleted file mode 100644 index 52073d41b..000000000 --- a/compliance/audit_v0.5/nvidia/TEST01/truncate_log.sh +++ /dev/null @@ -1,11 +0,0 @@ -# Usage: -# 1) bash ./truncate_log.sh - -#!/bin/bash -log=$1 -samples=$2 - -head -n $((samples + 1)) ${log} > ${log}.new -sed -i '$ s/,$/]/g' ${log}.new -rm ${log} -mv ${log}.new ${log} diff --git a/compliance/audit_v0.5/nvidia/TEST01/verify_accuracy.py b/compliance/audit_v0.5/nvidia/TEST01/verify_accuracy.py deleted file mode 100644 index 91d0e8c2a..000000000 --- a/compliance/audit_v0.5/nvidia/TEST01/verify_accuracy.py +++ /dev/null @@ -1,103 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys -sys.path.append(os.getcwd()) - -import argparse -import json - -import numpy as np - -dtype_map = { - "byte": np.byte, - "float32": np.float32, - "int32": np.int32, - "int64": np.int64 -} - -def main(): - - py3 = sys.version_info >= (3,0) - # Parse arguments to identify the path to the accuracy logs from - # the accuracy and performance runs - parser = argparse.ArgumentParser() - parser.add_argument( - "--accuracy_log", "-a", - help="Specifies the path to the accuracy log from a submission/accuracy run.", - default="" - ) - parser.add_argument( - "--performance_log", "-p", - help="Specifies the path to the accuracy log from a performance run with accuracy log sampling enabled.", - default="" - ) - parser.add_argument( - "--dtype", default="byte", choices=["byte", "float32", "int32", "int64"], help="data type of the label") - args = parser.parse_args() - - print("Verifying accuracy. 
This might take a while...") - acc_log = args.accuracy_log - perf_log = args.performance_log - with open(acc_log, "r") as acc_json: - acc_data = json.load(acc_json) - - with open(perf_log, "r") as perf_json: - perf_data = json.load(perf_json) - - # read accuracy log json and create a dictionary of qsl_idx/data pairs - results_dict = {} - num_acc_log_duplicate_keys = 0 - num_acc_log_data_mismatch = 0 - num_perf_log_qsl_idx_match = 0 - num_perf_log_data_mismatch = 0 - num_missing_qsl_idxs = 0 - - print("Reading accuracy mode results...") - for sample in acc_data: - #print sample["qsl_idx"] - qsl_idx = sample["qsl_idx"] - data = sample["data"] - if data == '': - data = "" - if qsl_idx in results_dict.keys(): - num_acc_log_duplicate_keys += 1 - if results_dict[qsl_idx] != data: - num_acc_log_data_mismatch += 1 - else: - results_dict[qsl_idx] = data - - print("Reading performance mode results...") - for sample in perf_data: - qsl_idx = sample["qsl_idx"] - data = np.frombuffer(bytes.fromhex(sample['data']), dtype_map[args.dtype]) if py3 == True \ - else np.frombuffer(bytearray.fromhex(sample['data']), dtype_map[args.dtype]) - - if qsl_idx in results_dict.keys(): - num_perf_log_qsl_idx_match += 1 - data_perf = np.frombuffer(bytes.fromhex(results_dict[qsl_idx]), dtype_map[args.dtype]) \ - if py3 == True else np.frombuffer(bytearray.fromhex(results_dict[qsl_idx]), dtype_map[args.dtype]) - if data_perf.size == 0 or data.size == 0: - if data_perf.size != data.size: - num_perf_log_data_mismatch += 1 - elif data[0] != data_perf[0]: - num_perf_log_data_mismatch += 1 - else: - num_missing_qsl_idxs += 1 - - results_dict[sample["qsl_idx"]] = sample["data"] - - - print("num_acc_log_entries = {:}".format(len(acc_data))) - print("num_acc_log_duplicate_keys = {:}".format(num_acc_log_duplicate_keys)) - print("num_acc_log_data_mismatch = {:}".format(num_acc_log_data_mismatch)) - print("num_perf_log_entries = {:}".format(len(perf_data))) - print("num_perf_log_qsl_idx_match = {:}".format(num_perf_log_qsl_idx_match)) - print("num_perf_log_data_mismatch = {:}".format(num_perf_log_data_mismatch)) - print("num_missing_qsl_idxs = {:}".format(num_missing_qsl_idxs)) - if num_perf_log_data_mismatch > 0 : - print("TEST FAIL\n"); - else : - print("TEST PASS\n"); - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/compliance/audit_v0.5/nvidia/TEST01/verify_performance.py b/compliance/audit_v0.5/nvidia/TEST01/verify_performance.py deleted file mode 100644 index 09fcbfb91..000000000 --- a/compliance/audit_v0.5/nvidia/TEST01/verify_performance.py +++ /dev/null @@ -1,126 +0,0 @@ -#! 
/usr/bin/env python3 -import os -import sys -import re -sys.path.append(os.getcwd()) - -import argparse -import json - -def main(): - # Parse arguments to identify the path to the accuracy logs from - # the accuracy and performance runs - parser = argparse.ArgumentParser() - parser.add_argument( - "--reference_summary", "-r", - help="Specifies the path to the summary log for TEST00.", - default="" - ) - parser.add_argument( - "--test_summary", "-t", - help="Specifies the path to the summary log for this test.", - default="" - ) - args = parser.parse_args() - - print("Verifying performance.") - ref_file = open(args.reference_summary, "r") - test_file = open(args.test_summary, "r") - ref_score = 0 - test_score = 0 - ref_mode = '' - test_mode = '' - - for line in ref_file: - if re.match("Scenario", line): - ref_mode = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Single Stream": - if re.match("90th percentile latency", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Multi Stream": - if re.match("Samples per query", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Server": - if re.match("Scheduled samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Offline": - if re.match("Samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if re.match("Result is", line): - valid = line.split(": ",1)[1].strip() - if valid == 'INVALID': - sys.exit("TEST FAIL: Reference results are invalid") - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in reference results") - - - for line in test_file: - if re.match("Scenario", line): - test_mode = line.split(": ",1)[1].strip() - continue - - if test_mode == "Single Stream": - if re.match("90th percentile latency", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Multi Stream": - if re.match("Samples per query", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Server": - if re.match("Scheduled samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Offline": - if re.match("Samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if re.match("Result is", line): - valid = line.split(": ",1)[1].strip() - if valid == 'INVALID': - sys.exit("TEST FAIL: Test results are invalid") - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in test results") - - if test_mode != ref_mode: - sys.exit("Test and reference scenarios do not match!") - - print("reference score = {}".format(ref_score)) - print("test score = {}".format(test_score)) - - - threshold = 0.10 - - # In single stream mode, latencies can be very short for high performance systems - # and run-to-run variation due to external disturbances (OS) can be significant. 
-
-    # In this case we relax pass threshold to 20%
-
-    if ref_mode == "Single Stream" and float(ref_score) <= 200000:
-        threshold = 0.20
-
-    if float(test_score) < float(ref_score) * (1 + threshold) and float(test_score) > float(ref_score) * (1 - threshold):
-        print("TEST PASS")
-    else:
-        print("TEST FAIL: Test score invalid")
-
-if __name__ == '__main__':
-    main()
-
diff --git a/compliance/audit_v0.5/nvidia/TEST03/README b/compliance/audit_v0.5/nvidia/TEST03/README
deleted file mode 100644
index f582a0444..000000000
--- a/compliance/audit_v0.5/nvidia/TEST03/README
+++ /dev/null
@@ -1,34 +0,0 @@
-The purpose of this test is to ensure that the System-Under-Test (SUT) is not providing precalculated inference
-results. The benchmark should be run in Submission mode with the dataset modified using the given scripts.
-The performance must match the submission and the accuracy should be within an acceptable range based on
-measurements on the reference implementation.
-
-Instructions
-
-Part I
-
-#Generate custom data for imagenet and coco
-Run the script with the path to the original dataset and the new path to store the custom data:
-    python modify_image_data.py -d <original data path> -o <output data path> --dataset [coco|imagenet]
-    "original data path" must contain the ImageNet and/or COCO datasets in JPEG format.
-
-#Generate custom data for GNMT
-This script assumes you already have the original dataset and BPE code files.
-Please change the ORIGINAL_DATASET and CUSTOM_DATASET_OUTPUT variables in the script to point to where your
-original newstest2014 dataset is stored and where you want the custom dataset to be stored, respectively.
-The script stores intermediate files in OUTPUT_DIR, which is set to $PWD/outputs. This may be cleaned up after
-the script completes. The final dataset will be available in $CUSTOM_DATASET_OUTPUT.
-To run the script:
-    ./download_and_modify_gnmt.sh
-
-Part II
-Run the benchmark in the same manner as the original submission, once in AccuracyOnly mode and once in
-SubmissionOnly mode. Ensure that accuracy.txt is generated along with the other mlperf_log_* logs.
-Note that the expected accuracies are lower than the MLPerf targets, so the benchmark may report failure.
-This is expected behavior and does not necessarily mean that the audit has failed.
-
-Part III
-Ensure that performance matches that achieved in the submission run.
-    python verify_performance.py -r <path to reference summary> -t <path to this test's summary>
-
-
diff --git a/compliance/audit_v0.5/nvidia/TEST03/download_and_modify_gnmt.sh b/compliance/audit_v0.5/nvidia/TEST03/download_and_modify_gnmt.sh
deleted file mode 100644
index f81e4a186..000000000
--- a/compliance/audit_v0.5/nvidia/TEST03/download_and_modify_gnmt.sh
+++ /dev/null
@@ -1,149 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-export LANG=C.UTF-8
-export LC_ALL=C.UTF-8
-
-# OUTPUT_DIR=${1:-"data"}
-OUTPUT_DIR="$PWD/outputs"
-echo $OUTPUT_DIR
-
-echo "Writing to ${OUTPUT_DIR}. To change this, set the OUTPUT_DIR environment variable."
- -OUTPUT_DIR_DATA="${OUTPUT_DIR}/data" - -mkdir -p $OUTPUT_DIR_DATA - -echo "Downloading dev/test sets" -wget -nc -nv -O ${OUTPUT_DIR_DATA}/dev.tgz \ - http://data.statmt.org/wmt16/translation-task/dev.tgz - -mkdir -p "${OUTPUT_DIR_DATA}/dev" -tar -xvzf "${OUTPUT_DIR_DATA}/dev.tgz" -C "${OUTPUT_DIR_DATA}/dev" - -# Clone Moses -if [ ! -d "${OUTPUT_DIR}/mosesdecoder" ]; then - echo "Cloning moses for data processing" - git clone https://github.com/moses-smt/mosesdecoder.git "${OUTPUT_DIR}/mosesdecoder" - cd ${OUTPUT_DIR}/mosesdecoder - git reset --hard 8c5eaa1a122236bbf927bde4ec610906fea599e6 - cd - -fi - -# Convert SGM files -# Convert newstest2014 data into raw text format -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2014-deen-src.de.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.de -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2014-deen-ref.en.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.en - -# Copy dev/test data to output dir -cp ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.de ${OUTPUT_DIR} -cp ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.en ${OUTPUT_DIR} - -# Modify dataset -echo "Modifying Dataset..." -python modify_gnmt_data.py --filename="${OUTPUT_DIR}/newstest2014" -mv "${OUTPUT_DIR}/newstest2014.en" "${OUTPUT_DIR}/newstest2014.original.en" -mv "${OUTPUT_DIR}/newstest2014.new.en" "${OUTPUT_DIR}/newstest2014.en" - -# Tokenize data -for f in ${OUTPUT_DIR}/*.de; do - echo "Tokenizing $f..." - ${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l de -threads 8 < $f > ${f%.*}.tok.de -done - -for f in ${OUTPUT_DIR}/*.en; do - echo "Tokenizing $f..." - ${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l en -threads 8 < $f > ${f%.*}.tok.en -done - -# Clean all corpora -#for f in ${OUTPUT_DIR}/*.en; do -# fbase=${f%.*} -# echo "Cleaning ${fbase}..." -# ${OUTPUT_DIR}/mosesdecoder/scripts/training/clean-corpus-n.perl $fbase de en "${fbase}.clean" 1 80 -#done - -# # Create dev dataset -# cat "${OUTPUT_DIR}/newstest2015.tok.clean.en" \ -# "${OUTPUT_DIR}/newstest2016.tok.clean.en" \ -# > "${OUTPUT_DIR}/newstest_dev.tok.clean.en" - -# cat "${OUTPUT_DIR}/newstest2015.tok.clean.de" \ -# "${OUTPUT_DIR}/newstest2016.tok.clean.de" \ -# > "${OUTPUT_DIR}/newstest_dev.tok.clean.de" - -# # Filter datasets -# python3 pytorch/scripts/filter_dataset.py -f1 ${OUTPUT_DIR}/train.tok.clean.en -f2 ${OUTPUT_DIR}/train.tok.clean.de -# python3 pytorch/scripts/filter_dataset.py -f1 ${OUTPUT_DIR}/newstest_dev.tok.clean.en -f2 ${OUTPUT_DIR}/newstest_dev.tok.clean.de - -# Generate Subword Units (BPE) -# Clone Subword NMT -if [ ! -d "${OUTPUT_DIR}/subword-nmt" ]; then - git clone https://github.com/rsennrich/subword-nmt.git "${OUTPUT_DIR}/subword-nmt" - cd ${OUTPUT_DIR}/subword-nmt - git reset --hard 48ba99e657591c329e0003f0c6e32e493fa959ef - cd - -fi - -# # Learn Shared BPE -# for merge_ops in 32000; do -# echo "Learning BPE with merge_ops=${merge_ops}. This may take a while..." -# cat "${OUTPUT_DIR}/train.tok.de" "${OUTPUT_DIR}/train.tok.en" | \ -# ${OUTPUT_DIR}/subword-nmt/learn_bpe.py -s $merge_ops > "${OUTPUT_DIR}/bpe.${merge_ops}" - -# echo "Apply BPE with merge_ops=${merge_ops} to tokenized files..." 
-# for lang in en de; dols -# for f in ${OUTPUT_DIR}/*.tok.${lang} ${OUTPUT_DIR}/*.tok.${lang}; do -# outfile="${f%.*}.bpe.${merge_ops}.${lang}" -# ${OUTPUT_DIR}/subword-nmt/apply_bpe.py -c "${OUTPUT_DIR}/bpe.${merge_ops}" < $f > "${outfile}" -# echo ${outfile} -# done -# done - -# # Create vocabulary file for BPE -# cat "${OUTPUT_DIR}/train.tok.bpe.${merge_ops}.en" "${OUTPUT_DIR}/train.tok.bpe.${merge_ops}.de" | \ -# ${OUTPUT_DIR}/subword-nmt/get_vocab.py | cut -f1 -d ' ' > "${OUTPUT_DIR}/vocab.bpe.${merge_ops}" -# done -ORIGINAL_DATASET="/gpfs/fs1/datasets/mlperf_inference/preprocessed_data/nmt/GNMT/" -CUSTOM_DATASET_OUTPUT="/gpfs/fs1/anirbang/custom_datasets/mlperf_inference/preprocessed_data/nmt/GNMT" -BPE_CODES_ORIGINAL="${ORIGINAL_DATASET}/bpe.32000" -BPE_CODES="${OUTPUT_DIR}/bpe.32000" -cp ${BPE_CODES_ORIGINAL} ${BPE_CODES} - -fbase="${OUTPUT_DIR}/newstest2014" -${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l de -threads 8 < ${fbase}.de > ${fbase}.tok.de -${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l de -threads 8 < ${fbase}.en > ${fbase}.tok.en -${OUTPUT_DIR}/subword-nmt/subword_nmt/apply_bpe.py -c $BPE_CODES < ${fbase}.tok.en > ${fbase}.tok.bpe.en -${OUTPUT_DIR}/subword-nmt/subword_nmt/apply_bpe.py -c $BPE_CODES < ${fbase}.tok.de > ${fbase}.tok.bpe.de - - -echo "Copying original dataset to custom directory" -cp -r ${ORIGINAL_DATASET}/* ${CUSTOM_DATASET_OUTPUT}/ -echo "Replacing original dataset files with custom dataset files" -cp "${OUTPUT_DIR}/newstest2014.tok.bpe.en" "${CUSTOM_DATASET_OUTPUT}/newstest2014.tok.bpe.32000.en" -cp "${OUTPUT_DIR}/newstest2014.tok.bpe.de" "${CUSTOM_DATASET_OUTPUT}/newstest2014.tok.bpe.32000.de" - -echo "Preparing perf mode dataset" -rm "${CUSTOM_DATASET_OUTPUT}/newstest2014.tok.bpe.32000.en.large" -for i in {1..1300};do cat "${CUSTOM_DATASET_OUTPUT}/newstest2014.tok.bpe.32000.en" >> "${CUSTOM_DATASET_OUTPUT}/newstest2014.tok.bpe.32000.en.large"; done - -echo "All done." 
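A quick sanity check on the modified dataset (an illustrative sketch, not part of the original scripts; the paths assume the OUTPUT_DIR layout used above) is to confirm that the substitution pass preserved the line count while altering most lines:

    wc -l outputs/newstest2014.original.en outputs/newstest2014.en
    # count how many lines the word-substitution pass changed
    diff outputs/newstest2014.original.en outputs/newstest2014.en | grep -c '^<'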
diff --git a/compliance/audit_v0.5/nvidia/TEST03/modify_gnmt_data.py b/compliance/audit_v0.5/nvidia/TEST03/modify_gnmt_data.py deleted file mode 100644 index aa4b034c9..000000000 --- a/compliance/audit_v0.5/nvidia/TEST03/modify_gnmt_data.py +++ /dev/null @@ -1,147 +0,0 @@ -import fileinput -import argparse - -def replace_words(my_dict, filename): - #Read input file - inp_file = open(filename+".en") - out_file = open(filename+".new.en", "w") - count = 0 #Total number of lines modified - count2 = 0 #Total number of lines - #Replace words and write to file - for line in inp_file: - count2 += 1 - newline = line - #print(line) - flag = 0 - for search_str in my_dict: - if search_str in line : - flag = 1 - newline = newline.replace(search_str, my_dict[search_str]) - #print(newline) - if flag == 1: - count += 1 - out_file.write(newline) - #print(count, count2) #Uncomment this line to print the number of modifications done to the file - - -dict2 = { " he ": " she ", - "He " : "She ", - " him " : " her ", - " his " : " her ", - "Him " : "Her ", - " a few " : " many ", - " few " : " many ", - " more ": " less ", - " not " : " ", - " said" : " swims", - " love ": " hate ", - " says" : " swims", - " impossible " : " easy ", - " hard " : " easy ", - "Wednesday" : "Yesterday", - " can " : " can't ", - " will ": " won't ", - "first" : "fifth", - " last " : " fourth ", - " second " : " third ", - "brother" : "cat", - " man " : " woman ", - "men" : "women", - "girlfriend": "cousin", - "today" : "a year back", - "After " : "Before ", - " more " : " less ", - " shops " : " cars ", - " food " : " people ", - " small ": " big ", - "million" : "thousand", - "police" : "apple", - "swimmer" : "police", - " day " : " night ", - " minutes ":" hours ", - " seconds ":" minutes ", - "singing" : "playing", - "Thursday" : "Tuesday", - "money" : "chocolates", - "injured" : "hurt", - "killed" : "awarded", - " months " : " days ", - " year" : " second", - " good " : " bad ", - " gold " : " diamond ", - "phone" : "computer", - "5": "6", - "0": "1", - "9": "2", - "8": "3", - "water" : "juice", - "newspaper" : "story", - " car " : " dog ", - " news ": " car ", - " driver" : " athlete", - " citizen" : " terrorist", - " speak" : " drive", - " ago ": " in the future ", - " difficult " : " annoying ", - " customer " : " baby ", - " announced " : " travelled ", - " billion" : " hundered", - "country" : "street", - "company" : "district", - "government" : "company"} -dict1 = { " he ": " she ", - "He " : "She ", - " him " : " her ", - " his " : " her ", - "Him " : "Her ", - " a few " : " many ", - " few " : " many ", - " more ": " less ", - " not " : " ", - " said" : " thought", - " love ": " hate ", - " says" : " thinks", - " impossible " : " easy ", - " hard " : " easy ", - "Wednesday" : "Friday", - " can " : " can't ", - " will ": " won't ", - "first" : "fifth", - " last " : " fourth ", - " second " : " third ", - "brother" : "sister", - " man " : " woman ", - "men" : "women", - "girlfriend": "boyfriend", - "today" : "yesterday", - "After " : "Before ", - " more " : " less ", - " shops " : " restaurants ", - " food " : " water ", - " small ": " big ", - "million" : "thousand", - "police" : "guard", - "swimmer" : "athlete", - " day " : " night ", - " minutes ":" hours ", - " seconds ":" minutes ", - "singing" : "dancing", - "Thursday" : "Tuesday", - "money" : "silver", - "injured" : "hurt", - "killed" : "injured", - " months " : " days ", - " year" : " month", - " good " : " bad ", - " gold " : " bronze ", - "world" : "house"} - 
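-# dict1 and dict2 above are two alternative substitution tables: dict1 makes
-# milder word swaps, while dict2 also rewrites digits and substitutes more
-# disruptive replacements. The table assigned to my_dict below is the one
-# replace_words() applies to the input file.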
-my_dict = dict2 -parser = argparse.ArgumentParser() -parser.add_argument( - "--filename", "-f", - help="Specifies the name of the english file", - default="" - ) -args = parser.parse_args() -replace_words(my_dict, args.filename) diff --git a/compliance/audit_v0.5/nvidia/TEST03/modify_image_data.py b/compliance/audit_v0.5/nvidia/TEST03/modify_image_data.py deleted file mode 100644 index 7dba27a07..000000000 --- a/compliance/audit_v0.5/nvidia/TEST03/modify_image_data.py +++ /dev/null @@ -1,120 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys -sys.path.append(os.getcwd()) - -import argparse -import numpy as np -import shutil - -#from common import logging -#from PIL import Image -import cv2 -import math - - -def modify_imagenet(data_dir, custom_data_dir): - - #logging.info("Modifying imagenet...") - print("Moidfying imagenet") - dirlist = os.listdir(data_dir) - image_list = [x for x in dirlist if x.endswith(".JPEG")] - - src_dir = data_dir - dst_dir = os.path.join(custom_data_dir, "imagenet") - - if not os.path.exists(dst_dir): - os.makedirs(dst_dir) - - for idx, file_name in enumerate(image_list): - if (idx % 1000) == 0: - print("Processing image No.{:d}/{:d}...".format(idx, len(image_list))) - img_out = os.path.join(dst_dir, file_name) - if not os.path.exists(img_out): - image = cv2.imread(os.path.join(src_dir, file_name)) - #Set pixels to 0 - image[:,:,0] = 0 - #print ("Writing image No.{:d}/{:d}...".format(idx, len(image_list))) - cv2.imwrite(img_out, image) - - -def modify_coco(data_dir, custom_data_dir): - - #logging.info("Preprocessing coco...") - - def modify_coco_helper(src_dir, dst_dir, image_list): - - if not os.path.exists(dst_dir): - os.makedirs(dst_dir) - - for idx, file_name in enumerate(image_list): - #logging.info("Processing image No.{:d}/{:d}...".format(idx, len(image_list))) - img_out = os.path.join(dst_dir, file_name) - if not os.path.exists(img_out): - image_path = os.path.join(src_dir, file_name) - image = cv2.imread(image_path) - #Set pixels to 0 - image[:,:,0] = 0 - cv2.imwrite(img_out, image) - - #Modify the validation set - src_dir = os.path.join(data_dir, "val2017") - dst_dir = os.path.join(custom_data_dir, "coco/val2017/") - - dirlist = os.listdir(src_dir) - image_list = [x for x in dirlist if x.endswith(".jpg")] - modify_coco_helper(src_dir, dst_dir, image_list) - - #Copy the training set - src_dir = os.path.join(data_dir, "train2017") - dst_dir = os.path.join(custom_data_dir, "coco/train2017") - shutil.copytree(src_dir, dst_dir) - -def copy_coco_annotations(data_dir, output_dir): - src_dir = os.path.join(data_dir, "annotations") - dst_dir = os.path.join(output_dir, "coco/annotations") - shutil.copytree(src_dir, dst_dir) - -def main(): - # Parse arguments to identify the data directory with the input images - # and the output directory for the new custom images - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_dir", "-d", - help="Specifies the directory containing the input images.", - default="" - ) - parser.add_argument( - "--output_dir", "-o", - help="Specifies the output directory for the custom data.", - default="" - ) - parser.add_argument( - "--dataset", - help="Specifies the dataset - coco or imagenet", - default="" - ) - args = parser.parse_args() - print ("Running dataset modifer....") - # Now, actually modify the input images - #logging.info("Loading and modifying images. 
This might take a while...") - data_dir = args.data_dir - output_dir = args.output_dir - #while True: - #print ("a") - #pass - if args.dataset == "imagenet": - print("Begin Imagenet") - modify_imagenet(data_dir, output_dir) - print("Imagenet complete") - elif args.dataset == "coco": - modify_coco(data_dir, output_dir) - copy_coco_annotations(data_dir, output_dir) - else: - print("Incorrect dataset") - #logging.info("Incorrect dataset. It can be either coco or imagenet.") - #logging.info("Processing done.") - -if __name__ == '__main__': - main() - diff --git a/compliance/audit_v0.5/nvidia/TEST03/verify_performance.py b/compliance/audit_v0.5/nvidia/TEST03/verify_performance.py deleted file mode 100644 index 09fcbfb91..000000000 --- a/compliance/audit_v0.5/nvidia/TEST03/verify_performance.py +++ /dev/null @@ -1,126 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys -import re -sys.path.append(os.getcwd()) - -import argparse -import json - -def main(): - # Parse arguments to identify the path to the accuracy logs from - # the accuracy and performance runs - parser = argparse.ArgumentParser() - parser.add_argument( - "--reference_summary", "-r", - help="Specifies the path to the summary log for TEST00.", - default="" - ) - parser.add_argument( - "--test_summary", "-t", - help="Specifies the path to the summary log for this test.", - default="" - ) - args = parser.parse_args() - - print("Verifying performance.") - ref_file = open(args.reference_summary, "r") - test_file = open(args.test_summary, "r") - ref_score = 0 - test_score = 0 - ref_mode = '' - test_mode = '' - - for line in ref_file: - if re.match("Scenario", line): - ref_mode = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Single Stream": - if re.match("90th percentile latency", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Multi Stream": - if re.match("Samples per query", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Server": - if re.match("Scheduled samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Offline": - if re.match("Samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if re.match("Result is", line): - valid = line.split(": ",1)[1].strip() - if valid == 'INVALID': - sys.exit("TEST FAIL: Reference results are invalid") - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in reference results") - - - for line in test_file: - if re.match("Scenario", line): - test_mode = line.split(": ",1)[1].strip() - continue - - if test_mode == "Single Stream": - if re.match("90th percentile latency", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Multi Stream": - if re.match("Samples per query", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Server": - if re.match("Scheduled samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Offline": - if re.match("Samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if re.match("Result is", line): - valid = line.split(": ",1)[1].strip() - if valid == 'INVALID': - sys.exit("TEST FAIL: Test results are invalid") - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in test results") - - if test_mode != ref_mode: - sys.exit("Test and reference 
scenarios do not match!") - - print("reference score = {}".format(ref_score)) - print("test score = {}".format(test_score)) - - - threshold = 0.10 - - # In single stream mode, latencies can be very short for high performance systems - # and run-to-run variation due to external disturbances (OS) can be significant. - # In this case we relax pass threshold to 20% - - if ref_mode == "Single Stream" and float(ref_score) <= 200000: - threshold = 0.20 - - if float(test_score) < float(ref_score) * (1 + threshold) and float(test_score) > float(ref_score) * (1 - threshold): - print("TEST PASS") - else: - print("TEST FAIL: Test score invalid") - -if __name__ == '__main__': - main() - diff --git a/compliance/audit_v0.5/nvidia/TEST04-A/README b/compliance/audit_v0.5/nvidia/TEST04-A/README deleted file mode 100644 index 7df96c5a7..000000000 --- a/compliance/audit_v0.5/nvidia/TEST04-A/README +++ /dev/null @@ -1,39 +0,0 @@ -The purpose of this test is to ensure that results are not cached on the fly when SUT sees duplicate sample IDs. - -By default, QSL loads a subset of the dataset determined by Performance Sample Count (say P) and queries for each scenario are -built using samples from the PerformanceSample implying the same sample can get repeatedly sent to the SUT over -the test duration. - -This test requires measuring & comparing performance of SUT (PerformanceOnly, mode=2) for two audit settings: - (TEST04-A) Issue P unique samples: In Offline scenario, a single query with samples_per_query equivalent to P unique samples is issued. - In Multi-Stream scenario, test ends after #queries = P/samples_per_query have been issued. - In Single-Stream/Server scenario test ends after P unique queries have been issued. - (TEST04-B) Issue same sample P times: In Offline scenario, the same sample is repeated P times to fill the query. This breaks the requirement - of reading contiguous memory locations in Offline mode, but it is normal for an audit test, meant to - stress the SUT in newer ways, to cause performance degradation. - In Multi-Stream scenario, the same query is repeated for #queries (=P/samples_per_query). - In Single-Stream/Server scenario test ends after sending P same queries. -This test is not applicable for: - (1) GNMT benchmark: Performance of GNMT benchmark is dependant on sample sequence length and hence performance for the two cases mentioned above can differ significantly. - (2) For Multi-Stream scenario, if samples_per_query >= P: The two cases above are the same and hence do not require testing. - -Validation checks: - TEST04-B should not be significantly faster than TEST04-A in a fair system which does not cache. - -This test does not use custom dataset or weights. - -Instructions - -Part I - Copy audit.config from TEST04-A folder to the working directory and run test. - -Part II - Copy audit.config from TEST04-B folder to the working directory and run test. - -Part III - Check the performance reported by TEST04-A matches that of TEST04-B by running the script provided - - python verify_test4_performance.py -u -s - -Expected outcome: - TEST PASS diff --git a/compliance/audit_v0.5/nvidia/TEST04-A/audit.config b/compliance/audit_v0.5/nvidia/TEST04-A/audit.config deleted file mode 100644 index cecff59e8..000000000 --- a/compliance/audit_v0.5/nvidia/TEST04-A/audit.config +++ /dev/null @@ -1,16 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. 
In that case the value applies to all models. -# All times are in milli seconds -*.MultiStream.mode = 2 -*.MultiStream.performance_issue_unique = 1 -*.MultiStream.performance_issue_same = 0 -*.Offline.mode = 2 -*.Offline.performance_issue_unique = 1 -*.Offline.performance_issue_same = 0 -*.SingleStream.mode = 2 -*.SingleStream.performance_issue_unique = 1 -*.SingleStream.performance_issue_same = 0 -*.Server.mode = 2 -*.Server.performance_issue_unique = 1 -*.Server.performance_issue_same = 0 diff --git a/compliance/audit_v0.5/nvidia/TEST04-A/verify_test4_performance.py b/compliance/audit_v0.5/nvidia/TEST04-A/verify_test4_performance.py deleted file mode 100644 index 8a505f7e2..000000000 --- a/compliance/audit_v0.5/nvidia/TEST04-A/verify_test4_performance.py +++ /dev/null @@ -1,135 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys -import re -sys.path.append(os.getcwd()) - -import argparse -import json - -def main(): - # Parse arguments to identify the path to the accuracy logs from - # the accuracy and performance runs - parser = argparse.ArgumentParser() - parser.add_argument( - "--unique_sample", "-u", - help="Specifies the path to the summary log for TEST04-A.", - default="" - ) - parser.add_argument( - "--same_sample", "-s", - help="Specifies the path to the summary log for TEST04-B.", - default="" - ) - args = parser.parse_args() - - print("Verifying performance.") - ref_file = open(args.unique_sample, "r") - test_file = open(args.same_sample, "r") - ref_score = 0 - test_score = 0 - ref_mode = '' - test_mode = '' - performance_issue_unqiue = '' - performance_issue_same = '' - - for line in ref_file: - if re.match("Scenario", line): - ref_mode = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Single Stream": - if re.match("90th percentile latency", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Multi Stream": - if re.match("Samples per query", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Server": - if re.match("Scheduled samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Offline": - if re.match("Samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in TEST04-A results") - - if re.match("performance_issue_unique", line): - performance_issue_unique = line.split(": ",1)[1].strip() - if performance_issue_unique == 'false': - sys.exit("TEST FAIL: Invalid test settings in TEST04-A summary.") - break - - for line in test_file: - if re.match("Scenario", line): - test_mode = line.split(": ",1)[1].strip() - continue - - if test_mode == "Single Stream": - if re.match("90th percentile latency", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Multi Stream": - if re.match("Samples per query", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Server": - if re.match("Scheduled samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Offline": - if re.match("Samples per second", line): - test_score = line.split(": ",1)[1].strip() - continue - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in TEST04-B results") - - if re.match("performance_issue_same", line): - performance_issue_same = line.split(": ",1)[1].strip() - if 
performance_issue_same == 'false': - sys.exit("TEST FAIL: Invalid test settings in TEST04-B summary.") - break - - if test_mode != ref_mode: - sys.exit("Test and reference scenarios do not match!") - - print("TEST04-A score = {}".format(ref_score)) - print("TEST04-B score = {}".format(test_score)) - - threshold = 0.10 - - # In single stream mode, latencies can be very short for high performance systems - # and run-to-run variation due to external disturbances (OS) can be significant. - # In this case we relax pass threshold to 20% - - if ref_mode == "Single Stream" and float(ref_score) <= 200000: - threshold = 0.20 - - if float(test_score) < float(ref_score) * (1 + threshold) and float(test_score) > float(ref_score) * (1 - threshold): - print("TEST PASS") - elif (float(test_score) > float(ref_score) and test_mode == "Single Stream"): - print("TEST PASS") - print("Note: TEST04-B is significantly slower than TEST04-A") - elif (float(test_score) < float(ref_score) and test_mode != "Single Stream"): - print("TEST PASS") - print("Note: TEST04-B is significantly slower than TEST04-A") - else: - print("TEST FAIL: Test score invalid") - -if __name__ == '__main__': - main() - diff --git a/compliance/audit_v0.5/nvidia/TEST04-B/README b/compliance/audit_v0.5/nvidia/TEST04-B/README deleted file mode 100644 index 76c8c1b20..000000000 --- a/compliance/audit_v0.5/nvidia/TEST04-B/README +++ /dev/null @@ -1 +0,0 @@ -Refer README & verify performance script provided under TEST04-A. diff --git a/compliance/audit_v0.5/nvidia/TEST04-B/audit.config b/compliance/audit_v0.5/nvidia/TEST04-B/audit.config deleted file mode 100644 index b6e82b2cc..000000000 --- a/compliance/audit_v0.5/nvidia/TEST04-B/audit.config +++ /dev/null @@ -1,20 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. In that case the value applies to all models. -# All times are in milli seconds -*.MultiStream.mode = 2 -*.MultiStream.performance_issue_unique = 0 -*.MultiStream.performance_issue_same = 1 -*.MultiStream.performance_issue_same_index = 3 -*.Offline.mode = 2 -*.Offline.performance_issue_unique = 0 -*.Offline.performance_issue_same = 1 -*.Offline.performance_issue_same_index = 3 -*.SingleStream.mode = 2 -*.SingleStream.performance_issue_unique = 0 -*.SingleStream.performance_issue_same = 1 -*.SingleStream.performance_issue_same_index = 3 -*.Server.mode = 2 -*.Server.performance_issue_unique = 0 -*.Server.performance_issue_same = 1 -*.Server.performance_issue_same_index = 3 diff --git a/compliance/audit_v0.5/nvidia/TEST05/README b/compliance/audit_v0.5/nvidia/TEST05/README deleted file mode 100644 index 94cef6834..000000000 --- a/compliance/audit_v0.5/nvidia/TEST05/README +++ /dev/null @@ -1,25 +0,0 @@ -The purpose of this test is to ensure that the SUT is not tuned for specific Loadgen RNG seed values. -The pass condition is that performance with non-default RNG seed values should be similar to the submitted -score. - -The seeds that are changed are listed below: - qsl_rng_seed - determines order of samples in QSL - sample_index_rng_seed - determines subset of samples in each loadable set - schedule_rng_seed - determines scheduling of samples in server mode - -This test does not use custom dataset or weights. - -Instructions - -Part I - Run test with provided audit.config in PerformanceOnly mode - -Part II - Ensure that performance with custom RNG seeds matches submission performance score. 
- - python verify_performance.py -r -t - -Expected outcome: - TEST PASS - - diff --git a/compliance/audit_v0.5/nvidia/TEST05/audit.config b/compliance/audit_v0.5/nvidia/TEST05/audit.config deleted file mode 100644 index 44c553667..000000000 --- a/compliance/audit_v0.5/nvidia/TEST05/audit.config +++ /dev/null @@ -1,22 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. In that case the value applies to all models. -# All times are in milli seconds - -# mode dictionary (0 = submission, 1 = accuracy, 2 = performance, 3 = find peak perf) -*.MultiStream.mode = 2 -*.MultiStream.qsl_rng_seed = 195 -*.MultiStream.sample_index_rng_seed = 235 -*.MultiStream.schedule_rng_seed = 634 -*.Offline.mode = 2 -*.Offline.qsl_rng_seed = 195 -*.Offline.sample_index_rng_seed = 235 -*.Offline.schedule_rng_seed = 634 -*.SingleStream.mode = 2 -*.SingleStream.qsl_rng_seed = 195 -*.SingleStream.sample_index_rng_seed = 235 -*.SingleStream.schedule_rng_seed = 634 -*.Server.mode = 2 -*.Server.qsl_rng_seed = 195 -*.Server.sample_index_rng_seed = 235 -*.Server.schedule_rng_seed = 634 diff --git a/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py b/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py deleted file mode 100644 index 09fcbfb91..000000000 --- a/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py +++ /dev/null @@ -1,126 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys -import re -sys.path.append(os.getcwd()) - -import argparse -import json - -def main(): - # Parse arguments to identify the path to the accuracy logs from - # the accuracy and performance runs - parser = argparse.ArgumentParser() - parser.add_argument( - "--reference_summary", "-r", - help="Specifies the path to the summary log for TEST00.", - default="" - ) - parser.add_argument( - "--test_summary", "-t", - help="Specifies the path to the summary log for this test.", - default="" - ) - args = parser.parse_args() - - print("Verifying performance.") - ref_file = open(args.reference_summary, "r") - test_file = open(args.test_summary, "r") - ref_score = 0 - test_score = 0 - ref_mode = '' - test_mode = '' - - for line in ref_file: - if re.match("Scenario", line): - ref_mode = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Single Stream": - if re.match("90th percentile latency", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Multi Stream": - if re.match("Samples per query", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Server": - if re.match("Scheduled samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if ref_mode == "Offline": - if re.match("Samples per second", line): - ref_score = line.split(": ",1)[1].strip() - continue - - if re.match("Result is", line): - valid = line.split(": ",1)[1].strip() - if valid == 'INVALID': - sys.exit("TEST FAIL: Reference results are invalid") - - if re.match("\d+ ERROR", line): - error = line.split(" ",1)[0].strip() - print("WARNING: " + error + " ERROR reported in reference results") - - - for line in test_file: - if re.match("Scenario", line): - test_mode = line.split(": ",1)[1].strip() - continue - - if test_mode == "Single Stream": - if re.match("90th percentile latency", line): - test_score = line.split(": ",1)[1].strip() - continue - - if test_mode == "Multi Stream": - if re.match("Samples per query", line): - test_score = line.split(": ",1)[1].strip() - 
diff --git a/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py b/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py
deleted file mode 100644
index 09fcbfb91..000000000
--- a/compliance/audit_v0.5/nvidia/TEST05/verify_performance.py
+++ /dev/null
@@ -1,126 +0,0 @@
-#! /usr/bin/env python3
-import os
-import sys
-import re
-sys.path.append(os.getcwd())
-
-import argparse
-import json
-
-def main():
-    # Parse arguments to identify the path to the accuracy logs from
-    # the accuracy and performance runs
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--reference_summary", "-r",
-        help="Specifies the path to the summary log for TEST00.",
-        default=""
-    )
-    parser.add_argument(
-        "--test_summary", "-t",
-        help="Specifies the path to the summary log for this test.",
-        default=""
-    )
-    args = parser.parse_args()
-
-    print("Verifying performance.")
-    ref_file = open(args.reference_summary, "r")
-    test_file = open(args.test_summary, "r")
-    ref_score = 0
-    test_score = 0
-    ref_mode = ''
-    test_mode = ''
-
-    for line in ref_file:
-        if re.match("Scenario", line):
-            ref_mode = line.split(": ",1)[1].strip()
-            continue
-
-        if ref_mode == "Single Stream":
-            if re.match("90th percentile latency", line):
-                ref_score = line.split(": ",1)[1].strip()
-                continue
-
-        if ref_mode == "Multi Stream":
-            if re.match("Samples per query", line):
-                ref_score = line.split(": ",1)[1].strip()
-                continue
-
-        if ref_mode == "Server":
-            if re.match("Scheduled samples per second", line):
-                ref_score = line.split(": ",1)[1].strip()
-                continue
-
-        if ref_mode == "Offline":
-            if re.match("Samples per second", line):
-                ref_score = line.split(": ",1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ",1)[1].strip()
-            if valid == 'INVALID':
-                sys.exit("TEST FAIL: Reference results are invalid")
-
-        if re.match("\d+ ERROR", line):
-            error = line.split(" ",1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in reference results")
-
-
-    for line in test_file:
-        if re.match("Scenario", line):
-            test_mode = line.split(": ",1)[1].strip()
-            continue
-
-        if test_mode == "Single Stream":
-            if re.match("90th percentile latency", line):
-                test_score = line.split(": ",1)[1].strip()
-                continue
-
-        if test_mode == "Multi Stream":
-            if re.match("Samples per query", line):
-                test_score = line.split(": ",1)[1].strip()
-                continue
-
-        if test_mode == "Server":
-            if re.match("Scheduled samples per second", line):
-                test_score = line.split(": ",1)[1].strip()
-                continue
-
-        if test_mode == "Offline":
-            if re.match("Samples per second", line):
-                test_score = line.split(": ",1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ",1)[1].strip()
-            if valid == 'INVALID':
-                sys.exit("TEST FAIL: Test results are invalid")
-
-        if re.match("\d+ ERROR", line):
-            error = line.split(" ",1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in test results")
-
-    if test_mode != ref_mode:
-        sys.exit("Test and reference scenarios do not match!")
-
-    print("reference score = {}".format(ref_score))
-    print("test score = {}".format(test_score))
-
-
-    threshold = 0.10
-
-    # In single stream mode, latencies can be very short for high performance systems
-    # and run-to-run variation due to external disturbances (OS) can be significant.
-    # In this case we relax pass threshold to 20%
-
-    if ref_mode == "Single Stream" and float(ref_score) <= 200000:
-        threshold = 0.20
-
-    if float(test_score) < float(ref_score) * (1 + threshold) and float(test_score) > float(ref_score) * (1 - threshold):
-        print("TEST PASS")
-    else:
-        print("TEST FAIL: Test score invalid")
-
-if __name__ == '__main__':
-    main()

From ef663f8581bd3f02a14483e766e1cc03179c83f9 Mon Sep 17 00:00:00 2001
From: Jinho Suh <83969361+nv-jinhosuh@users.noreply.github.com>
Date: Mon, 25 Jul 2022 17:45:21 -0500
Subject: [PATCH 4/5] [Loadgen for LON] making non-const function for sut->Name() call (#1184)

* [Loadgen for LON] making non-const function for sut->Name() call

sut->Name() call requires dynamic behavior through QDL in LON.

* Removing const from SUT Name()

* Removing const from Name() - missed ones

Co-authored-by: rameshchukka
---
 loadgen/benchmark/repro.cpp         | 6 +++---
 loadgen/bindings/c_api.cc           | 2 +-
 loadgen/bindings/python_api.cc      | 2 +-
 loadgen/system_under_test.h         | 2 +-
 loadgen/tests/perftests_null_sut.cc | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/loadgen/benchmark/repro.cpp b/loadgen/benchmark/repro.cpp
index e724338ac..300a2b555 100644
--- a/loadgen/benchmark/repro.cpp
+++ b/loadgen/benchmark/repro.cpp
@@ -49,7 +49,7 @@ class BasicSUT : public mlperf::SystemUnderTest {
     initResponse(10000);
   }
   ~BasicSUT() override {}
-  const std::string& Name() const override { return mName; }
+  const std::string& Name() override { return mName; }
   void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
     int n = samples.size();
     if (n > mResponses.size()) {
@@ -96,7 +96,7 @@ class QueueSUT : public mlperf::SystemUnderTest {
       thread.join();
     }
   }
-  const std::string& Name() const override { return mName; }
+  const std::string& Name() override { return mName; }
   void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
     std::unique_lock<std::mutex> lck(mMtx);
     for (const auto& sample : samples) {
@@ -163,7 +163,7 @@ class MultiBasicSUT : public mlperf::SystemUnderTest {
      thread.join();
    }
  }
-  const std::string& Name() const override { return mName; }
+  const std::string& Name() override { return mName; }
   void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
     int thread_idx = mThreadMap[std::this_thread::get_id()];
     int n = samples.size();
diff --git a/loadgen/bindings/c_api.cc b/loadgen/bindings/c_api.cc
index f7c7f3cf2..21b2aa96d 100644
--- a/loadgen/bindings/c_api.cc
+++ b/loadgen/bindings/c_api.cc
@@ -36,7 +36,7 @@ class SystemUnderTestTrampoline : public SystemUnderTest {
         flush_queries_cb_(flush_queries_cb) {}
   ~SystemUnderTestTrampoline() override = default;
 
-  const std::string& Name() const override { return name_; }
+  const std::string& Name() override { return name_; }
 
   void IssueQuery(const std::vector<QuerySample>& samples) override {
     (*issue_cb_)(client_data_, samples.data(), samples.size());
diff --git a/loadgen/bindings/python_api.cc b/loadgen/bindings/python_api.cc
index 345a2a03d..9aa2732b8 100644
--- a/loadgen/bindings/python_api.cc
+++ b/loadgen/bindings/python_api.cc
@@ -47,7 +47,7 @@ class SystemUnderTestTrampoline : public SystemUnderTest {
         flush_queries_cb_(flush_queries_cb) {}
   ~SystemUnderTestTrampoline() override = default;
 
-  const std::string& Name() const override { return name_; }
+  const std::string& Name() override { return name_; }
 
   void IssueQuery(const std::vector<QuerySample>& samples) override {
     pybind11::gil_scoped_acquire gil_acquirer;
diff --git a/loadgen/system_under_test.h b/loadgen/system_under_test.h
index eac7f5fde..843453962 100644
--- a/loadgen/system_under_test.h
+++ b/loadgen/system_under_test.h
@@ -38,7 +38,7 @@ class SystemUnderTest {
   virtual ~SystemUnderTest() {}
 
   /// \brief A human-readable string for logging purposes.
-  virtual const std::string& Name() const = 0;
+  virtual const std::string& Name() = 0;
 
   /// \brief Lets the loadgen issue N samples to the SUT.
   /// \details The SUT may either a) return immediately and signal completion
diff --git a/loadgen/tests/perftests_null_sut.cc b/loadgen/tests/perftests_null_sut.cc
index bdcd9d43c..56d562c3e 100644
--- a/loadgen/tests/perftests_null_sut.cc
+++ b/loadgen/tests/perftests_null_sut.cc
@@ -31,7 +31,7 @@ class SystemUnderTestNull : public mlperf::SystemUnderTest {
  public:
   SystemUnderTestNull() = default;
   ~SystemUnderTestNull() override = default;
-  const std::string& Name() const override { return name_; }
+  const std::string& Name() override { return name_; }
   void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
     std::vector<mlperf::QuerySampleResponse> responses;
     responses.reserve(samples.size());
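The const qualifier has to go because, under Loadgen-over-Network (LON), the object the loadgen sees as its SUT is a query dispatch library (QDL) whose Name() may have to do real work, such as asking the remote SUT for its name, rather than return a stored string. A hypothetical sketch of such a dispatcher (RemoteSUT and FetchNameOverNetwork are illustrative, not part of the loadgen sources):

    // Hypothetical QDL-style SUT: Name() performs a network round trip on
    // first use and caches the result. That mutates member state, so Name()
    // cannot be a const member function.
    #include <string>
    #include <vector>
    #include "system_under_test.h"

    class RemoteSUT : public mlperf::SystemUnderTest {
     public:
      const std::string& Name() override {
        if (name_.empty()) {
          name_ = FetchNameOverNetwork();  // illustrative helper, defined elsewhere
        }
        return name_;
      }
      void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
        // Forward the samples to the remote SUT (omitted).
      }
      void FlushQueries() override {}

     private:
      std::string FetchNameOverNetwork();
      std::string name_;
    };

The alternative, keeping Name() const and marking the cache mutable, would hide the side effect from callers; loosening the interface instead makes the dynamic behavior explicit for every SUT implementation.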
From 7c3c6977cb6bda1f766c6a8e1de0ec8c55151637 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 25 Jul 2022 15:45:58 -0700
Subject: [PATCH 5/5] Update run_local.sh (#1181)

Using python3 is easier here, as python2 is no longer used and the python
command gives an error on macOS.

Co-authored-by: rameshchukka
---
 vision/classification_and_detection/run_local.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vision/classification_and_detection/run_local.sh b/vision/classification_and_detection/run_local.sh
index c014fc1a3..e69e3b8b3 100755
--- a/vision/classification_and_detection/run_local.sh
+++ b/vision/classification_and_detection/run_local.sh
@@ -9,5 +9,5 @@ if [ ! -d $OUTPUT_DIR ]; then
     mkdir -p $OUTPUT_DIR
 fi
 
-python python/main.py --profile $profile $common_opt --model $model_path $dataset \
+python3 python/main.py --profile $profile $common_opt --model $model_path $dataset \
     --output $OUTPUT_DIR $EXTRA_OPS $@