mlcommons#1777: TEST06 allows [eos_token_id, eos_token_id] case to WAR mixtral

nvzhihanj committed Jul 11, 2024
1 parent c83565d commit 9673064
Showing 2 changed files with 11 additions and 10 deletions.
8 changes: 4 additions & 4 deletions compliance/nvidia/TEST06/README.md
@@ -8,10 +8,10 @@ This repository provides the config files and scripts to run and verify TEST 06

## Introduction

-The purpose of this test is to ensure the consistency of the output of the Llama2 model and avoid a potential EOS exploit. This test will make a performance run, with a limit of 100 samples and logging them into `mlperf_log_accuracy.json`. To achieve a passing result in this test, three criteria must be met:
+The purpose of this test is to ensure the consistency of the output of the LLM (Llama2 and Mixtral) model and avoid a potential EOS exploit. This test will make a performance run, with a limit of 100 samples and logging them into `mlperf_log_accuracy.json`. To achieve a passing result in this test, three criteria must be met:
 - In the case the first token is reported independently (not applicable for Offline scenario), it should match for every query with the first token of the model output.
-- For each query, the model output should only end with zero or one EOS token
-- The number of reported tokens should match with the length of it's
+- For each query, the model output should only end with zero or one EOS token. The only exception for 2 EOS tokens is when the entire output sequences are EOS tokens (i.e. output is [eos_token_id, eos_token_id])
+- The number of reported tokens should match with the length of output sequence.

## Requisites

@@ -22,7 +22,7 @@ pip install numpy

## Instructions
### Part I
-Run the Llama-v2-70b benchmark with the provided audit.config in the corresponding subdirectory. Note that audit.config must be copied to the directory where the benchmark is being run from. Verification that audit.config was properly read can be done by checking that loadgen has found audit.config in mlperf_log_detail.txt
+Run the benchmark with the provided audit.config in the corresponding subdirectory. Note that audit.config must be copied to the directory where the benchmark is being run from. Verification that audit.config was properly read can be done by checking that loadgen has found audit.config in mlperf_log_detail.txt

### Part II
Run the verification script
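The verification script below decodes each entry of `mlperf_log_accuracy.json` as a hex string of raw token bytes. As a minimal sketch of that decode step (the token ids here are made-up values; the int64 dtype and EOS id 2 follow the script's defaults):

```python
import numpy as np

# Hypothetical accuracy-log entry: token ids serialized as int64 and
# hex-encoded, mirroring the script's decode via
# np.frombuffer(bytes.fromhex(sample["data"]), dtype=dtype).
sample = {"data": np.asarray([15043, 3186, 2], dtype=np.int64).tobytes().hex()}

tokens = np.frombuffer(bytes.fromhex(sample["data"]), dtype=np.int64)
print(tokens)  # [15043  3186     2] -> ends in a single EOS token (id 2), which passes
```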
13 changes: 7 additions & 6 deletions compliance/nvidia/TEST06/run_verification.py
@@ -21,7 +21,6 @@

import numpy as np

-EOS_TOKEN = 2
DTYPE_MAP = {
    "int64": np.int64,
    "int32": np.int32,
@@ -37,22 +36,24 @@ def get_args():
    parser.add_argument("--output_dir", "-o",
                        help="Specifies the path to the output directory where compliance logs will be uploaded from, i.e. inference_results_v0.7/closed/NVIDIA/compliance/T4x8/resnet/Offline.",
                        required=True)
+    parser.add_argument("--eos_token_id", '-e', default=2, help="EOS token id of the tokenizer")
    parser.add_argument("--dtype", "-d", default="int64", choices=["int64", "int32", "int16", "float32"])
    parser.add_argument("--scenario", "-s", required=True, choices=["Offline", "Server", "SingleStream", "MultiStream"])
    args = parser.parse_args()
    return args

-def eos_check(acc_data, dtype):
+def eos_check(acc_data, dtype, eos_token_id=2):
    for sample in acc_data:
        data = np.frombuffer(bytes.fromhex(sample["data"]), dtype=dtype)
        i = data.shape[0] - 1
        n_eos_tokens = 0
        while (i > 0):
-            if data[i] == EOS_TOKEN:
+            if data[i] == eos_token_id:
                n_eos_tokens += 1
                if n_eos_tokens >= 2:
-                    return False
-            if data[i] != EOS_TOKEN:
+                    # Allow output to be [eos_token_id, eos_token_id]
+                    return len(data) == 2
+            if data[i] != eos_token_id:
                break
            i-=1
    return True
@@ -84,7 +85,7 @@ def main():
        acc_data = json.load(acc_json)

    try:
-        eos_pass = eos_check(acc_data, DTYPE_MAP[args.dtype])
+        eos_pass = eos_check(acc_data, DTYPE_MAP[args.dtype], args.eos_token_id)
    except Exception:
        print("Unexpected error occured while doing the EOS check")
        eos_pass = False

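Putting the change together: a self-contained sketch of the updated `eos_check`, exercised on three hypothetical outputs (the `to_hex` helper and all token values are illustrative assumptions, not part of the commit):

```python
import numpy as np

def eos_check(acc_data, dtype, eos_token_id=2):
    # Scan each output backwards from its last token, counting trailing EOS
    # tokens; outputs ending in two or more EOS tokens fail, except the bare
    # two-token output [eos_token_id, eos_token_id], which passes.
    for sample in acc_data:
        data = np.frombuffer(bytes.fromhex(sample["data"]), dtype=dtype)
        i = data.shape[0] - 1
        n_eos_tokens = 0
        while (i > 0):
            if data[i] == eos_token_id:
                n_eos_tokens += 1
                if n_eos_tokens >= 2:
                    # Allow output to be [eos_token_id, eos_token_id]
                    return len(data) == 2
            if data[i] != eos_token_id:
                break
            i -= 1
    return True

def to_hex(tokens):
    # Hypothetical helper: serialize token ids the way the accuracy log stores them.
    return np.asarray(tokens, dtype=np.int64).tobytes().hex()

print(eos_check([{"data": to_hex([15043, 3186, 2])}], np.int64))  # True: one trailing EOS
print(eos_check([{"data": to_hex([2, 2])}], np.int64))            # True: the [eos_token_id, eos_token_id] exception
print(eos_check([{"data": to_hex([15043, 2, 2])}], np.int64))     # False: two trailing EOS after real tokens
```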