Set mlperf.conf name to llama3_1-405b
pgmpablo157321 committed Dec 17, 2024
1 parent 097f591 commit 014ce04
Showing 4 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions language/llama3.1-405b/main.py
@@ -136,8 +136,8 @@ def main():
settings = lg.TestSettings()
settings.scenario = scenario_map[args.scenario.lower()]
# mlperf.conf is automatically loaded by the loadgen
# settings.FromConfig(args.mlperf_conf, "llama3-1-405b", args.scenario)
settings.FromConfig(args.user_conf, "llama3-1-405b", args.scenario)
# settings.FromConfig(args.mlperf_conf, "llama3_1-405b", args.scenario)
settings.FromConfig(args.user_conf, "llama3_1-405b", args.scenario)

if args.accuracy:
settings.mode = lg.TestMode.AccuracyOnly
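For reference, a minimal sketch of how the renamed key is consumed through the loadgen Python bindings (not part of this commit; the conf path and scenario string below are placeholders, and only the calls already visible in main.py above are used):

import mlperf_loadgen as lg

scenario_map = {
    "offline": lg.TestScenario.Offline,
    "server": lg.TestScenario.Server,
}

settings = lg.TestSettings()
settings.scenario = scenario_map["offline"]
# mlperf.conf is loaded automatically by loadgen; only user.conf is passed here.
# The second argument must match the benchmark name used in the conf files,
# which this commit changes to "llama3_1-405b" (underscore).
settings.FromConfig("user.conf", "llama3_1-405b", "Offline")
settings.mode = lg.TestMode.PerformanceOnly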
2 changes: 1 addition & 1 deletion language/llama3.1-405b/user.conf
@@ -10,4 +10,4 @@
*.Server.min_duration = 120000
*.Server.min_query_count = 100

-llama3-1-405b.Server.sample_concatenate_permutation = 1
+llama3_1-405b.Server.sample_concatenate_permutation = 1
16 changes: 8 additions & 8 deletions loadgen/mlperf.conf
@@ -14,7 +14,7 @@ dlrm-v2.*.performance_sample_count_override = 204800
rnnt.*.performance_sample_count_override = 2513
gptj.*.performance_sample_count_override = 13368
llama2-70b.*.performance_sample_count_override = 24576
-llama3-1-405b.*.performance_sample_count_override = 8313
+llama3_1-405b.*.performance_sample_count_override = 8313
stable-diffusion-xl.*.performance_sample_count_override = 5000
rgat.*.performance_sample_count_override = 788379
# set to 0 to let entire sample set to be performance sample
@@ -46,7 +46,7 @@ retinanet.MultiStream.target_latency = 528
gptj.*.sample_concatenate_permutation = 1
llama2-70b.*.sample_concatenate_permutation = 1
mixtral-8x7b.*.sample_concatenate_permutation = 1
-llama3-1-405b.*.sample_concatenate_permutation = 1
+llama3_1-405b.*.sample_concatenate_permutation = 1

*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
@@ -63,11 +63,11 @@ stable-diffusion-xl.Server.target_latency = 20000
# Benchmarks that measure token latencies
llama2-70b.*.use_token_latencies = 1
mixtral-8x7b.*.use_token_latencies = 1
-llama3-1-405b.*.use_token_latencies = 1
+llama3_1-405b.*.use_token_latencies = 1
# gptj benchmark infers token latencies
gptj.*.infer_token_latencies = 1
gptj.*.token_latency_scaling_factor = 69
# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3-1-405b benchmark therefore target_latency = 0
# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3_1-405b benchmark therefore target_latency = 0
llama2-70b.Server.target_latency = 0
llama2-70b.Server.ttft_latency = 2000
llama2-70b.Server.tpot_latency = 200
@@ -76,9 +76,9 @@ mixtral-8x7b.Server.target_latency = 0
mixtral-8x7b.Server.ttft_latency = 2000
mixtral-8x7b.Server.tpot_latency = 200

-llama3-1-405b.Server.target_latency = 0
-llama3-1-405b.Server.ttft_latency = 6000
-llama3-1-405b.Server.tpot_latency = 175
+llama3_1-405b.Server.target_latency = 0
+llama3_1-405b.Server.ttft_latency = 6000
+llama3_1-405b.Server.tpot_latency = 175

*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 600000
@@ -97,7 +97,7 @@ rnnt.Offline.min_query_count = 2513
3d-unet.Offline.min_query_count = 43
stable-diffusion-xl.Offline.min_query_count = 5000
llama2-70b.Offline.min_query_count = 24576
-llama3-1-405b.Offline.min_query_count = 8313
+llama3_1-405b.Offline.min_query_count = 8313
mixtral-8x7b.Offline.min_query_count = 15000
rgat.Offline.min_query_count = 788379

2 changes: 1 addition & 1 deletion tools/submission/submission_checker.py
@@ -194,7 +194,7 @@
"ssd-resnet34": "retinanet",
"mobilenet": "resnet",
"resnet50": "resnet",
"llama3-1-405b": "llama3.1-405b"
"llama3_1-405b": "llama3.1-405b"
},
"seeds": {
"qsl_rng_seed": 3066443479025735752,
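As an aside, a hypothetical sketch (not taken from the repository) of how an alias table like the one edited above can normalize submitted model names to the canonical benchmark name; the helper name is illustrative only:

MODEL_ALIASES = {
    "ssd-resnet34": "retinanet",
    "mobilenet": "resnet",
    "resnet50": "resnet",
    "llama3_1-405b": "llama3.1-405b",
}

def canonical_model_name(name: str) -> str:
    # Unknown names pass through unchanged.
    return MODEL_ALIASES.get(name, name)

assert canonical_model_name("llama3_1-405b") == "llama3.1-405b"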
