Set mlperf.conf name to llama3_1-405b
pgmpablo157321 committed Dec 17, 2024
1 parent 097f591 commit 014ce04
Showing 4 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions language/llama3.1-405b/main.py
@@ -136,8 +136,8 @@ def main():
settings = lg.TestSettings()
settings.scenario = scenario_map[args.scenario.lower()]
# mlperf.conf is automatically loaded by the loadgen
# settings.FromConfig(args.mlperf_conf, "llama3-1-405b", args.scenario)
settings.FromConfig(args.user_conf, "llama3-1-405b", args.scenario)
# settings.FromConfig(args.mlperf_conf, "llama3_1-405b", args.scenario)
settings.FromConfig(args.user_conf, "llama3_1-405b", args.scenario)

if args.accuracy:
settings.mode = lg.TestMode.AccuracyOnly
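For reference, a minimal sketch of how the renamed key is consumed through the loadgen Python bindings (not part of this commit; the conf path and scenario string below are placeholders, and only the calls already visible in main.py above are used):

import mlperf_loadgen as lg

scenario_map = {
    "offline": lg.TestScenario.Offline,
    "server": lg.TestScenario.Server,
}

settings = lg.TestSettings()
settings.scenario = scenario_map["offline"]
# mlperf.conf is loaded automatically by loadgen; only user.conf is passed here.
# The second argument must match the benchmark name used in the conf files,
# which this commit changes to "llama3_1-405b" (underscore).
settings.FromConfig("user.conf", "llama3_1-405b", "Offline")
settings.mode = lg.TestMode.PerformanceOnly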
2 changes: 1 addition & 1 deletion language/llama3.1-405b/user.conf
@@ -10,4 +10,4 @@
*.Server.min_duration = 120000
*.Server.min_query_count = 100

-llama3-1-405b.Server.sample_concatenate_permutation = 1
+llama3_1-405b.Server.sample_concatenate_permutation = 1
16 changes: 8 additions & 8 deletions loadgen/mlperf.conf
@@ -14,7 +14,7 @@ dlrm-v2.*.performance_sample_count_override = 204800
rnnt.*.performance_sample_count_override = 2513
gptj.*.performance_sample_count_override = 13368
llama2-70b.*.performance_sample_count_override = 24576
-llama3-1-405b.*.performance_sample_count_override = 8313
+llama3_1-405b.*.performance_sample_count_override = 8313
stable-diffusion-xl.*.performance_sample_count_override = 5000
rgat.*.performance_sample_count_override = 788379
# set to 0 to let entire sample set to be performance sample
@@ -46,7 +46,7 @@ retinanet.MultiStream.target_latency = 528
gptj.*.sample_concatenate_permutation = 1
llama2-70b.*.sample_concatenate_permutation = 1
mixtral-8x7b.*.sample_concatenate_permutation = 1
-llama3-1-405b.*.sample_concatenate_permutation = 1
+llama3_1-405b.*.sample_concatenate_permutation = 1

*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
@@ -63,11 +63,11 @@ stable-diffusion-xl.Server.target_latency = 20000
# Benchmarks that measure token latencies
llama2-70b.*.use_token_latencies = 1
mixtral-8x7b.*.use_token_latencies = 1
-llama3-1-405b.*.use_token_latencies = 1
+llama3_1-405b.*.use_token_latencies = 1
# gptj benchmark infers token latencies
gptj.*.infer_token_latencies = 1
gptj.*.token_latency_scaling_factor = 69
# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3-1-405b benchmark therefore target_latency = 0
# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3_1-405b benchmark therefore target_latency = 0
llama2-70b.Server.target_latency = 0
llama2-70b.Server.ttft_latency = 2000
llama2-70b.Server.tpot_latency = 200
@@ -76,9 +76,9 @@ mixtral-8x7b.Server.target_latency = 0
mixtral-8x7b.Server.ttft_latency = 2000
mixtral-8x7b.Server.tpot_latency = 200

-llama3-1-405b.Server.target_latency = 0
-llama3-1-405b.Server.ttft_latency = 6000
-llama3-1-405b.Server.tpot_latency = 175
+llama3_1-405b.Server.target_latency = 0
+llama3_1-405b.Server.ttft_latency = 6000
+llama3_1-405b.Server.tpot_latency = 175

*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 600000
@@ -97,7 +97,7 @@ rnnt.Offline.min_query_count = 2513
3d-unet.Offline.min_query_count = 43
stable-diffusion-xl.Offline.min_query_count = 5000
llama2-70b.Offline.min_query_count = 24576
-llama3-1-405b.Offline.min_query_count = 8313
+llama3_1-405b.Offline.min_query_count = 8313
mixtral-8x7b.Offline.min_query_count = 15000
rgat.Offline.min_query_count = 788379

2 changes: 1 addition & 1 deletion tools/submission/submission_checker.py
@@ -194,7 +194,7 @@
"ssd-resnet34": "retinanet",
"mobilenet": "resnet",
"resnet50": "resnet",
"llama3-1-405b": "llama3.1-405b"
"llama3_1-405b": "llama3.1-405b"
},
"seeds": {
"qsl_rng_seed": 3066443479025735752,
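As an aside, a hypothetical sketch (not taken from the repository) of how an alias table like the one edited above can normalize submitted model names to the canonical benchmark name; the helper name is illustrative only:

MODEL_ALIASES = {
    "ssd-resnet34": "retinanet",
    "mobilenet": "resnet",
    "resnet50": "resnet",
    "llama3_1-405b": "llama3.1-405b",
}

def canonical_model_name(name: str) -> str:
    # Unknown names pass through unchanged.
    return MODEL_ALIASES.get(name, name)

assert canonical_model_name("llama3_1-405b") == "llama3.1-405b"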
