From b86237ca3c2cce5497f11f5e07ad944ed0b8f69a Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 18 Jun 2024 16:34:59 -0500 Subject: [PATCH] Update for v4.1: Add new seeds + update checker + update compliance test table --- compliance/nvidia/README.md | 5 +- mlperf.conf | 12 +- text_to_image/tools/sample_ids.py | 2 +- text_to_image/tools/sample_ids.txt | 20 +-- tools/submission/submission_checker.py | 234 +++++++++++++++++++++++-- 5 files changed, 239 insertions(+), 34 deletions(-) diff --git a/compliance/nvidia/README.md b/compliance/nvidia/README.md index e3a751e98..630676180 100755 --- a/compliance/nvidia/README.md +++ b/compliance/nvidia/README.md @@ -37,5 +37,6 @@ The `run_verification.py` found in each test directory will copy the test files | 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) | | rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) | | gpt-j | - | -| stable-diffusion-xl | - | -| Llama2-70b | [TEST06]() | +| stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) | +| Llama2-70b | [TEST06](./TEST06/) | +| mixtral-8x7b | [TEST06](./TEST06/) | diff --git a/mlperf.conf b/mlperf.conf index 5a3c78b22..6487bfc23 100644 --- a/mlperf.conf +++ b/mlperf.conf @@ -19,13 +19,13 @@ stable-diffusion-xl.*.performance_sample_count_override = 5000 3d-unet.*.performance_sample_count_override = 0 # Set seeds. The seeds will be distributed two weeks before the submission. -*.*.qsl_rng_seed = 13281865557512327830 -*.*.sample_index_rng_seed = 198141574272810017 -*.*.schedule_rng_seed = 7575108116881280410 +*.*.qsl_rng_seed = 3066443479025735752 +*.*.sample_index_rng_seed = 10688027786191513374 +*.*.schedule_rng_seed = 14962580496156340209 # Set seeds for TEST_05. The seeds will be distributed two weeks before the submission. 
-*.*.test05_qsl_rng_seed = 2376919268182438552 -*.*.test05_sample_index_rng_seed = 11176391829184272374 -*.*.test05_schedule_rng_seed = 3911940905271271337 +*.*.test05_qsl_rng_seed = 16799458546791641818 +*.*.test05_sample_index_rng_seed = 5453809927556429288 +*.*.test05_schedule_rng_seed = 5435552105434836064 *.SingleStream.target_latency_percentile = 90 diff --git a/text_to_image/tools/sample_ids.py b/text_to_image/tools/sample_ids.py index e1d6effb4..8c440ec5b 100644 --- a/text_to_image/tools/sample_ids.py +++ b/text_to_image/tools/sample_ids.py @@ -16,7 +16,7 @@ def get_args(): "--n", type=int, default=10, help="Dataset download location" ) parser.add_argument( - "--seed", "-s", type=int, default=926019364, help="Dataset download location" + "--seed", "-s", type=int, default=633994880, help="Dataset download location" ) args = parser.parse_args() return args diff --git a/text_to_image/tools/sample_ids.txt b/text_to_image/tools/sample_ids.txt index 65c9f5641..8bf3d2be8 100644 --- a/text_to_image/tools/sample_ids.txt +++ b/text_to_image/tools/sample_ids.txt @@ -1,10 +1,10 @@ -4459 -4015 -2705 -1682 -4048 -4683 -3757 -1578 -3319 -95 \ No newline at end of file +4655 +2569 +1303 +109 +4509 +3009 +2179 +1826 +2094 +3340 \ No newline at end of file diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 451cd66b6..9263a5f68 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -186,6 +186,169 @@ "stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1} }, }, + "v4.1": { + "models": [ + "resnet", + "retinanet", + "bert-99", + "bert-99.9", + "dlrm-v2-99", + "dlrm-v2-99.9", + "3d-unet-99", + "3d-unet-99.9", + "gptj-99", + "gptj-99.9", + "llama2-70b-99", + "llama2-70b-99.9", + "stable-diffusion-xl", + "mixtral-8x7b" + ], + "required-scenarios-datacenter": { + "resnet": ["Server", "Offline"], + "retinanet": ["Server", "Offline"], + "bert-99": ["Server", "Offline"], 
+ "bert-99.9": ["Server", "Offline"], + "dlrm-v2-99": ["Server", "Offline"], + "dlrm-v2-99.9": ["Server", "Offline"], + "3d-unet-99": ["Offline"], + "3d-unet-99.9": ["Offline"], + "gptj-99": ["Server", "Offline"], + "gptj-99.9": ["Server", "Offline"], + "llama2-70b-99": ["Server", "Offline"], + "llama2-70b-99.9": ["Server", "Offline"], + "stable-diffusion-xl": ["Server", "Offline"], + "mixtral-8x7b": ["Server", "Offline"] + }, + "optional-scenarios-datacenter": {}, + "required-scenarios-edge": { + "resnet": ["SingleStream", "MultiStream", "Offline"], + "retinanet": ["SingleStream", "MultiStream", "Offline"], + "bert-99": ["SingleStream", "Offline"], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "gptj-99": ["SingleStream", "Offline"], + "gptj-99.9": ["SingleStream", "Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline"], + }, + "optional-scenarios-edge": {}, + "required-scenarios-datacenter-edge": { + "resnet": ["SingleStream", "Offline", "MultiStream", "Server"], + "retinanet": ["SingleStream", "Offline", "MultiStream", "Server"], + "bert-99": ["SingleStream", "Offline", "Server"], + "bert-99.9": ["Offline", "Server"], + "dlrm-v2-99": ["Offline", "Server"], + "dlrm-v2-99.9": ["Offline", "Server"], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "gptj-99": ["SingleStream", "Offline", "Server"], + "gptj-99.9": ["SingleStream", "Offline", "Server"], + "llama2-70b-99": ["Server", "Offline"], + "llama2-70b-99.9": ["Server", "Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline", "Server"], + "mixtral-8x7b": ["Server", "Offline"] # datacenter-only model (absent from required-scenarios-edge), so no SingleStream requirement + }, + "optional-scenarios-datacenter-edge": {}, + "accuracy-target": { + "resnet": ("acc", 76.46 * 0.99), + "retinanet": ("mAP", 37.55 * 0.99), + "bert-99": ("F1", 90.874 * 0.99), + "bert-99.9": ("F1", 90.874 * 0.999), + "dlrm-v2-99": ("AUC", 80.31 * 0.99), + "dlrm-v2-99.9": ("AUC", 80.31 * 0.999), + "3d-unet-99": 
("DICE", 0.86170 * 0.99), + "3d-unet-99.9": ("DICE", 0.86170 * 0.999), + "gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9), + "gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9), + "llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9), + "llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9), + "stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758), + # TODO: Mixtral metrics + # "mixtral-8x7b" : ("ROUGE1", X * 0.99, "ROUGE2", X * 0.99, "ROUGEL", X * 0.99, "TOKENS_PER_SAMPLE", X * 0.9, "gsm8k_accuracy": 73.78*0.99, "mbxp_accuracy": 60.12 * 0.99), + }, + "accuracy-upper-limit": { + "stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626), + "llama2-70b-99" : ("TOKENS_PER_SAMPLE", 294.45*1.1), + "llama2-70b-99.9" : ("TOKENS_PER_SAMPLE", 294.45*1.1) + # "mixtral-8x7b" :("TOKENS_PER_SAMPLE", X * 0.9) + }, + "performance-sample-count": { + "resnet": 1024, + "retinanet": 64, + "bert-99": 10833, + "bert-99.9": 10833, + "dlrm-v2-99": 204800, + "dlrm-v2-99.9": 204800, + "3d-unet-99": 43, + "3d-unet-99.9": 43, + "gptj-99": 13368, + "gptj-99.9": 13368, + "llama2-70b-99": 24576, + "llama2-70b-99.9": 24576, + "stable-diffusion-xl": 5000, + "mixtral-8x7b": 15000, + }, + # TODO: Update this list. 
+ "model_mapping": { + # map model names to the official mlperf model class + "ssd-resnet34": "retinanet", + "mobilenet": "resnet", + "resnet50": "resnet" + }, + "seeds": { + # TODO: Update random seeds + "qsl_rng_seed": 3066443479025735752, + "sample_index_rng_seed": 10688027786191513374, + "schedule_rng_seed": 14962580496156340209, + }, + "test05_seeds": { + # TODO: Update random seeds + "qsl_rng_seed": 16799458546791641818, + "sample_index_rng_seed": 5453809927556429288, + "schedule_rng_seed": 5435552105434836064, + }, + "ignore_errors": [], + "latency-constraint": { + "resnet": {"Server": 15000000}, + "retinanet": {"Server": 100000000}, + "bert-99": {"Server": 130000000}, + "bert-99.9": {"Server": 130000000}, + "dlrm-v2-99": {"Server": 60000000}, + "dlrm-v2-99.9": {"Server": 60000000}, + "gptj-99": {"Server": 20000000000}, + "gptj-99.9": {"Server": 20000000000}, + "llama2-70b-99": {"Server": 20000000000}, + "llama2-70b-99.9": {"Server": 20000000000}, + "stable-diffusion-xl" : {"Server": 20000000000} + # TODO: Mixtral metrics + # "mixtral-8x7b" : {"Server": 20000000000} + }, + "min-queries": { + "resnet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1, + }, + "retinanet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1, + }, + "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "dlrm-v2-99": {"Server": 270336, "Offline": 1}, + "dlrm-v2-99.9": {"Server": 270336, "Offline": 1}, + "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, + "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "gptj-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "gptj-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama2-70b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama2-70b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "stable-diffusion-xl": 
{"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + }, + }, } VALID_DIVISIONS = ["open", "closed", "network"] @@ -221,6 +384,20 @@ "3319", "95" ] + }, + "v4.1": { + "images": [ + "4655", + "2569", + "1303", + "109", + "4509", + "3009", + "2179", + "1826", + "2094", + "3340" + ] } } } @@ -255,7 +432,8 @@ "gptj-99.9": 13368, "llama2-70b-99": 24576, "llama2-70b-99.9": 24576, - "stable-diffusion-xl": 5000 + "stable-diffusion-xl": 5000, + "mixtral-8x7b": 15000 } SCENARIO_MAPPING = { @@ -302,8 +480,8 @@ }, "v4.1": { "llama2-70b-99": { - "Offline": "result_tokens_per_second", - "Server": "result_completed_tokens_per_second", + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", }, "llama2-70b-99.9": { "Offline": "result_tokens_per_second", @@ -316,16 +494,33 @@ "gptj-99.9": { "Offline": "result_inferred_tokens_per_second", "Server": "result_inferred_completed_tokens_per_second", + }, + "mixtral-8x7b": { + "Offline": "result_tokens_per_second", + "Server": "result_completed_tokens_per_second", } } } -LLAMA2_LATENCY_LIMITS = { - # We might add interactive in the next round. 
Latency in ns - "conversational": { - "ttft": 2000 * 1000000, - "tpot": 200 * 1000000 - } +LLM_LATENCY_LIMITS = { + "llama2-70b-99":{ + "conversational": { + "ttft": 2000 * 1000000, + "tpot": 200 * 1000000 + } + }, + "llama2-70b-99.9":{ + "conversational": { + "ttft": 2000 * 1000000, + "tpot": 200 * 1000000 + } + }, + "mixtral-8x7b":{ # required: extra_check_llm indexes LLM_LATENCY_LIMITS[model] and is called for mixtral-8x7b + "conversational": { + "ttft": 2000 * 1000000, + "tpot": 200 * 1000000 + } + } } ACC_PATTERN = { @@ -799,13 +994,13 @@ def check_accuracy_dir(config, model, path, verbose): return is_valid, result_acc -def extra_check_llama2(mlperf_log, scenario): +def extra_check_llm(mlperf_log, scenario, model): if (mlperf_log["requested_use_token_latencies"]): if scenario == "Offline": # For offline no further checks are necessary return None, True else: - for constraint, limits in LLAMA2_LATENCY_LIMITS.items(): + for constraint, limits in LLM_LATENCY_LIMITS[model].items(): if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits["ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]: return constraint, True else: @@ -867,8 +1062,8 @@ def check_performance_dir( res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]]) - if model in ["llama2-70b-99", "llama2-70b-99.9"]: - llama_constraint, is_valid = extra_check_llama2(mlperf_log, scenario_fixed) + if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]: + llama_constraint, is_valid = extra_check_llm(mlperf_log, scenario_fixed, model) latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"] latency_mean = mlperf_log["result_mean_latency_ns"] @@ -2344,8 +2539,7 @@ def check_compliance_dir( "gptj-99.9", "llama2-70b-99", "llama2-70b-99.9", - "stable-diffusion-xl" - + "mixtral-8x7b" ]: test_list.remove("TEST04") @@ -2355,13 +2549,23 @@ def check_compliance_dir( "llama2-70b-99", "llama2-70b-99.9", + "mixtral-8x7b", "stable-diffusion-xl" ]: test_list.remove("TEST05") + + if model in [ 
"gptj-99", + "gptj-99.9", + "llama2-70b-99", + "llama2-70b-99.9", + "mixtral-8x7b" + ]: test_list.remove("TEST01") if model in [ "llama2-70b-99", "llama2-70b-99.9", + "mixtral-8x7b" ]: test_list.append("TEST06")