2 changes: 1 addition & 1 deletion docs/benchmarks/language/scc25_guide/scc25.md
@@ -58,7 +58,7 @@ or supporting multi-node execution) useful for the community and [MLCommons](htt

{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "reference", fixed_scenarios=["Offline"], categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}

{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.99", "reference", fixed_scenarios=["Offline"], categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "reference", fixed_scenarios=["Offline"], categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}

=== "Nvidia"
## Nvidia MLPerf Implementation
6 changes: 1 addition & 5 deletions main.py
@@ -68,7 +68,7 @@ def mlperf_inference_implementation_readme(

elif implementation == "nvidia":
if model in ["retinanet", "resnet50",
"3d-unet-99", "3d-unet-99.9"]:
"3d-unet-99", "3d-unet-99.9", "llama2-70b-99", "llama2-70b-99.9"]:
code_version = "r5.1-dev"
if model in ["mixtral-8x7b"]:
return pre_space + " WIP"
@@ -594,9 +594,6 @@ def get_docker_info(spaces, model, implementation,
if implementation.lower() == "nvidia":
info += f"{pre_space} - Default batch size is assigned based on [GPU memory](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1129) or the [specified GPU](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1370). Please click more option for *docker launch* or *run command* to see how to specify the GPU name.\n\n"
info += f"{pre_space} - When run with `--all_models=yes`, all the benchmark models of NVIDIA implementation can be executed within the same container.\n\n"
if "llama2" in model.lower():
info += f"{pre_space} - The dataset for NVIDIA's implementation of Llama2 is not publicly available. The user must fill [this](https://docs.google.com/forms/d/e/1FAIpQLSc_8VIvRmXM3I8KQaYnKf7gy27Z63BBoI_I1u02f4lw6rBp3g/viewform?pli=1&fbzx=-8842630989397184967) form and be verified as a MLCommons member to access the dataset.\n\n"
info += f"{pre_space} - `PATH_TO_PICKE_FILE` should be replaced with path to the downloaded pickle file.\n\n"
else:
if model == "sdxl":
info += f"\n{pre_space}!!! tip\n\n"
@@ -742,7 +739,6 @@ def mlperf_inference_run_command(
if "llama2-70b" in model.lower():
if implementation == "nvidia":
docker_cmd_suffix += f" \\\n{pre_space} --tp_size=2"
- docker_cmd_suffix += f" \\\n{pre_space} --nvidia_llama2_dataset_file_path=<PATH_TO_PICKLE_FILE>"
elif implementation == "neuralmagic":
docker_cmd_suffix += (
f" \\\n{pre_space} --api_server=http://localhost:8000"