Merge pull request #30 from anandhu-eng/amdenable

WIP - AMD Llama 2 implementation
GATEOverflow · Oct 21, 2024 · 2bf7aac · 2bf7aac
2 parents fa0eeca + 7eee072
commit 2bf7aac
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 0 deletions.
diff --git a/docs/benchmarks/language/llama2-70b.md b/docs/benchmarks/language/llama2-70b.md
@@ -25,3 +25,10 @@ hide:
 {{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "neuralmagic") }}
 
 {{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "neuralmagic") }}
+
+=== "AMD"
+    ## AMD MLPerf Implementation
+
+{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "amd") }}
+
+{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "amd") }}
diff --git a/main.py b/main.py
@@ -43,6 +43,11 @@ def mlperf_inference_implementation_readme(spaces, model, implementation, *, imp
             devices = [ "CUDA" ]
             frameworks = [ "TensorRT" ]
 
+        elif implementation == "amd":
+            devices = [ "cuda" ]
+            frameworks = [ "pytorch" ]
+            execution_envs.remove("Docker")
+
         elif implementation == "neuralmagic":
             devices = [ "CUDA" ]
             frameworks = [ "pytorch" ]