Expose bfloat16 and float16 options for sdxl

GATEOverflow · Sep 19, 2024 · e8b2adc · e8b2adc
1 parent 0511c95
commit e8b2adc
Showing 1 changed file with 5 additions and 2 deletions.
diff --git a/main.py b/main.py
@@ -369,8 +369,11 @@ def get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scen
         f_pre_space += ""
         if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios):
             extra_content += f"{f_pre_space}    * `<SERVER_TARGET_QPS>` must be determined manually. It is usually around 80% of the Offline QPS, but on some systems, it can drop below 50%. If a higher value is specified, the latency constraint will not be met, and the run will be considered invalid.\n"
-        if "gptj" in model and device == "cuda" and implementation == "reference":
-            extra_content += f"{f_pre_space}    * `--precision=[float16|bfloat16]` can help run on GPUs with less RAM \n"
+        if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cuda", "rocm"]:
+            extra_content += f"{f_pre_space}    * `--precision=float16` can help run on GPUs with less RAM \n"
+        if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cpu"]:
+            extra_content += f"{f_pre_space}    * `--precision=bfloat16` can help run on GPUs with less RAM \n"
+        if "gptj" in model and implementation == "reference":
             extra_content += f"{f_pre_space}    * `--beam-size=1` Beam size of 4 is mandatory for a closed division submission but reducing the beam size can help in running the model on GPUs with lower device memory\n"
         if extra_content:
             extra_content = f"{f_pre_space}!!! tip\n\n" + extra_content