Skip to content

Commit

Permalink
Expose bfloat16 and float16 options for sdxl
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh committed Sep 19, 2024
1 parent 0511c95 commit e8b2adc
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,11 @@ def get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scen
f_pre_space += ""
if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios):
extra_content += f"{f_pre_space} * `<SERVER_TARGET_QPS>` must be determined manually. It is usually around 80% of the Offline QPS, but on some systems, it can drop below 50%. If a higher value is specified, the latency constraint will not be met, and the run will be considered invalid.\n"
if "gptj" in model and device == "cuda" and implementation == "reference":
extra_content += f"{f_pre_space} * `--precision=[float16|bfloat16]` can help run on GPUs with less RAM \n"
if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cuda", "rocm"]:
extra_content += f"{f_pre_space} * `--precision=float16` can help run on GPUs with less RAM \n"
if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cpu"]:
extra_content += f"{f_pre_space} * `--precision=bfloat16` can help run on GPUs with less RAM \n"
if "gptj" in model and implementation == "reference":
extra_content += f"{f_pre_space} * `--beam-size=1` Beam size of 4 is mandatory for a closed division submission but reducing the beam size can help in running the model on GPUs with lower device memory\n"
if extra_content:
extra_content = f"{f_pre_space}!!! tip\n\n" + extra_content
Expand Down

0 comments on commit e8b2adc

Please sign in to comment.