fix

sgl-project · Jul 3, 2024 · d530a1c · d530a1c
1 parent c7709d3
commit d530a1c
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 1 deletion.
diff --git a/docs/test_process.md b/docs/test_process.md
@@ -1,8 +1,13 @@
 ## SRT Unit Tests
 
 ### Latency Alignment
+Make sure the changes do not degrade performance on the following benchmarks:
 ```
 python -m sglang.bench_latency --model-path meta-llama/Llama-2-7b-chat-hf --mem-fraction-static 0.8 --batch 32 --input-len 512 --output-len 256
+python -m sglang.bench_latency --model-path meta-llama/Llama-2-7b-chat-hf --mem-fraction-static 0.8 --batch 1 --input-len 512 --output-len 256
+
+python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-70B --tp 8 --mem-fraction-static 0.6 --batch 32 --input-len 8192 --output-len 1
+
 ```
 
 ### High-level API

diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
@@ -151,7 +151,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
         enable_show_time_cost()
     if server_args.disable_disk_cache:
         disable_cache()
-    if server_args.enable_flashinfer:
+    if server_args.disable_flashinfer:
         assert_pkg_version("flashinfer", "0.0.7")
     if server_args.chat_template:
         # TODO: replace this with huggingface transformers template