File tree — 2 files changed: +20 −1 lines changed

@@ -127,6 +127,7 @@ jobs:
127 127      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
128 128      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
129 129      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
    130 +    VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
130 131      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
131 132    fi
132 133
@@ -157,5 +158,6 @@ jobs:
157 158      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
158 159      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
159 160      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
    161 +    VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
160 162      VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
161 163    fi
Original file line number / Diff line number / Diff line change
23 23   import os
24 24   from unittest.mock import patch
25 25
26    - import vllm  # noqa: F401
   26 + from modelscope import snapshot_download  # type: ignore
27 27   from vllm import SamplingParams
28 28
29 29   from tests.conftest import VllmRunner
@@ -95,3 +95,20 @@ def test_models_distributed_DeepSeek_dbo():
95 95           distributed_executor_backend="mp",
96 96       ) as vllm_model:
97 97           vllm_model.generate(example_prompts, sampling_params)
   98  +
   99  +
   100 + def test_models_distributed_DeepSeek_W8A8():
   101 +     example_prompts = [
   102 +         "Hello, my name is",
   103 +     ]
   104 +     max_tokens = 5
   105 +
   106 +     with VllmRunner(
   107 +             snapshot_download("vllm-ascend/DeepSeek-V2-Lite-W8A8"),
   108 +             max_model_len=8192,
   109 +             enforce_eager=True,
   110 +             dtype="auto",
   111 +             tensor_parallel_size=4,
   112 +             quantization="ascend",
   113 +     ) as vllm_model:
   114 +         vllm_model.generate_greedy(example_prompts, max_tokens)
You can’t perform that action at this time.
0 commit comments