File tree: 2 files changed, +20 −1 lines changed
lines changed Original file line number Diff line number Diff line change @@ -188,6 +188,7 @@ jobs:
188188 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
189189 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
190190 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
191+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
191192 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
192193 fi
193194
@@ -218,5 +219,6 @@ jobs:
218219 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
219220 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
220221 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
222+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
221223 VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
222224 fi
Original file line number Diff line number Diff line change 2323import os
2424from unittest .mock import patch
2525
26- import vllm # noqa: F401
26+ from modelscope import snapshot_download # type: ignore
2727from vllm import SamplingParams
2828
2929from tests .conftest import VllmRunner
@@ -95,3 +95,20 @@ def test_models_distributed_DeepSeek_dbo():
9595 distributed_executor_backend = "mp" ,
9696 ) as vllm_model :
9797 vllm_model .generate (example_prompts , sampling_params )
98+
99+
100+ def test_models_distributed_DeepSeek_W8A8 ():
101+ example_prompts = [
102+ "Hello, my name is" ,
103+ ]
104+ max_tokens = 5
105+
106+ with VllmRunner (
107+ snapshot_download ("vllm-ascend/DeepSeek-V2-Lite-W8A8" ),
108+ max_model_len = 8192 ,
109+ enforce_eager = True ,
110+ dtype = "auto" ,
111+ tensor_parallel_size = 4 ,
112+ quantization = "ascend" ,
113+ ) as vllm_model :
114+ vllm_model .generate_greedy (example_prompts , max_tokens )
You can’t perform that action at this time.
0 commit comments