|
18 | 18 | # |
19 | 19 | import gc |
20 | 20 | import multiprocessing |
21 | | -import signal |
22 | | -import subprocess |
23 | 21 | import sys |
24 | | -import time |
25 | 22 | from multiprocessing import Queue |
26 | 23 |
|
27 | 24 | import lm_eval |
28 | 25 | import pytest |
29 | | -import requests |
30 | 26 | import torch |
31 | 27 |
|
32 | 28 | SERVER_HOST = "127.0.0.1" |
|
36 | 32 |
|
37 | 33 | # pre-trained model path on Hugging Face. |
38 | 34 | # Qwen/Qwen2.5-0.5B-Instruct: accuracy test for DP. |
39 | | -# Qwen/Qwen3-30B-A3B: accuracy test for EP. |
| 35 | +# Qwen/Qwen3-30B-A3B: accuracy test for EP and DP. |
40 | 36 | # deepseek-ai/DeepSeek-V2-Lite: accuracy test for TP. |
41 | 37 | MODEL_NAME = ["Qwen/Qwen3-30B-A3B", "deepseek-ai/DeepSeek-V2-Lite"] |
42 | 38 |
|
@@ -145,58 +141,27 @@ def test_lm_eval_accuracy(monkeypatch: pytest.MonkeyPatch, model): |
145 | 141 | f"Expected: {EXPECTED_VALUE[model]}±{RTOL} | Measured: {result}" |
146 | 142 |
|
147 | 143 |
|
148 | | -@pytest.mark.parametrize("max_tokens", [10]) |
149 | | -@pytest.mark.parametrize("model", ["Qwen/Qwen2.5-0.5B-Instruct"]) |
150 | | -def test_lm_eval_accuracy_dp(model, max_tokens): |
151 | | - log_file = open("accuracy_pd.log", "a+") |
152 | | - cmd = [ |
153 | | - "vllm", "serve", model, "--max_model_len", "4096", |
154 | | - "--tensor_parallel_size", "2", "--data_parallel_size", "2" |
155 | | - ] |
156 | | - server_proc = subprocess.Popen(cmd, |
157 | | - stdout=log_file, |
158 | | - stderr=subprocess.DEVNULL) |
# Models exercised by the DP accuracy test.
DP_DENSE_MODEL = ["Qwen/Qwen2.5-0.5B-Instruct"]
DP_MOE_MODEL = ["Qwen/Qwen3-30B-A3B"]
# Backward-compatible aliases for the original misspelled names
# ("DENSCE"/"MOEDL"); keep until all references are migrated.
DP_DENSCE_MODEL = DP_DENSE_MODEL
DP_MOE_MOEDL = DP_MOE_MODEL
159 | 146 |
|
160 | | - try: |
161 | | - for _ in range(300): |
162 | | - try: |
163 | | - r = requests.get(HEALTH_URL, timeout=1) |
164 | | - if r.status_code == 200: |
165 | | - break |
166 | | - except requests.exceptions.RequestException: |
167 | | - pass |
168 | | - time.sleep(1) |
169 | | - else: |
170 | | - log_file.flush() |
171 | | - log_file.seek(0) |
172 | | - log_content = log_file.read() |
173 | | - pytest.fail( |
174 | | - f"vLLM serve did not become healthy after 300s: {HEALTH_URL}\n" |
175 | | - f"==== vLLM Serve Log Start ===\n{log_content}\n==== vLLM Serve Log End ===" |
176 | | - ) |
177 | | - |
178 | | - prompt = "bejing is a" |
179 | | - payload = { |
180 | | - "prompt": prompt, |
181 | | - "max_tokens": max_tokens, |
182 | | - "sampling_params": { |
183 | | - "temperature": 0.0, |
184 | | - "top_p": 1.0, |
185 | | - "seed": 123 |
186 | | - } |
187 | | - } |
188 | | - resp = requests.post(COMPLETIONS_URL, json=payload, timeout=30) |
189 | | - resp.raise_for_status() |
190 | | - data = resp.json() |
# Extra engine arguments per DP model, as a comma-separated "key=value"
# string appended to the vLLM model_args when launching the test.
DP_MORE_ARGS = {
    "Qwen/Qwen2.5-0.5B-Instruct": ("tensor_parallel_size=2,"
                                   "data_parallel_size=2"),
    "Qwen/Qwen3-30B-A3B": ("tensor_parallel_size=2,"
                           "data_parallel_size=2,"
                           "enable_expert_parallel=True,"
                           "max_model_len=1024,"
                           "enforce_eager=True"),
}
191 | 153 |
|
192 | | - generated = data["choices"][0]["text"].strip() |
193 | | - expected = "city in north china, it has many famous attractions" |
194 | | - assert generated == expected, f"Expected `{expected}`, got `{generated}`" |
195 | 154 |
|
196 | | - finally: |
197 | | - server_proc.send_signal(signal.SIGINT) |
198 | | - try: |
199 | | - server_proc.wait(timeout=10) |
200 | | - except subprocess.TimeoutExpired: |
201 | | - server_proc.kill() |
202 | | - server_proc.wait() |
@pytest.mark.parametrize("model", DP_DENSCE_MODEL)
def test_lm_eval_accuracy_dp(model):
    """Run the lm_eval accuracy check for *model* with data parallelism.

    The evaluation runs in a child process (run_test) so the engine is
    fully torn down between parametrized cases; the measured metric is
    reported back through a multiprocessing queue and must fall within
    EXPECTED_VALUE[model] +/- RTOL.
    """
    result_queue: Queue[float] = multiprocessing.Queue()
    p = multiprocessing.Process(target=run_test,
                                args=(result_queue, model,
                                      MAX_MODEL_LEN[model], MODEL_TYPE[model],
                                      DP_MORE_ARGS[model]))
    p.start()
    p.join()
    # Fail fast with a clear message instead of hanging on queue.get()
    # when the worker died before putting a result.
    assert p.exitcode == 0, \
        f"evaluation subprocess exited with code {p.exitcode}"
    assert not result_queue.empty(), \
        "evaluation subprocess produced no result"
    result = result_queue.get()
    assert (EXPECTED_VALUE[model] - RTOL < result < EXPECTED_VALUE[model] + RTOL), \
        f"Expected: {EXPECTED_VALUE[model]}±{RTOL} | Measured: {result}"
0 commit comments