Skip to content

Commit 4e0c8c2

Browse files
committed
tests: Use tp=2 for LoRA tensorizer test
Uses tp=2 to get this test passing on L4 GPUs, while also unit-testing sharded tensorizer support with LoRA.

Signed-off-by: Sanger Steel <sangersteel@gmail.com>
1 parent b649648 commit 4e0c8c2

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

tests/lora/test_llama_tp.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -191,21 +191,24 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
191191

192192

193193
@create_new_process_for_each_test()
194-
def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
195-
sql_lora_huggingface_id):
194+
def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
195+
sql_lora_huggingface_id):
196196

197197
# Run the tensorizing of the LoRA adapter and the model in a subprocess
198198
# to guarantee cleanup
199199

200+
tp_size = 2
201+
model_name = "model-rank-%03d.tensors"
202+
200203
model_ref = MODEL_PATH
201204
lora_path = sql_lora_huggingface_id
202205
suffix = "test"
203206
try:
204207
result = subprocess.run([
205208
sys.executable,
206209
f"{VLLM_PATH}/examples/other/tensorize_vllm_model.py", "--model",
207-
MODEL_PATH, "--lora-path", lora_path, "serialize",
208-
"--serialized-directory",
210+
MODEL_PATH, "--lora-path", lora_path, "--tensor-parallel-size",
211+
str(tp_size), "serialize", "--serialized-directory",
209212
str(tmp_path), "--suffix", suffix
210213
],
211214
check=True,
@@ -219,7 +222,7 @@ def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
219222

220223
print("STDOUT:\n", result.stdout)
221224

222-
model_uri = tmp_path / "vllm" / model_ref / suffix / "model.tensors"
225+
model_uri = tmp_path / "vllm" / model_ref / suffix / model_name
223226
tensorizer_config = TensorizerConfig(tensorizer_uri=str(model_uri))
224227
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
225228

@@ -229,8 +232,8 @@ def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
229232
enforce_eager=True,
230233
model_loader_extra_config=tensorizer_config,
231234
max_num_seqs=13,
232-
max_loras=2,
233-
gpu_memory_utilization=0.3)
235+
tensor_parallel_size=2,
236+
max_loras=2)
234237

235238
tensorizer_config_dict = tensorizer_config.to_dict()
236239

0 commit comments

Comments
 (0)