@@ -191,21 +191,24 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
 
 
 @create_new_process_for_each_test()
-def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
-                                        sql_lora_huggingface_id):
+def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
+                                            sql_lora_huggingface_id):
 
     # Run the tensorizing of the LoRA adapter and the model in a subprocess
     # to guarantee cleanup
 
+    tp_size = 2
+    model_name = "model-rank-%03d.tensors"
+
     model_ref = MODEL_PATH
     lora_path = sql_lora_huggingface_id
     suffix = "test"
     try:
         result = subprocess.run([
             sys.executable,
             f"{VLLM_PATH}/examples/other/tensorize_vllm_model.py", "--model",
-            MODEL_PATH, "--lora-path", lora_path, "serialize",
-            "--serialized-directory",
+            MODEL_PATH, "--lora-path", lora_path, "--tensor-parallel-size",
+            str(tp_size), "serialize", "--serialized-directory",
             str(tmp_path), "--suffix", suffix
         ],
                                 check=True,
@@ -219,7 +222,7 @@ def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
 
         print("STDOUT:\n", result.stdout)
 
-    model_uri = tmp_path / "vllm" / model_ref / suffix / "model.tensors"
+    model_uri = tmp_path / "vllm" / model_ref / suffix / model_name
     tensorizer_config = TensorizerConfig(tensorizer_uri=str(model_uri))
     tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
 
@@ -229,8 +232,8 @@ def test_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
                             enforce_eager=True,
                             model_loader_extra_config=tensorizer_config,
                             max_num_seqs=13,
-                            max_loras=2,
-                            gpu_memory_utilization=0.3)
+                            tensor_parallel_size=2,
+                            max_loras=2)
 
     tensorizer_config_dict = tensorizer_config.to_dict()
 
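For context, a minimal sketch of what the new "model-rank-%03d.tensors" template implies (an assumption inferred from the URI the test constructs: under tensor parallelism the tensorizer integration substitutes each rank into the %03d placeholder, yielding one serialized file per rank):

# Illustrative only, not vLLM code: expands the per-rank file name template
# the same way a "%03d" rank substitution would.
tp_size = 2
model_name = "model-rank-%03d.tensors"
per_rank_files = [model_name % rank for rank in range(tp_size)]
print(per_rank_files)  # ['model-rank-000.tensors', 'model-rank-001.tensors']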