2 files changed, +8 −3 lines.

@@ -115,10 +115,16 @@ def model(x):
 
 
 @fork_new_process_for_each_test
-def test_end_to_end():
+@pytest.mark.parametrize(
+    "model",
+    [
+        "meta-llama/Llama-3.2-1B",  # sleep mode with safetensors
+        "facebook/opt-125m"  # sleep mode with pytorch checkpoint
+    ])
+def test_end_to_end(model):
     free, total = torch.cuda.mem_get_info()
     used_bytes_baseline = total - free  # in case other process is running
-    llm = LLM("meta-llama/Llama-3.2-1B", enable_sleep_mode=True)
+    llm = LLM(model, enable_sleep_mode=True)
     prompt = "How are you?"
     sampling_params = SamplingParams(temperature=0, max_tokens=10)
     output = llm.generate(prompt, sampling_params)
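The new @pytest.mark.parametrize decorator turns the single end-to-end test into one test case per model, so sleep mode is exercised against both a safetensors checkpoint and a legacy PyTorch .bin checkpoint. Below is a minimal, self-contained sketch of the same pattern; the CHECKPOINT_FORMATS table and load_checkpoint helper are illustrative stand-ins, not vLLM code.

import pytest

# Illustrative mapping of model IDs to the checkpoint format each one ships with.
CHECKPOINT_FORMATS = {
    "meta-llama/Llama-3.2-1B": "safetensors",
    "facebook/opt-125m": "pytorch_bin",
}


def load_checkpoint(model_id: str) -> str:
    # Stand-in for whatever weight-loading path the real test exercises.
    return CHECKPOINT_FORMATS[model_id]


@pytest.mark.parametrize("model", sorted(CHECKPOINT_FORMATS))
def test_checkpoint_format_is_loadable(model):
    # Each model ID becomes its own test case, so a failure on the pytorch-bin
    # path is reported separately from a failure on the safetensors path.
    assert load_checkpoint(model) in {"safetensors", "pytorch_bin"}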
vllm/model_executor/model_loader:

@@ -462,7 +462,6 @@ def pt_weights_iterator(
         state = torch.load(bin_file, map_location="cpu", weights_only=True)
         yield from state.items()
         del state
-        torch.cuda.empty_cache()
 
 
 def get_gguf_extra_tensor_names(
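The removed torch.cuda.empty_cache() ran once per .bin shard even though torch.load(..., map_location="cpu") never places tensors on the GPU, so the call only added per-shard overhead during checkpoint loading. A hedged sketch of a CPU-side .bin weight iterator in the same spirit follows; the iter_pt_weights name and the glob-based file discovery are illustrative assumptions, not vLLM's exact implementation.

import glob
import os
from typing import Generator, Tuple

import torch


def iter_pt_weights(
        checkpoint_dir: str) -> Generator[Tuple[str, torch.Tensor], None, None]:
    # Walk the .bin shards in a stable order.
    for bin_file in sorted(glob.glob(os.path.join(checkpoint_dir, "*.bin"))):
        # map_location="cpu" keeps every tensor on the host, so
        # torch.cuda.empty_cache() would have nothing to release here.
        state = torch.load(bin_file, map_location="cpu", weights_only=True)
        yield from state.items()
        # Drop the reference so the previous shard's host memory can be
        # reclaimed before the next shard is loaded.
        del state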