@@ -350,13 +350,13 @@ def test_eagle_disable_queue(vllm_runner, common_llm_kwargs,
         "dtype": "float16",

         # Main model
-        "model_name": "meta-llama/Llama-2-7b-chat-hf"
+        "model_name": "vllm-ascend/Llama-2-7b-chat-hf"
     }])
 @pytest.mark.parametrize("per_test_common_llm_kwargs", [{}])
 @pytest.mark.parametrize("baseline_llm_kwargs", [{}])
 @pytest.mark.parametrize("test_llm_kwargs", [
     {
-        "speculative_model": "yuhuili/EAGLE-llama2-chat-7B",
+        "speculative_model": "vllm-ascend/EAGLE-llama2-chat-7B",
         "num_speculative_tokens": MAX_SPEC_TOKENS,
     },
 ])
@@ -368,21 +368,25 @@ def test_eagle_disable_queue(vllm_runner, common_llm_kwargs,
     ])
 @pytest.mark.parametrize("batch_size", [1, 5])
 @pytest.mark.parametrize("seed", [1])
-def test_llama2_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
+def test_llama2_eagle_e2e_greedy_correctness(monkeypatch: pytest.MonkeyPatch,
+                                             vllm_runner, common_llm_kwargs,
                                              per_test_common_llm_kwargs,
                                              baseline_llm_kwargs,
                                              test_llm_kwargs, batch_size: int,
                                              output_len: int, seed: int):

-    run_equality_correctness_test(vllm_runner,
-                                  common_llm_kwargs,
-                                  per_test_common_llm_kwargs,
-                                  baseline_llm_kwargs,
-                                  test_llm_kwargs,
-                                  batch_size,
-                                  output_len,
-                                  seed,
-                                  temperature=0.0)
+    # TODO: this is the wrong way to enable modelscope.
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        run_equality_correctness_test(vllm_runner,
+                                      common_llm_kwargs,
+                                      per_test_common_llm_kwargs,
+                                      baseline_llm_kwargs,
+                                      test_llm_kwargs,
+                                      batch_size,
+                                      output_len,
+                                      seed,
+                                      temperature=0.0)


 @pytest.mark.skipif(True, reason="Open it when CI could use modelscope")
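
Note: the hunk above scopes the environment variable with monkeypatch.context() so that VLLM_USE_MODELSCOPE is set only for the duration of the call and restored on exit, leaving other tests in the session untouched. A minimal sketch of that pattern in isolation (the test name and body here are illustrative, not part of this diff):

    import pytest


    def test_env_var_is_scoped(monkeypatch: pytest.MonkeyPatch):
        # While the context is open, vLLM's weight loader sees
        # VLLM_USE_MODELSCOPE=True and pulls models from ModelScope
        # rather than from the Hugging Face Hub.
        with monkeypatch.context() as m:
            m.setenv("VLLM_USE_MODELSCOPE", "True")
            ...  # code under test that loads the model
        # On exit, monkeypatch restores (or unsets) the variable.
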
@@ -399,13 +403,13 @@ def test_llama2_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
         "dtype": "float16",

         # Main model
-        "model_name": "meta-llama/Meta-Llama-3-8B-Instruct"
+        "model_name": "vllm-ascend/Meta-Llama-3-8B-Instruct"
     }])
 @pytest.mark.parametrize("per_test_common_llm_kwargs", [{}])
 @pytest.mark.parametrize("baseline_llm_kwargs", [{}])
 @pytest.mark.parametrize("test_llm_kwargs", [
     {
-        "speculative_model": "yuhuili/EAGLE-LLaMA3-Instruct-8B",
+        "speculative_model": "vllm-ascend/EAGLE-LLaMA3-Instruct-8B",
         "num_speculative_tokens": MAX_SPEC_TOKENS,
     },
 ])
@@ -417,21 +421,25 @@ def test_llama2_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
     ])
 @pytest.mark.parametrize("batch_size", [1, 5])
 @pytest.mark.parametrize("seed", [1])
-def test_llama3_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
+def test_llama3_eagle_e2e_greedy_correctness(monkeypatch: pytest.MonkeyPatch,
+                                             vllm_runner, common_llm_kwargs,
                                              per_test_common_llm_kwargs,
                                              baseline_llm_kwargs,
                                              test_llm_kwargs, batch_size: int,
                                              output_len: int, seed: int):

-    run_equality_correctness_test(vllm_runner,
-                                  common_llm_kwargs,
-                                  per_test_common_llm_kwargs,
-                                  baseline_llm_kwargs,
-                                  test_llm_kwargs,
-                                  batch_size,
-                                  output_len,
-                                  seed,
-                                  temperature=0.0)
+    # TODO: this is the wrong way to enable modelscope.
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        run_equality_correctness_test(vllm_runner,
+                                      common_llm_kwargs,
+                                      per_test_common_llm_kwargs,
+                                      baseline_llm_kwargs,
+                                      test_llm_kwargs,
+                                      batch_size,
+                                      output_len,
+                                      seed,
+                                      temperature=0.0)


 @pytest.mark.parametrize(
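
Both tests now repeat the same three-line monkeypatch block. One way to factor that out (a sketch only; the fixture name modelscope_env is hypothetical, not from this diff):

    import pytest


    @pytest.fixture
    def modelscope_env(monkeypatch: pytest.MonkeyPatch):
        # The monkeypatch fixture is function-scoped, so the variable is
        # undone automatically after each test that requests this fixture.
        monkeypatch.setenv("VLLM_USE_MODELSCOPE", "True")
        yield

Each test would then request modelscope_env in its signature instead of opening its own monkeypatch.context() block.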