2424import torch
2525from vllm import LLM , SamplingParams
2626
27- MODELS = ["deepseek-ai/DeepSeek-V2-Lite " ]
27+ MODELS = ["Qwen/Qwen2.5-0.5B-Instruct " ]
2828
2929
3030@pytest .mark .parametrize ("model" , MODELS )
@@ -34,34 +34,49 @@ def test_models(
3434 max_tokens : int ,
3535 monkeypatch : pytest .MonkeyPatch ,
3636) -> None :
37- return
38-
3937 prompts = "The president of the United States is"
4038
4139 sampling_params = SamplingParams (
4240 max_tokens = max_tokens ,
4341 temperature = 0.0 ,
4442 )
4543
46- vllm_model = LLM (model , long_prefill_token_threshold = 4 , enforce_eager = True )
47- output_chunked = vllm_model .generate (prompts , sampling_params )
48- logprobs_chunked = output_chunked .outputs [0 ].logprobs
44+ vllm_model = LLM (model ,
45+ long_prefill_token_threshold = 20 ,
46+ enforce_eager = True ,
47+ trust_remote_code = True )
48+ output1 = vllm_model .generate (prompts , sampling_params )
4949 del vllm_model
5050 torch .npu .empty_cache ()
5151
5252 vllm_model = LLM (model ,
5353 enforce_eager = True ,
54+ trust_remote_code = True ,
5455 additional_config = {
5556 'ascend_scheduler_config' : {
5657 'enabled' : True
5758 },
5859 })
59- output = vllm_model .generate (prompts , sampling_params )
60- logprobs = output .outputs [0 ].logprobs
60+ output2 = vllm_model .generate (prompts , sampling_params )
6161 del vllm_model
6262 torch .npu .empty_cache ()
6363
64- logprobs_similarity = torch .cosine_similarity (logprobs_chunked .flatten (),
65- logprobs .flatten (),
66- dim = 0 )
67- assert logprobs_similarity > 0.95
64+ # Extract the generated token IDs for comparison
65+ token_ids1 = output1 [0 ].outputs [0 ].token_ids
66+ token_ids2 = output2 [0 ].outputs [0 ].token_ids
67+
68+ print (f"Token IDs 1: { token_ids1 } " )
69+ print (f"Token IDs 2: { token_ids2 } " )
70+
71+ # Convert token IDs to tensors and calculate cosine similarity
72+ # Truncate both sequences to the shorter length to ensure consistent dimensions
73+ min_len = min (len (token_ids1 ), len (token_ids2 ))
74+
75+ tensor1 = torch .tensor (token_ids1 [:min_len ], dtype = torch .float32 )
76+ tensor2 = torch .tensor (token_ids2 [:min_len ], dtype = torch .float32 )
77+
78+ # Calculate similarity using torch.cosine_similarity
79+ similarity = torch .cosine_similarity (tensor1 , tensor2 , dim = 0 )
80+ print (f"Token IDs cosine similarity: { similarity .item ()} " )
81+
82+ assert similarity > 0.95
0 commit comments