1+ #
2+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+ # This file is a part of the vllm-ascend project.
4+ # Adapted from vllm-project/blob/main/tests/entrypoints/llm/test_accuracy.py
5+ # Copyright 2023 The vLLM team.
6+ #
7+ # Licensed under the Apache License, Version 2.0 (the "License");
8+ # you may not use this file except in compliance with the License.
9+ # You may obtain a copy of the License at
10+ #
11+ # http://www.apache.org/licenses/LICENSE-2.0
12+ #
13+ # Unless required by applicable law or agreed to in writing, software
14+ # distributed under the License is distributed on an "AS IS" BASIS,
15+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+ # See the License for the specific language governing permissions and
17+ # limitations under the License.
18+ #
19+ import os
20+
21+ import lm_eval
22+ import pytest
23+
# Model identifier passed to lm-eval as the ``pretrained=`` model argument.
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
# Task name used to look up the scores in the lm-eval results dict.
TASK = "gsm8k"
# Key of the metric read from the per-task results.
FILTER = "exact_match,strict-match"
# Allowed deviation of the measured score from EXPECTED_VALUE.
# NOTE(review): despite the "R" in the name, run_test() applies this as an
# absolute +/- margin, not as a relative tolerance.
RTOL = 0.03
# Reference score that the measured metric is compared against.
EXPECTED_VALUE = 0.58
29+
30+
def run_test(more_args=None):
    """Run the end-to-end accuracy check through lm-eval's ``vllm`` backend.

    Builds the comma-separated ``model_args`` string, evaluates ``TASK``
    with ``lm_eval.simple_evaluate`` and asserts that the ``FILTER`` metric
    lies strictly within ``RTOL`` of ``EXPECTED_VALUE``.

    Args:
        more_args: Optional string of additional comma-separated
            ``key=value`` model arguments, appended after the defaults.
            ``None`` (the default) appends nothing.

    Raises:
        AssertionError: If the measured metric deviates from
            ``EXPECTED_VALUE`` by ``RTOL`` or more.
    """
    # Keep the string free of whitespace around the commas: it is handed to
    # lm-eval verbatim, so a stray space would become part of an argument
    # value (e.g. the model name).
    model_args = f"pretrained={MODEL_NAME},max_model_len=4096"
    if more_args is not None:
        model_args = f"{model_args},{more_args}"

    results = lm_eval.simple_evaluate(
        model="vllm",
        model_args=model_args,
        # Evaluate the same task whose scores are read below, so the two
        # cannot drift apart if TASK is changed.
        tasks=TASK,
        batch_size="auto",
    )

    measured_value = results["results"][TASK][FILTER]
    print("accuracy_measured_value:", measured_value)

    # RTOL is applied as an absolute margin on both sides of the expected
    # value; both bounds are exclusive.
    assert (
        measured_value - RTOL < EXPECTED_VALUE < measured_value + RTOL
    ), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value} "
52+
53+
@pytest.mark.skipif(
    os.environ.get("VLLM_USE_V1") == "1",
    reason=("V1 engine is fully supported in 0.8.X release, "
            "skipping this test."),
)
def test_lm_eval_accuracy(monkeypatch: pytest.MonkeyPatch):
    """Check model accuracy by delegating to ``run_test`` with its defaults.

    The test is skipped when the ``VLLM_USE_V1`` environment variable is
    set to ``"1"``.

    NOTE(review): this was described as the V0-engine run, but nothing here
    selects an engine explicitly; when ``VLLM_USE_V1`` is unset the engine
    choice is left to vLLM's own default -- confirm this is intended.
    """
    # Scoped monkeypatch context: anything patched inside would be undone
    # on exit. Nothing is patched at present.
    with monkeypatch.context():
        run_test()
0 commit comments