@@ -9,6 +9,7 @@
 
 from vllm.platforms import current_platform
 
+from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close
 
 # These have unsupported head_dim for FA. We do not
@@ -33,54 +34,50 @@
 
 # @maybe_test_rocm_aiter
 @pytest.mark.parametrize(
-    "model",
+    "model_arch",
     [
         pytest.param(
-            "bigscience/bloom-560m",  # bloom - testing alibi slopes
+            "BloomForCausalLM",  # testing alibi slopes
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openai-community/gpt2",  # gpt2
+            "GPT2LMHeadModel",  # gpt2
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
-        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
-        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+        pytest.param("GPTJForCausalLM"),
+        pytest.param("GPTBigCodeForCausalLM"),
+        pytest.param("GPTNeoXForCausalLM"),
         pytest.param(
-            "google/gemma-1.1-2b-it",  # gemma
+            "GemmaForCausalLM",  # gemma
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
+        pytest.param("GlmForCausalLM"),
         pytest.param(
-            "THUDM/chatglm3-6b",  # chatglm (text-only)
-        ),
-        pytest.param(
-            "meta-llama/Llama-3.2-1B-Instruct",  # llama
+            "LlamaForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openbmb/MiniCPM3-4B",
+            "MiniCPM3ForCausalLM",
             # fused_moe not supported on CPU
             marks=[pytest.mark.core_model],
         ),
         pytest.param(
-            "facebook/opt-125m",  # opt
+            "OPTForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "microsoft/phi-2",  # phi
+            "PhiForCausalLM",
             marks=[pytest.mark.core_model],
         ),
+        pytest.param("QWenLMHeadModel"),
         pytest.param(
-            "Qwen/Qwen-7B",  # qwen (text-only)
-        ),
-        pytest.param(
-            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+            "Qwen2ForCausalLM",
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
-        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+        pytest.param("StableLmForCausalLM"),
+        pytest.param("Starcoder2ForCausalLM"),
         pytest.param(
-            "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
+            "MixtralForCausalLM",
            marks=[pytest.mark.cpu_model],
         )
    ])
@@ -89,9 +86,11 @@
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
-def test_models(hf_runner, vllm_runner, example_prompts, model: str,
+def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
                 dtype: str, max_tokens: int, num_logprobs: int,
                 use_rocm_aiter: bool, monkeypatch) -> None:
 
+    model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default
+
     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
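For context, this refactor works because the shared test registry maps each architecture name to a default Hugging Face checkpoint, so the hard-coded repo names above become registry lookups. Below is a minimal sketch of that lookup, assuming only the API visible in the `+` lines (`HF_EXAMPLE_MODELS`, `get_hf_info`, `.default`); the absolute import path, helper name, and example architecture are illustrative, not taken from the diff.

# Minimal sketch, not the actual test code: resolve an architecture name
# to the default checkpoint registered for it, mirroring the new lookup.
# Assumption: the registry module resolves to tests/models/registry.py;
# in-tree, the test uses the relative import shown in the diff.
from tests.models.registry import HF_EXAMPLE_MODELS

def resolve_checkpoint(model_arch: str) -> str:
    """Map an architecture name to its registered default checkpoint."""
    return HF_EXAMPLE_MODELS.get_hf_info(model_arch).default

# e.g. resolve_checkpoint("LlamaForCausalLM") returns whichever small
# Llama checkpoint the registry designates as the test default.

Parametrizing on architecture names keeps checkpoint choices in one place: swapping a test model now means editing the registry entry rather than every parametrize list that names it.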