diff --git a/tests/models/test_phimoe.py b/tests/models/test_phimoe.py
index 44f811a10611a..2fb2eecc94672 100644
--- a/tests/models/test_phimoe.py
+++ b/tests/models/test_phimoe.py
@@ -3,6 +3,9 @@
 Run `pytest tests/models/test_phimoe.py`.
 """
 import pytest
+import torch
+
+from vllm.utils import is_cpu
 
 from .utils import check_logprobs_close
 
@@ -12,8 +15,6 @@
 
 
 def test_phimoe_routing_function():
-    import torch
-
     from vllm.model_executor.models.phimoe import phimoe_routing_function
     test_case = {
         0: {
@@ -68,6 +69,26 @@ def test_phimoe_routing_function():
         assert torch.equal(topk_ids, ground_truth[test_id]["topk_ids"])
 
 
+def get_gpu_memory():
+    """Return the total memory of the current CUDA device in GiB.
+
+    Returns 0 when the device properties cannot be queried (for example
+    when no CUDA device is available), so the result can be compared
+    directly inside a ``skipif`` condition evaluated at import time.
+    """
+    try:
+        props = torch.cuda.get_device_properties(torch.cuda.current_device())
+    except Exception:
+        # Best-effort probe: any failure is treated as "no usable GPU".
+        return 0
+    return props.total_memory / (1024**3)
+
+
+@pytest.mark.skipif(condition=is_cpu(),
+                    reason="This test takes a lot time to run on CPU, "
+                    "and vllm CI's disk space is not enough for this model.")
+@pytest.mark.skipif(condition=get_gpu_memory() < 100,
+                    reason="Skip this test if GPU memory is insufficient.")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("max_tokens", [64])