diff --git a/tests/models/test_phimoe.py b/tests/models/test_phimoe.py
index 44f811a10611a..2fb2eecc94672 100644
--- a/tests/models/test_phimoe.py
+++ b/tests/models/test_phimoe.py
@@ -3,6 +3,9 @@
 Run `pytest tests/models/test_phimoe.py`.
 """
 import pytest
+import torch
+
+from vllm.utils import is_cpu
 
 from .utils import check_logprobs_close
 
@@ -12,8 +15,6 @@
 
 
 def test_phimoe_routing_function():
-    import torch
-
     from vllm.model_executor.models.phimoe import phimoe_routing_function
     test_case = {
         0: {
@@ -68,6 +69,20 @@ def test_phimoe_routing_function():
         assert torch.equal(topk_ids, ground_truth[test_id]["topk_ids"])
 
 
+def get_gpu_memory():
+    """Return the current CUDA device's total memory in GiB, or 0 on failure."""
+    try:
+        device = torch.cuda.current_device()
+        return torch.cuda.get_device_properties(device).total_memory / 2**30
+    except Exception:
+        return 0
+
+
+@pytest.mark.skipif(condition=is_cpu(),
+                    reason="This test takes a lot time to run on CPU, "
+                    "and vllm CI's disk space is not enough for this model.")
+@pytest.mark.skipif(condition=get_gpu_memory() < 100,
+                    reason="Skip this test if GPU memory is insufficient.")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("max_tokens", [64])