vllm-project · vanbasten23 · Nov 8, 2025 · Oct 27, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/.buildkite/pipeline_jax.yml b/.buildkite/pipeline_jax.yml
@@ -151,7 +151,7 @@ steps:
            exit 0
          fi
 
-   - label: "lora tests for JAX + vLLM models single chip"
+   - label: "lora e2e tests for JAX + vLLM models single chip"
      key: test_10
      soft_fail: true
      agents:
@@ -160,8 +160,7 @@ steps:
        - |
          if [[ "$$NIGHTLY" == "1" ]]; then
            .buildkite/scripts/run_in_docker.sh \
-             bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py && \
-             python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_bgmv.py'
+             bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py'
          else
            echo "Skipping: NIGHTLY environment variable not set"
            exit 0
@@ -203,7 +202,7 @@ steps:
            exit 0
          fi
 
-   - label: "lora tests for JAX + vLLM models multi chips"
+   - label: "lora e2e tests for JAX + vLLM models multi chips"
      key: test_13
      soft_fail: true
      env:
@@ -233,6 +232,29 @@ steps:
          .buildkite/scripts/run_in_docker.sh \
            bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_data_parallel.py'
 
+   - label: "lora unit tests on single chip"
+     key: test_15
+     soft_fail: true
+     agents:
+       queue: tpu_v6e_queue
+     commands:
+       - |
+         .buildkite/scripts/run_in_docker.sh \
+           bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_bgmv.py && \
+           python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_layers.py'
+
+   - label: "lora unit tests on multi chips"
+     key: test_16
+     soft_fail: true
+     env:
+       USE_V6E8_QUEUE: "True"
+       VLLM_LOG_LEVEL: "INFO"
+     agents:
+       queue: tpu_v6e_8_queue
+     commands:
+       - |
+         .buildkite/scripts/run_in_docker.sh \
+           bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_layers.py'
   # -----------------------------------------------------------------
   # NOTIFICATION STEP
   # -----------------------------------------------------------------
@@ -253,9 +275,11 @@ steps:
        - test_12
        - test_13
        - test_14
+       - test_15
+       - test_16
      agents:
        queue: cpu
      commands:
        - |
          .buildkite/scripts/check_results.sh \
-           "TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13
+           "TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13 test_14 test_15 test_16
diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
@@ -0,0 +1,56 @@
+import contextlib
+import os
+import tempfile
+
+import pytest
+from vllm.config import set_current_vllm_config
+from vllm.distributed import cleanup_dist_env_and_memory
+from vllm.distributed.parallel_state import (ensure_model_parallel_initialized,
+                                             init_distributed_environment)
+from vllm.engine.arg_utils import EngineArgs
+
+
+@pytest.fixture
+def dist_init():
+    """Initialize a single-process vLLM distributed environment for a test.
+
+    Builds an engine config for a small Qwen2 model, installs it as the
+    current vLLM config, and initializes the distributed environment over
+    the gloo backend using a temporary file as the rendezvous point.
+
+    Yields:
+        The config returned by ``EngineArgs.create_engine_config()``.
+
+    Teardown always calls ``cleanup_dist_env_and_memory`` and removes the
+    rendezvous file, even when initialization itself fails.
+    """
+    engine_args = EngineArgs(
+        model="Qwen/Qwen2-1.5B-Instruct",
+        max_model_len=64,
+        max_num_batched_tokens=64,
+        max_num_seqs=4,
+    )
+
+    vllm_config = engine_args.create_engine_config()
+
+    # mkstemp() hands back an *open* descriptor together with the path. Only
+    # the path is needed for the file:// rendezvous, so close the descriptor
+    # immediately rather than leaking one per test.
+    fd, temp_file = tempfile.mkstemp()
+    os.close(fd)
+
+    try:
+        with set_current_vllm_config(vllm_config):
+            init_distributed_environment(
+                1,
+                0,
+                local_rank=0,
+                distributed_init_method=f"file://{temp_file}",
+                backend="gloo")
+            ensure_model_parallel_initialized(1, 1)
+            yield vllm_config
+    finally:
+        cleanup_dist_env_and_memory(shutdown_ray=True)
+        # The rendezvous file may already have been removed by the store.
+        with contextlib.suppress(OSError):
+            os.remove(temp_file)