Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions .buildkite/pipeline_jax.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ steps:
exit 0
fi

- label: "lora tests for JAX + vLLM models single chip"
- label: "lora e2e tests for JAX + vLLM models single chip"
key: test_10
soft_fail: true
agents:
Expand All @@ -160,8 +160,7 @@ steps:
- |
if [[ "$$NIGHTLY" == "1" ]]; then
.buildkite/scripts/run_in_docker.sh \
bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py && \
python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_bgmv.py'
bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py'
else
echo "Skipping: NIGHTLY environment variable not set"
exit 0
Expand Down Expand Up @@ -203,7 +202,7 @@ steps:
exit 0
fi

- label: "lora tests for JAX + vLLM models multi chips"
- label: "lora e2e tests for JAX + vLLM models multi chips"
key: test_13
soft_fail: true
env:
Expand Down Expand Up @@ -233,6 +232,29 @@ steps:
.buildkite/scripts/run_in_docker.sh \
bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_data_parallel.py'

# LoRA unit tests (single chip): runs tests/lora/test_bgmv.py and then, only if
# that pytest invocation succeeds (`&&`), tests/lora/test_layers.py, both through
# the run_in_docker.sh wrapper on the tpu_v6e_queue agents.
# Unlike the LoRA e2e steps in this file, this step has no NIGHTLY guard.
- label: "lora unit tests on single chip"
key: test_15
# NOTE(review): soft_fail presumably keeps a failure here from failing the
# whole build (see Buildkite docs); the notification step lists this key.
soft_fail: true
agents:
queue: tpu_v6e_queue
commands:
- |
.buildkite/scripts/run_in_docker.sh \
bash -c ' python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_bgmv.py && \
python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_layers.py'

# LoRA unit tests (multi chip): runs only tests/lora/test_layers.py through the
# run_in_docker.sh wrapper on the tpu_v6e_8_queue agents.
# This step has no NIGHTLY guard.
- label: "lora unit tests on multi chips"
key: test_16
# NOTE(review): soft_fail presumably keeps a failure here from failing the
# whole build (see Buildkite docs); the notification step lists this key.
soft_fail: true
env:
# NOTE(review): USE_V6E8_QUEUE is presumably read by run_in_docker.sh or the
# test code to select the 8-chip setup; not visible here, confirm.
USE_V6E8_QUEUE: "True"
VLLM_LOG_LEVEL: "INFO"
agents:
queue: tpu_v6e_8_queue
commands:
- |
.buildkite/scripts/run_in_docker.sh \
bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_layers.py'
# -----------------------------------------------------------------
# NOTIFICATION STEP
# -----------------------------------------------------------------
Expand All @@ -253,9 +275,11 @@ steps:
- test_12
- test_13
- test_14
- test_15
- test_16
agents:
queue: cpu
commands:
- |
.buildkite/scripts/check_results.sh \
"TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13
"TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13 test_14 test_15 test_16
32 changes: 32 additions & 0 deletions tests/lora/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import tempfile

import pytest
from vllm.config import set_current_vllm_config
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.distributed.parallel_state import (ensure_model_parallel_initialized,
init_distributed_environment)
from vllm.engine.arg_utils import EngineArgs


@pytest.fixture
def dist_init():
    """Set up a single-process vLLM distributed environment for one test.

    Builds an engine config for ``Qwen/Qwen2-1.5B-Instruct`` with small
    limits (64-token context, 64 batched tokens, 4 sequences), makes it the
    current vLLM config, and initialises the distributed / model-parallel
    state with the ``gloo`` backend using a file-based rendezvous.

    Yields:
        The ``VllmConfig`` created from the engine arguments.

    Teardown always runs ``cleanup_dist_env_and_memory`` and removes the
    rendezvous file, even if setup failed part-way through.
    """
    # Function-scope import keeps this fixture self-contained.
    import os

    engine_args = EngineArgs(
        model="Qwen/Qwen2-1.5B-Instruct",
        max_model_len=64,
        max_num_batched_tokens=64,
        max_num_seqs=4,
    )
    vllm_config = engine_args.create_engine_config()

    # mkstemp() returns an *open* descriptor plus the path. Only the path is
    # needed for the file:// init method, so close the descriptor right away
    # instead of leaking one per test.
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)

    try:
        with set_current_vllm_config(vllm_config):
            init_distributed_environment(
                1,
                0,
                local_rank=0,
                distributed_init_method=f"file://{temp_file}",
                backend="gloo",
            )
            ensure_model_parallel_initialized(1, 1)
            yield vllm_config
    finally:
        # Tear down global distributed state so later tests start clean.
        cleanup_dist_env_and_memory(shutdown_ray=True)
        # Best-effort removal: the store may already have deleted the file.
        try:
            os.remove(temp_file)
        except FileNotFoundError:
            pass
Loading