vllm-project
diff --git a/‎.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/scripts/run-prime-rl-test.sh‎
Lines changed: 59 additions & 0 deletions b/‎.buildkite/scripts/run-prime-rl-test.sh‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎.buildkite/test-pipeline.yaml‎
Lines changed: 30 additions & 2 deletions b/‎.buildkite/test-pipeline.yaml‎
Lines changed: 30 additions & 2 deletions
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions b/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/750-RFC.yml‎
Lines changed: 0 additions & 4 deletions b/‎.github/ISSUE_TEMPLATE/750-RFC.yml‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 16 additions & 23 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 16 additions & 23 deletions
diff --git a/‎.readthedocs.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.readthedocs.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmarks/auto_tune/auto_tune.sh‎
Lines changed: 6 additions & 1 deletion b/‎benchmarks/auto_tune/auto_tune.sh‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎benchmarks/benchmark_serving_structured_output.py‎
Lines changed: 2 additions & 1 deletion b/‎benchmarks/benchmark_serving_structured_output.py‎
Lines changed: 2 additions & 1 deletion
@@ -62,7 +62,7 @@ echo "--- Installing Python dependencies ---"
 python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
     && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
     && python3 -m pip install --progress-bar off "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d" \
-    && python3 -m pip install --progress-bar off hf-transfer
+    && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
 echo "--- Python dependencies installed ---"
 export VLLM_USE_V1=1
 export VLLM_XLA_CHECK_RECOMPILATION=1
 
@@ -62,7 +62,7 @@ echo "--- Installing Python dependencies ---"
 python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
     && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
     && python3 -m pip install --progress-bar off "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d" \
-    && python3 -m pip install --progress-bar off hf-transfer
+    && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
 echo "--- Python dependencies installed ---"
 export VLLM_USE_V1=1
 export VLLM_XLA_CHECK_RECOMPILATION=1
 
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Setup-and-run script for the Prime-RL integration tests.
+# Clones Prime-RL, syncs its dependencies (vllm pin removed), then runs its GPU-marked test.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+PRIME_RL_REPO="https://github.com/PrimeIntellect-ai/prime-rl.git"
+PRIME_RL_DIR="${REPO_ROOT}/prime-rl"
+
+echo "Setting up Prime-RL integration test environment..."
+
+# Clean up any existing Prime-RL directory (":?" refuses to expand an empty path)
+if [ -d "${PRIME_RL_DIR}" ]; then
+    echo "Removing existing Prime-RL directory..."
+    rm -rf -- "${PRIME_RL_DIR:?}"
+fi
+
+# Install UV if not available
+if ! command -v uv &> /dev/null; then
+    echo "Installing UV package manager..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    source "${HOME}/.local/bin/env"
+fi
+
+# Clone the Prime-RL repository at a fixed branch
+PRIME_RL_BRANCH="integ-vllm-main"
+echo "Cloning Prime-RL repository at branch: ${PRIME_RL_BRANCH}..."
+git clone --branch "${PRIME_RL_BRANCH}" --single-branch "${PRIME_RL_REPO}" "${PRIME_RL_DIR}"
+cd "${PRIME_RL_DIR}"
+
+echo "Setting up UV project environment..."
+export UV_PROJECT_ENVIRONMENT=/usr/local
+ln -sf /usr/bin/python3 /usr/local/bin/python # -f: do not abort under `set -e` if the link already exists
+
+# Remove vllm pin from pyproject.toml
+echo "Removing vllm pin from pyproject.toml..."
+sed -i '/vllm==/d' pyproject.toml
+
+# Sync Prime-RL dependencies as separate statements: `set -e` ignores a failure on the left of `&&`
+echo "Installing Prime-RL dependencies..."
+uv sync --inexact
+uv sync --inexact --all-extras
+
+# Verify installation
+echo "Verifying installations..."
+uv run python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
+uv run python -c "import prime_rl; print('Prime-RL imported successfully')"
+
+echo "Prime-RL integration test environment setup complete!"
+echo "Running Prime-RL integration tests..."
+export WANDB_MODE=offline # this makes this test not require a WANDB_API_KEY
+uv run pytest -vs tests/integration/test_rl.py -m gpu
+
+echo "Prime-RL integration tests completed!"
@@ -164,11 +164,20 @@ steps:
   - tests/v1/test_internal_lb_dp.py
   - tests/v1/test_hybrid_lb_dp.py
   - tests/v1/engine/test_engine_core_client.py
+  - tests/distributed/test_symm_mem_allreduce.py
   commands:
-  # test with tp=2 and external_dp=2
+  # test with torchrun tp=2 and external_dp=2
   - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
-  # test with tp=2 and pp=2
+  # test with torchrun tp=2 and pp=2
   - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
+  # test with torchrun tp=4 and dp=1
+  - TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  # test with torchrun tp=2, pp=2 and dp=1
+  - PP_SIZE=2 TP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  # test with torchrun tp=1 and dp=4 with ep
+  - DP_SIZE=4 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  # test with torchrun tp=2 and dp=2 with ep
+  - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
   # test with internal dp
   - python3 ../examples/offline_inference/data_parallel.py --enforce-eager
   - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
@@ -180,6 +189,7 @@ steps:
   - pytest -v -s compile/test_basic_correctness.py
   - pytest -v -s distributed/test_pynccl.py
   - pytest -v -s distributed/test_events.py
+  - pytest -v -s distributed/test_symm_mem_allreduce.py
   # TODO: create a dedicated test section for multi-GPU example tests
   # when we have multiple distributed example tests
   - pushd ../examples/offline_inference
@@ -321,6 +331,8 @@ steps:
     - python3 offline_inference/basic/classify.py
     - python3 offline_inference/basic/embed.py
     - python3 offline_inference/basic/score.py
+    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
 
 - label: Platform Tests (CUDA) # 4min
   timeout_in_minutes: 15
@@ -875,6 +887,8 @@ steps:
   - tests/v1/test_external_lb_dp.py
   - tests/v1/entrypoints/openai/test_multi_api_servers.py
   - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/v1/worker/test_worker_memory_snapshot.py
   commands:
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py
@@ -896,6 +910,7 @@ steps:
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
   - pytest -v -s models/multimodal/generation/test_maverick.py
+  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
 
 - label: Plugin Tests (2 GPUs) # 40min
   timeout_in_minutes: 60
@@ -1029,3 +1044,16 @@ steps:
   num_gpus: 2
   commands:
     - pytest -v -s tests/distributed/test_context_parallel.py
+    - pytest -v -s tests/distributed/test_nccl_symm_mem_allreduce.py
+
+##### RL Integration Tests #####
+- label: Prime-RL Integration Test # 15min
+  timeout_in_minutes: 30
+  optional: true
+  num_gpus: 2
+  working_dir: "/vllm-workspace"
+  source_file_dependencies:
+  - vllm/
+  - .buildkite/scripts/run-prime-rl-test.sh
+  commands:
+    - bash .buildkite/scripts/run-prime-rl-test.sh
@@ -72,6 +72,7 @@ mkdocs.yaml @hmellor
 # Linting
 .markdownlint.yaml @hmellor
 .pre-commit-config.yaml @hmellor
+/tools/pre_commit @hmellor
 
 # CPU
 /vllm/v1/worker/cpu* @bigPYJ1151
 
@@ -43,10 +43,6 @@ body:
       Any other things you would like to mention.
   validations:
     required: false
-- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉! The vLLM core team hosts a biweekly RFC review session at 9:30AM Pacific Time, while most RFCs can be discussed online, you can optionally sign up for a slot to discuss your RFC online [here](https://docs.google.com/document/d/1CiLVBZeIVfR7_PNAKVSusxpceywkoOOB78qoWqHvSZc/edit).
 - type: checkboxes
   id: askllm
   attributes:
 
@@ -60,38 +60,32 @@ repos:
     files: ^requirements/test\.(in|txt)$
   - id: mypy-local
     name: Run mypy for local Python installation
-    entry: tools/mypy.sh 0 "local"
-    language: python
-    types: [python]
-    additional_dependencies: &mypy_deps [mypy==1.11.1, types-cachetools, types-setuptools, types-PyYAML, types-requests, pydantic]
+    entry: python tools/pre_commit/mypy.py 0 "local"
     stages: [pre-commit] # Don't run in CI
+    <<: &mypy_common
+      language: python
+      types_or: [python, pyi]
+      require_serial: true
+      additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
   - id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.9
-    entry: tools/mypy.sh 1 "3.9"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.9"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.10
-    entry: tools/mypy.sh 1 "3.10"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.10"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.11
-    entry: tools/mypy.sh 1 "3.11"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.11"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.12
-    entry: tools/mypy.sh 1 "3.12"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.12"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: shellcheck
     name: Lint shell scripts
@@ -155,11 +149,10 @@ repos:
     additional_dependencies: [regex]
   - id: check-pickle-imports
     name: Prevent new pickle/cloudpickle imports
-    entry: python tools/check_pickle_imports.py
+    entry: python tools/pre_commit/check_pickle_imports.py
     language: python
     types: [python]
-    pass_filenames: false
-    additional_dependencies: [pathspec, regex]
+    additional_dependencies: [regex]
   - id: validate-config
     name: Validate configuration has default values and that each field has a docstring
     entry: python tools/validate_config.py
 
@@ -13,6 +13,7 @@ build:
 
 mkdocs:
   configuration: mkdocs.yaml
+  fail_on_warning: true
 
 # Optionally declare the Python requirements required to build your docs
 python:
 
@@ -103,10 +103,15 @@ start_server() {
         VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 \
             vllm serve "${common_args_array[@]}" > "$vllm_log" 2>&1 &
     fi
+    local server_pid=$!
 
     # wait for 10 minutes...
     server_started=0
     for i in {1..60}; do
+        # Stop waiting as soon as the server process has died. We always have permission
+        # to signal our own background job, so a failing `kill -0` means it is gone.
+        kill -0 $server_pid 2> /dev/null || break
+
         RESPONSE=$(curl -s -X GET "http://0.0.0.0:8004/health" -w "%{http_code}" -o /dev/stdout)
         STATUS_CODE=$(echo "$RESPONSE" | tail -n 1)
         if [[ "$STATUS_CODE" -eq 200 ]]; then
@@ -118,7 +123,7 @@ start_server() {
     done
 
     if (( ! server_started )); then
-        echo "server did not start within 10 minutes. Please check server log at $vllm_log".
+        echo "server did not start within 10 minutes or crashed. Please check server log at $vllm_log".
         return 1
     else
         return 0
 
@@ -449,7 +449,8 @@ async def benchmark(
     def prepare_extra_body(request) -> dict:
         extra_body = {}
         # Add the schema to the extra_body
-        extra_body[request.structure_type] = request.schema
+        extra_body["structured_outputs"] = {}
+        extra_body["structured_outputs"][request.structure_type] = request.schema
         return extra_body
 
     print("Starting initial single prompt test run...")