Skip to content

Commit 07b1349

Browse files
authored
Merge branch 'main' into truncate-prompt-tokens-default-1
2 parents 744db15 + b545a0b commit 07b1349

File tree

2,167 files changed

+220662
-179761
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

2,167 files changed

+220662
-179761
lines changed

.buildkite/check-wheel-size.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
import sys
66
import zipfile
77

8-
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
9-
# Note that we have 400 MiB quota, please use it wisely.
10-
# See https://github.com/pypi/support/issues/3792 .
8+
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 450 MiB
9+
# Note that we have 800 MiB quota, please use it wisely.
10+
# See https://github.com/pypi/support/issues/6326 .
1111
# Please also sync the value with the one in Dockerfile.
12-
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
12+
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 450))
1313

1414

1515
def print_top_10_largest_files(zip_file):

.buildkite/nightly-benchmarks/nightly-descriptions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This benchmark aims to:
88

99
Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end.
1010

11-
Latest reproduction guilde: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
11+
Latest reproduction guide: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
1212

1313
## Setup
1414

.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ def parse_client_command(cmd: str) -> dict[str, Any]:
368368
# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
369369
# we want to turn it into "8xGPUTYPE"
370370
df["GPU"] = df["GPU"].apply(
371-
lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
371+
lambda x: f"{len(x.splitlines())}x{x.splitlines()[0]}"
372372
)
373373

374374
# get markdown tables

.buildkite/nightly-benchmarks/scripts/launch-server.sh

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,18 +181,14 @@ launch_vllm_server() {
181181
if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
182182
echo "Key 'fp8' exists in common params. Use neuralmagic fp8 model for convenience."
183183
model=$(echo "$common_params" | jq -r '.neuralmagic_quantized_model')
184-
server_command="python3 \
185-
-m vllm.entrypoints.openai.api_server \
184+
server_command="vllm serve $model \
186185
-tp $tp \
187-
--model $model \
188186
--port $port \
189187
$server_args"
190188
else
191189
echo "Key 'fp8' does not exist in common params."
192-
server_command="python3 \
193-
-m vllm.entrypoints.openai.api_server \
190+
server_command="vllm serve $model \
194191
-tp $tp \
195-
--model $model \
196192
--port $port \
197193
$server_args"
198194
fi

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ run_serving_tests() {
365365
continue
366366
fi
367367

368-
server_command="$server_envs python3 \
369-
-m vllm.entrypoints.openai.api_server \
368+
server_command="$server_envs vllm serve \
370369
$server_args"
371370

372371
# run the server
@@ -455,11 +454,6 @@ main() {
455454
fi
456455
check_hf_token
457456

458-
# Set to v1 to run v1 benchmark
459-
if [[ "${ENGINE_VERSION:-v0}" == "v1" ]]; then
460-
export VLLM_USE_V1=1
461-
fi
462-
463457
# dependencies
464458
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
465459
(which jq) || (apt-get update && apt-get -y install jq)

0 commit comments

Comments
 (0)