We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 33d5e29, commit a738dbb (Copy full SHA for a738dbb)
.buildkite/scripts/tpu/config_v6e_1.env
@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
4
5
# vllm config
6
MODEL=meta-llama/Llama-3.1-8B-Instruct
7
-MAX_NUM_SEQS=512
8
-MAX_NUM_BATCHED_TOKENS=512
+MAX_NUM_SEQS=256
+MAX_NUM_BATCHED_TOKENS=1024
9
TENSOR_PARALLEL_SIZE=1
10
MAX_MODEL_LEN=2048
11
DOWNLOAD_DIR=/mnt/disks/persist
0 commit comments