From 9ea7506a6358f57b7d1bac2a6837561f42c56a6d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 28 Jun 2024 18:55:17 -0700 Subject: [PATCH] [Bugfix][TPU] Fix pad slot id (#5977) --- vllm/worker/tpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py index 7827f7c7447a1..dd08536efc5fb 100644 --- a/vllm/worker/tpu_model_runner.py +++ b/vllm/worker/tpu_model_runner.py @@ -19,7 +19,7 @@ logger = init_logger(__name__) -_PAD_SLOT_ID = 0 # FIXME(woosuk) +_PAD_SLOT_ID = -1 # NOTE(woosuk): In PyTorch XLA, index -1 is ignored. # FIXME(woosuk): Temporarily disabled top-p sampling since it's too slow. _ENABLE_TOP_P = False # FIXME(woosuk): A temporary hack to support `n > 1`.