From 48f639a1da3c27b12b094ec01f58961dd24c8102 Mon Sep 17 00:00:00 2001
From: Maybewuss <38156589+Maybewuss@users.noreply.github.com>
Date: Mon, 18 Nov 2024 18:05:36 +0800
Subject: [PATCH] [Model] Remove redundant softmax when using PoolingType.STEP (#10415)

Signed-off-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com>
---
 vllm/model_executor/layers/pooler.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py
index 6fee57a0a03eb..bfe2d7d0f382e 100644
--- a/vllm/model_executor/layers/pooler.py
+++ b/vllm/model_executor/layers/pooler.py
@@ -118,14 +118,13 @@ def forward(
             if returned_token_ids is not None and len(returned_token_ids) > 0:
                 hidden_states = hidden_states[:, returned_token_ids]
 
-            logits = hidden_states.softmax(dim=-1)
             step_tag_id = self.step_tag_id
 
             offset = 0
             pooled_data_lst = []
             for prompt_len, seq_data_i in zip(
                     prompt_lens, pooling_metadata.seq_data.values()):
-                pooled_data_i = logits[offset:offset + prompt_len]
+                pooled_data_i = hidden_states[offset:offset + prompt_len]
                 if step_tag_id is not None:
                     token_ids = torch.tensor(seq_data_i.prompt_token_ids)
                     pooled_data_i = pooled_data_i[token_ids == step_tag_id]