From ee260704aa4c3d8a96e4a2c9a24feda95d99d3e7 Mon Sep 17 00:00:00 2001
From: cyy
Date: Wed, 13 Aug 2025 12:10:09 +0800
Subject: [PATCH 1/3] Fix CUDA sync of qwen image and video preprocess

Signed-off-by: cyy
---
 vllm/model_executor/models/qwen2_5_vl.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 6bea180ffec9..05e9afcdbfb6 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -976,10 +976,13 @@ def _process_image_input(
         image_embeds = self.visual(pixel_values, grid_thw=grid_thw_list)

         # Split concatenated embeddings for each image item.
+        # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync
         merge_size = self.visual.spatial_merge_size
-        sizes = grid_thw.prod(-1) // merge_size // merge_size
+        sizes = (
+            torch.prod(torch.tensor(grid_thw_list, dtype=torch.long), -1) //
+            merge_size // merge_size).tolist()

-        return image_embeds.split(sizes.tolist())
+        return image_embeds.split(sizes)

     def _process_video_input(
         self,
@@ -998,9 +1001,12 @@ def _process_video_input(

         # Split concatenated embeddings for each video item.
         merge_size = self.visual.spatial_merge_size
-        sizes = grid_thw.prod(-1) // merge_size // merge_size
+        # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync
+        sizes = (
+            torch.prod(torch.tensor(grid_thw_list, dtype=torch.long), -1) //
+            merge_size // merge_size).tolist()

-        return video_embeds.split(sizes.tolist())
+        return video_embeds.split(sizes)

     def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
         mm_input_by_modality = {}

From 24215cf405063f026f31a334524e94e36eaa4f62 Mon Sep 17 00:00:00 2001
From: Yuanyuan Chen
Date: Wed, 13 Aug 2025 18:14:47 +0800
Subject: [PATCH 2/3] Update vllm/model_executor/models/qwen2_5_vl.py

Co-authored-by: Cyrus Leung
Signed-off-by: Yuanyuan Chen
---
 vllm/model_executor/models/qwen2_5_vl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 05e9afcdbfb6..15d1124ea0a7 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -979,7 +979,7 @@ def _process_image_input(
         # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync
         merge_size = self.visual.spatial_merge_size
         sizes = (
-            torch.prod(torch.tensor(grid_thw_list, dtype=torch.long), -1) //
+            torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) //
             merge_size // merge_size).tolist()

         return image_embeds.split(sizes)

From 19d76852dafd48cd2892eace6c28009d182bef80 Mon Sep 17 00:00:00 2001
From: cyy
Date: Wed, 13 Aug 2025 18:18:27 +0800
Subject: [PATCH 3/3] Apply suggestion

Signed-off-by: cyy
---
 vllm/model_executor/models/qwen2_5_vl.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 15d1124ea0a7..5bcbcc4f0e37 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -978,9 +978,8 @@ def _process_image_input(
         # Split concatenated embeddings for each image item.
         # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync
         merge_size = self.visual.spatial_merge_size
-        sizes = (
-            torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) //
-            merge_size // merge_size).tolist()
+        sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) //
+                 (merge_size * merge_size)).tolist()

         return image_embeds.split(sizes)

@@ -1002,9 +1001,8 @@ def _process_video_input(
         # Split concatenated embeddings for each video item.
         merge_size = self.visual.spatial_merge_size
         # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync
-        sizes = (
-            torch.prod(torch.tensor(grid_thw_list, dtype=torch.long), -1) //
-            merge_size // merge_size).tolist()
+        sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) //
+                 (merge_size * merge_size)).tolist()

         return video_embeds.split(sizes)
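For context, a minimal standalone sketch of the behaviour this series addresses.
The grid values and merge_size below are invented for illustration; only the size
arithmetic mirrors the patched code. Calling .tolist() on a CUDA tensor forces a
blocking device-to-host copy, whereas building the sizes from the host-side
grid_thw_list keeps the whole computation on the CPU with no implicit sync.

    import torch

    # Hypothetical per-item (t, h, w) patch grids and spatial merge size.
    grid_thw_list = [[1, 4, 6], [1, 8, 8]]
    merge_size = 2

    # Before the patch: prod runs on the GPU, and .tolist() must copy the result
    # back to the host, blocking until the GPU work producing it has finished.
    if torch.cuda.is_available():
        grid_thw = torch.tensor(grid_thw_list, device="cuda")
        sizes = (grid_thw.prod(-1) // merge_size // merge_size).tolist()

    # After the patch: the same arithmetic on a CPU tensor built from the Python
    # list, so there is no device transfer and no synchronization point.
    sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) //
             (merge_size * merge_size)).tolist()

    print(sizes)  # [6, 16] -> per-item embedding lengths passed to .split()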