[Bugfix] Fix shape checking for Fuyu (vllm-project#21709)

DarkLight1337 · epwalsh · commit 76f9cbf9f8b1 · 2025-08-27T16:55:29.000-07:00
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py
@@ -55,14 +55,15 @@ class FuyuImagePatchInputs(TensorSchema):
     """
     Dimensions:
         - bn: Batch size * number of images
-        - fn: Num channels * patch_size_x * patch_size_y
+        - bnp: Batch size * number of images * number of patches
+        - fn: patch_size_x * patch_size_y * num_channels
     """
 
     type: Literal["image_patches"] = "image_patches"
 
     flat_data: Annotated[
         torch.Tensor,
-        TensorShape("bn", "fn"),
+        TensorShape("bnp", "fn"),
     ]
 
     patches_per_image: Annotated[list[int], TensorShape("bn")]
@@ -309,8 +310,8 @@ def _parse_and_validate_image_input(
         image_patches = kwargs.pop("image_patches", None)
         if image_patches is not None:
             image_patches_flat = flatten_bn(image_patches)
-            flat_data = flatten_bn(image_patches, concat=True).data.to(
-                self.vision_embed_tokens.weight.dtype)
+            flat_data = flatten_bn(image_patches_flat, concat=True)
+
             return FuyuImagePatchInputs(
                 type="image_patches",
                 flat_data=flat_data,