File tree: 1 file changed, +5 -4 lines changed
vllm/model_executor/models: 1 file changed, +5 -4 lines changed
Columns: Original file line number | Diff line number | Diff line change
@@ -55,14 +55,15 @@ class FuyuImagePatchInputs(TensorSchema):
5555 """
5656 Dimensions:
5757 - bn: Batch size * number of images
58- - fn: Num channels * patch_size_x * patch_size_y
58+ - bnp: Batch size * number of images * number of patches
59+ - fn: patch_size_x * patch_size_y * num_channels
5960 """
6061
6162 type : Literal ["image_patches" ] = "image_patches"
6263
6364 flat_data : Annotated [
6465 torch .Tensor ,
65- TensorShape ("bn " , "fn" ),
66+ TensorShape ("bnp " , "fn" ),
6667 ]
6768
6869 patches_per_image : Annotated [list [int ], TensorShape ("bn" )]
@@ -309,8 +310,8 @@ def _parse_and_validate_image_input(
309310 image_patches = kwargs .pop ("image_patches" , None )
310311 if image_patches is not None :
311312 image_patches_flat = flatten_bn (image_patches )
312- flat_data = flatten_bn (image_patches , concat = True ). data . to (
313- self . vision_embed_tokens . weight . dtype )
313+ flat_data = flatten_bn (image_patches_flat , concat = True )
314+
314315 return FuyuImagePatchInputs (
315316 type = "image_patches" ,
316317 flat_data = flat_data ,
You can’t perform that action at this time.
0 commit comments