vllm/multimodal/hasher.py (14 changes: 13 additions & 1 deletion)
@@ -43,7 +43,19 @@ def serialize_item(cls, obj: object) -> Union[bytes, memoryview]:
             return cls.item_to_bytes(
                 "image", np.asarray(convert_image_mode(obj, "RGBA")))
         if isinstance(obj, torch.Tensor):
-            return cls.item_to_bytes("tensor", obj.cpu().numpy())
+            tensor_obj: torch.Tensor = obj.cpu()
+            tensor_dtype = tensor_obj.dtype
+            if tensor_dtype == torch.bfloat16:
+                tensor_obj = tensor_obj.contiguous()
+                tensor_obj = tensor_obj.view(
+                    (tensor_obj.numel(), )).view(torch.uint8)
+                return cls.item_to_bytes(
+                    "tensor", {
+                        "original_dtype": str(tensor_dtype),
+                        "original_shape": tuple(tensor_obj.shape),
Comment on lines +54 to +55
Contributor:
Sorry for being late to the party. This is still the 1D shape of the uint8 tensor, since tensor_obj is overwritten above. This should be obj.shape instead of tensor_obj.shape.

Could you extend the tests to verify the bfloat16 code path as well:

def test_hash_collision_array_shape():
    # The hash should be different though the data is the same when flattened
    arr1 = np.zeros((5, 10, 20, 3))
    arr2 = np.zeros((10, 20, 5, 3))
    hasher = MultiModalHasher
    assert hasher.hash_kwargs(data=arr1) != hasher.hash_kwargs(data=arr2)

Otherwise it would cause another CVE similar to #17378.
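A minimal sketch of such a bfloat16 test, assuming MultiModalHasher is importable from vllm.multimodal.hasher (the file changed above) and that hash_kwargs accepts torch tensors directly, might look like:

import torch

from vllm.multimodal.hasher import MultiModalHasher


def test_hash_collision_bfloat16_tensor_shape():
    # Tensors with identical flattened bytes but different shapes
    # must hash differently.
    t1 = torch.zeros((5, 10, 20, 3), dtype=torch.bfloat16)
    t2 = torch.zeros((10, 20, 5, 3), dtype=torch.bfloat16)
    hasher = MultiModalHasher
    assert hasher.hash_kwargs(data=t1) != hasher.hash_kwargs(data=t2)

With the tensor_obj.shape bug, both tensors record the same flattened 1D shape and the same zero bytes, so the assertion fails; recording obj.shape instead makes it pass.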

Member:
Oops, let me open another PR to fix

"data": tensor_obj.numpy()
})
return cls.item_to_bytes("tensor", tensor_obj.numpy())
if isinstance(obj, np.ndarray):
# If the array is non-contiguous, we need to copy it first
arr_data = obj.data if obj.flags.c_contiguous else obj.tobytes()
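For background on why the tensor path special-cases bfloat16 at all: numpy has no native bfloat16 dtype, so calling .numpy() on a bfloat16 tensor raises a TypeError, and the new code therefore reinterprets the underlying storage as uint8 bytes before converting. A minimal standalone sketch of that reinterpretation (not code from this PR):

import torch

t = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=torch.bfloat16)
# t.numpy() would fail here, since numpy cannot represent bfloat16.
# Reinterpret the 2-byte elements as raw bytes instead:
raw = t.contiguous().view((t.numel(),)).view(torch.uint8)
print(raw.shape)          # torch.Size([8]) -- 4 elements * 2 bytes each
print(raw.numpy().dtype)  # uint8

Because this view flattens the tensor, the shape recorded for hashing has to come from the original obj rather than the reshaped tensor_obj, which is the point of the review comment above.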