We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bbaf8e9 · commit da3a941 — Copy full SHA for da3a941
vllm/model_executor/models/qwen2_5_vl.py
@@ -428,6 +428,14 @@ def forward(
428
)
429
elif self.attn_backend == _Backend.TORCH_SDPA:
430
# Execute attention entry by entry for speed & less VRAM.
431
+ from vllm.platforms import current_platform
432
+
433
+ # Never remove the next contiguous logic
434
+ # Without it, hallucinations occur with the backend
435
+ if current_platform.is_rocm():
436
+ q = q.contiguous()
437
+ k = k.contiguous()
438
+ v = v.contiguous()
439
outputs = []
440
for i in range(1, len(cu_seqlens)):
441
start_idx = cu_seqlens[i - 1]
0 commit comments