We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 081cb98, commit 02d1f85 (Copy full SHA for 02d1f85)
vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -479,8 +479,8 @@ def forward(
479
)
480
481
if self.kv_cache_dtype.startswith("fp8"):
482
- key_cache = key_cache.view(torch.float8_e4m3fnuz)
483
- value_cache = value_cache.view(torch.float8_e4m3fnuz)
+ key_cache = key_cache.view(current_platform.fp8_dtype())
+ value_cache = value_cache.view(current_platform.fp8_dtype())
484
485
if not attn_metadata.use_cascade:
486
cu_seqlens_q = attn_metadata.query_start_loc
0 commit comments