Skip to content

Commit 1cddacf

Browse files
committed
forward fix PR 14245
PR vllm-project#14245 broke compilation for ROCm 6.2. Signed-off-by: Jeff Daily <jeff.daily@amd.com>
1 parent ce20124 commit 1cddacf

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

csrc/quantization/fp8/amd/quant_utils.cuh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,22 @@ __device__ __forceinline__ fp8_type cvt_c10(float const r) {
1919
return {};
2020
}
2121

22+
// __hip_fp8_e4m3 only exists starting in ROCm 6.3. The macro
23+
// HIP_FP8_TYPE_OCP comes from the hip_fp8.h header and also makes
24+
// its first appearance in ROCm 6.3. Since VLLM_DISPATCH_FP8_TYPES
25+
// on ROCm instantiates both OCP and FNUZ kernels, we need to replace
26+
// the new HW cvt with a c10 static_cast fallback that doesn't rely on the
27+
// ROCm 6.3 feature. This allows compiling on ROCm 6.2 or newer.
2228
template <>
2329
__device__ __forceinline__ c10::Float8_e4m3fn cvt_c10(float const r) {
30+
#if HIP_FP8_TYPE_OCP
2431
return c10::Float8_e4m3fn(
2532
__hip_cvt_float_to_fp8(r, __hip_fp8_e4m3::__default_saturation,
2633
__hip_fp8_e4m3::__default_interpret),
2734
c10::Float8_e4m3fn::from_bits());
35+
#else
36+
return static_cast<c10::Float8_e4m3fn>(r);
37+
#endif
2838
}
2939

3040
template <>

0 commit comments

Comments
 (0)