From 3a15454753156e7042d74d4eccb003bfdb10838e Mon Sep 17 00:00:00 2001 From: skc7 Date: Fri, 5 Dec 2025 17:19:52 +0530 Subject: [PATCH] [CIR][AMDGPU] Add lowering for amdgcn div fmas builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 11 +++++++++- .../test/CIR/CodeGen/HIP/builtins-amdgcn.hip | 20 +++++++++++++++++ .../CIR/CodeGen/OpenCL/builtins_amdgcn.cl | 22 +++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index e0661bef79ac..27a151c62901 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -116,7 +116,16 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, } case AMDGPU::BI__builtin_amdgcn_div_fmas: case AMDGPU::BI__builtin_amdgcn_div_fmasf: { - llvm_unreachable("div_fmas_* NYI"); + mlir::Value src0 = emitScalarExpr(expr->getArg(0)); + mlir::Value src1 = emitScalarExpr(expr->getArg(1)); + mlir::Value src2 = emitScalarExpr(expr->getArg(2)); + mlir::Value src3 = emitScalarExpr(expr->getArg(3)); + mlir::Value result = + LLVMIntrinsicCallOp::create(builder, getLoc(expr->getExprLoc()), + builder.getStringAttr("amdgcn.div.fmas"), + src0.getType(), {src0, src1, src2, src3}) + .getResult(); + return result; } case AMDGPU::BI__builtin_amdgcn_ds_swizzle: case AMDGPU::BI__builtin_amdgcn_mov_dpp8: diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip index 21ae5cd5e39e..c47989e4349d 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip @@ -255,3 +255,23 @@ __device__ void test_div_scale_f32_with_ptr(float* out, int* flagout, bool* flag { *out = __builtin_amdgcn_div_scalef(a, b, true, flag); } + +// CIR-LABEL: @_Z17test_div_fmas_f32Pdfffi +// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float +// LLVM: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi +// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}}) +// OGCG: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi +// OGCG: call {{.*}} float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}}) +__device__ void test_div_fmas_f32(double* out, float a, float b, float c, int d) { + *out = __builtin_amdgcn_div_fmasf(a, b, c, d); +} + +// CIR-LABEL: @_Z17test_div_fmas_f64Pddddi +// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double +// LLVM: define{{.*}} void @_Z17test_div_fmas_f64Pddddi +// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}}) +// OGCG: define{{.*}} void @_Z17test_div_fmas_f64Pddddi +// OGCG: call {{.*}} double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}}) +__device__ void test_div_fmas_f64(double* out, double a, double b, double c, int d) { + *out = __builtin_amdgcn_div_fmas(a, b, c, d); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl index 1e38fcfeba93..812a5fd42b28 100644 --- a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl +++ b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl @@ -266,3 +266,25 @@ void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, floa { *out = __builtin_amdgcn_div_scalef(a, b, true, flag); } + +// CIR-LABEL: @test_div_fmas_f32 +// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float +// LLVM: define{{.*}} void @test_div_fmas_f32 +// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}}) +// OGCG: define{{.*}} void @test_div_fmas_f32 +// OGCG: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}}) +void test_div_fmas_f32(global float* out, float a, float b, float c, int d) +{ + *out = __builtin_amdgcn_div_fmasf(a, b, c, d); +} + +// CIR-LABEL: @test_div_fmas_f64 +// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double +// LLVM: define{{.*}} void @test_div_fmas_f64 +// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}}) +// OGCG: define{{.*}} void @test_div_fmas_f64 +// OGCG: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}}) +void test_div_fmas_f64(global double* out, double a, double b, double c, int d) +{ + *out = __builtin_amdgcn_div_fmas(a, b, c, d); +}