Skip to content

Commit 5823125

Browse files
authored
[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. (#15836)
Add support for options `-f[no-]offload-fp32-prec-div` and `-f[no-]offload-fp32-prec-sqrt`. These options are added to allow users to control whether `fdiv` and `sqrt` operations in offload device code are required to return correctly rounded results. In order to communicate this to the device code, we need the front end to generate IR that reflects the choice. When the correctly rounded setting is used, we can just generate the `fdiv` instruction and `llvm.sqrt` intrinsic, because these operations are required to be correctly rounded by default in LLVM IR. When the result is not required to be correctly rounded, the front end should generate a call to the `llvm.fpbuiltin.fdiv` or `llvm.fpbuiltin.sqrt` intrinsic with the `fpbuiltin-max-error` attribute set. For single-precision `fdiv`, the setting should be `2.5`. For single-precision `sqrt`, the setting should be `3.0`. If the `-ffp-accuracy` option is used, we should issue warnings if the settings conflict with an explicitly set `-foffload-fp32-prec-div` or `-foffload-fp32-prec-sqrt` option.
1 parent 2339bac commit 5823125

14 files changed

+798
-55
lines changed

Diff for: clang/include/clang/Basic/DiagnosticCommonKinds.td

+5
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ def err_ppc_impossible_musttail: Error<
374374
def err_aix_musttail_unsupported: Error<
375375
"'musttail' attribute is not supported on AIX">;
376376

377+
def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
378+
"floating point accuracy control '%0' conflicts with explicit target "
379+
"precision option '%1'">,
380+
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;
381+
377382
// Source manager
378383
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
379384
def err_file_modified : Error<

Diff for: clang/include/clang/Basic/FPOptions.def

+2
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
3030
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
3131
OPTION(MathErrno, bool, 1, FPAccuracy)
3232
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
33+
OPTION(OffloadFP32PrecDi, bool, 1, ComplexRange)
34+
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDi)
3335
#undef OPTION

Diff for: clang/include/clang/Basic/LangOptions.def

+2
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
377377
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
378378
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
379379
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
380+
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
381+
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
380382
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
381383
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
382384
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")

Diff for: clang/include/clang/Driver/Options.td

+16
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
11671167
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
11681168
"for complex arithmetic operations">>;
11691169

1170+
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
1171+
LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
1172+
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
1173+
"code are required to return correctly rounded results.">,
1174+
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
1175+
"code are not required to return correctly rounded results.">>,
1176+
Group<f_Group>;
1177+
1178+
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
1179+
LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
1180+
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
1181+
"code are required to return correctly rounded results.">,
1182+
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
1183+
"code are not required to return correctly rounded results.">>,
1184+
Group<f_Group>;
1185+
11701186
// OpenCL-only Options
11711187
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
11721188
Visibility<[ClangOption, CC1Option]>,

Diff for: clang/lib/CodeGen/CGBuiltin.cpp

+9-37
Original file line numberDiff line numberDiff line change
@@ -666,29 +666,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
666666
return Store;
667667
}
668668

669-
static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
670-
llvm::Function *FPBuiltinF,
671-
ArrayRef<Value *> Args,
672-
unsigned ID) {
673-
llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
674-
// TODO: Replace AttrList with a single attribute. The call can only have a
675-
// single FPAccuracy attribute.
676-
llvm::AttributeList AttrList;
677-
// "sycl_used_aspects" metadata associated with the call.
678-
llvm::Metadata *AspectMD = nullptr;
679-
// sincos() doesn't return a value, but it still has a type associated with
680-
// it that corresponds to the operand type.
681-
CGF.CGM.getFPAccuracyFuncAttributes(
682-
Name, AttrList, AspectMD, ID,
683-
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
684-
CI->setAttributes(AttrList);
685-
686-
if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
687-
CI->setMetadata("sycl_used_aspects",
688-
llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
689-
return CI;
690-
}
691-
692669
static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
693670
unsigned FPIntrinsicID, unsigned IntrinsicID,
694671
bool HasAccuracyRequirement) {
@@ -697,13 +674,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
697674
: CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
698675
}
699676

700-
static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
701-
if (!CGF.getLangOpts().FPAccuracyVal.empty())
702-
return true;
703-
auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
704-
return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
705-
}
706-
707677
static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
708678
unsigned FPAccuracyIntrinsicID,
709679
unsigned IntrinsicID, llvm::Value *Src0,
@@ -722,7 +692,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
722692
CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
723693
// Use fpbuiltin intrinsic only when needed.
724694
Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
725-
hasAccuracyRequirement(CGF, Name));
695+
CGF.hasAccuracyRequirement(Name));
726696
}
727697
}
728698
}
@@ -741,8 +711,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
741711
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
742712
IntrinsicID, Src0, Name);
743713
if (Func)
744-
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
745-
FPAccuracyIntrinsicID);
714+
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
715+
FPAccuracyIntrinsicID);
746716

747717
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
748718
if (CGF.Builder.getIsFPConstrained()) {
@@ -766,8 +736,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
766736
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
767737
IntrinsicID, Src0, Name);
768738
if (Func)
769-
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
770-
FPAccuracyIntrinsicID);
739+
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
740+
FPAccuracyIntrinsicID);
771741

772742
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
773743
if (CGF.Builder.getIsFPConstrained()) {
@@ -25194,6 +25164,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
2519425164
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
2519525165
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
2519625166
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
25167+
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
2519725168
.Default(0);
2519825169
} else {
2519925170
// The function has a clang builtin. Create an attribute for it
@@ -25295,10 +25266,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
2529525266
// a TU fp-accuracy requested.
2529625267
const LangOptions &LangOpts = getLangOpts();
2529725268
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
25298-
!LangOpts.FPAccuracyVal.empty()) {
25269+
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
25270+
!LangOpts.OffloadFP32PrecSqrt) {
2529925271
llvm::Function *Func =
2530025272
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
25301-
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
25273+
return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
2530225274
FPAccuracyIntrinsicID);
2530325275
}
2530425276
return nullptr;

Diff for: clang/lib/CodeGen/CGCall.cpp

+34-9
Original file line numberDiff line numberDiff line change
@@ -1902,25 +1902,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
19021902
// the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
19031903
// is empty), then set its accuracy from the TU's accuracy value.
19041904
if (!getLangOpts().FPAccuracyFuncMap.empty()) {
1905+
StringRef FPAccuracyVal;
19051906
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
19061907
if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
1907-
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
1908-
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
1908+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
1909+
FPAccuracyVal = "2.5";
1910+
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
1911+
FPAccuracyVal = "3.0";
1912+
else
1913+
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
1914+
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
19091915
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
19101916
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
19111917
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
19121918
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
19131919
}
19141920
}
1915-
if (FuncAttrs.attrs().size() == 0)
1921+
if (FuncAttrs.attrs().size() == 0) {
19161922
if (!getLangOpts().FPAccuracyVal.empty()) {
1917-
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
1918-
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
1923+
StringRef FPAccuracyVal;
1924+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
1925+
FPAccuracyVal = "2.5";
1926+
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
1927+
FPAccuracyVal = "3.0";
1928+
else
1929+
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
1930+
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
19191931
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
19201932
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
19211933
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
19221934
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
1935+
} else {
1936+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
1937+
FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
1938+
} else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
1939+
FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
1940+
}
19231941
}
1942+
}
19241943
}
19251944

19261945
/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
@@ -5864,10 +5883,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
58645883
// Emit the actual call/invoke instruction.
58655884
llvm::CallBase *CI;
58665885
if (!InvokeDest) {
5867-
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
5868-
!getLangOpts().FPAccuracyVal.empty()) {
5869-
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
5870-
if (FD && FD->getNameInfo().getName().isIdentifier()) {
5886+
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
5887+
if (FD && FD->getNameInfo().getName().isIdentifier()) {
5888+
StringRef FuncName = FD->getName();
5889+
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
5890+
bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
5891+
bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
5892+
bool isFp32SqrtFunction =
5893+
(FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
5894+
IsFloat32Type);
5895+
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
58715896
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
58725897
FD->getName(), FD->getBuiltinID());
58735898
if (CI)

Diff for: clang/lib/CodeGen/CGExprScalar.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -3866,6 +3866,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
38663866
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
38673867
llvm::Value *Val;
38683868
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
3869+
if (Ops.LHS->getType()->isFloatTy()) {
3870+
if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
3871+
unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
3872+
llvm::Function *Func =
3873+
CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
3874+
llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
3875+
"fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
3876+
return Val;
3877+
}
3878+
}
38693879
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
38703880
CGF.SetDivFPAccuracy(Val);
38713881
return Val;

Diff for: clang/lib/CodeGen/CodeGenFunction.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
121121
}
122122
}
123123

124+
bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
125+
if (!getLangOpts().FPAccuracyVal.empty())
126+
return true;
127+
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
128+
return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
129+
}
130+
131+
llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
132+
StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
133+
unsigned ID) {
134+
llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
135+
// TODO: Replace AttrList with a single attribute. The call can only have a
136+
// single FPAccuracy attribute.
137+
llvm::AttributeList AttrList;
138+
// "sycl_used_aspects" metadata associated with the call.
139+
llvm::Metadata *AspectMD = nullptr;
140+
// sincos() doesn't return a value, but it still has a type associated with
141+
// it that corresponds to the operand type.
142+
CGM.getFPAccuracyFuncAttributes(
143+
Name, AttrList, AspectMD, ID,
144+
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
145+
CI->setAttributes(AttrList);
146+
147+
if (getLangOpts().SYCLIsDevice && AspectMD)
148+
CI->setMetadata("sycl_used_aspects",
149+
llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
150+
return CI;
151+
}
152+
124153
void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
125154
llvm::FastMathFlags FMF;
126155
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());

Diff for: clang/lib/CodeGen/CodeGenFunction.h

+7
Original file line numberDiff line numberDiff line change
@@ -5285,6 +5285,13 @@ class CodeGenFunction : public CodeGenTypeCache {
52855285
/// CodeGenOpts.
52865286
void SetDivFPAccuracy(llvm::Value *Val);
52875287

5288+
bool hasAccuracyRequirement(StringRef Name);
5289+
5290+
llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
5291+
llvm::Function *FPBuiltinF,
5292+
ArrayRef<llvm::Value *> Args,
5293+
unsigned ID);
5294+
52885295
/// Set the codegen fast-math flags.
52895296
void SetFastMathFlags(FPOptions FPFeatures);
52905297

0 commit comments

Comments
 (0)