Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. #15836

Open
wants to merge 28 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f8caf83
Add support for -ftarget-prec-div/sqrt options.
zahiraam Oct 23, 2024
00ffb5a
Added fast-math run lines to LIT tests.
zahiraam Oct 23, 2024
795dd38
Renamed the options accordingly.
zahiraam Oct 24, 2024
78a9005
Fix format.
zahiraam Oct 24, 2024
50e71c0
Changed the place where the options are added in order for the options
zahiraam Oct 28, 2024
54f2409
Fix format.
zahiraam Oct 28, 2024
bdf78d7
Addresed review comments.
zahiraam Oct 29, 2024
8cd6d8b
Put the code to handle the options in RenderFloatingPointOptions
zahiraam Oct 30, 2024
755d630
Addressed review comments.
zahiraam Oct 30, 2024
27011c8
Fixed up condition to clearer code.
zahiraam Oct 31, 2024
ff2b3d9
Addressed review comments.
zahiraam Nov 4, 2024
07752e2
Add extension SPV_INTEL_fp_max_error.
zahiraam Nov 5, 2024
aa909d2
Fixed LIT test.
zahiraam Nov 5, 2024
fcc4786
Addressed review comment.
zahiraam Nov 5, 2024
24711fd
Addressed review comments.
zahiraam Nov 8, 2024
56314b7
Renamed function.
zahiraam Nov 12, 2024
e643027
Addressed review comments.
zahiraam Nov 13, 2024
b25e5ac
Changed SplitFPAccuracyVal to be a static function instead of a lambda.
zahiraam Nov 13, 2024
ce00296
Restricting the use of the options to sycl only.
zahiraam Nov 15, 2024
bc01759
Remove restriction on Cuda/Hip and changed the code so that the div
zahiraam Nov 18, 2024
c5fffc5
Removed unused lines in CodeGenSYC/offload-fp32-div-sqrt.cpp.
zahiraam Nov 21, 2024
f2fb8b2
Renamed div to fdiv to avoid confusion.
zahiraam Nov 22, 2024
83c9b31
This is an attempt to fix the DeviceLib/cmath_test.cpp issue.
zahiraam Nov 25, 2024
0efc825
Removing the latest change that attempted to fix the LIT issue.
zahiraam Dec 2, 2024
e1de775
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 15, 2025
34f07cc
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 15, 2025
e18930f
Fix sync error.
zahiraam Jan 16, 2025
410856d
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
def err_aix_musttail_unsupported: Error<
"'musttail' attribute is not supported on AIX">;

def warn_acuracy_conflicts_with_explicit_target_prec_option : Warning<
"floating point accuracy control '%0' conflicts with explicit target "
"precision option '%1'">,
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-target-prec-option">>;

// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
OPTION(MathErrno, bool, 1, FPAccuracy)
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
OPTION(OffloadFp32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFp32PrecSqrt, bool, 1, OffloadFp32PrecDiv)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
OPTION(OffloadFp32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFp32PrecSqrt, bool, 1, OffloadFp32PrecDiv)
OPTION(OffloadFP32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDiv)

#undef OPTION
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
LANGOPT(OffloadFp32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFp32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
LANGOPT(OffloadFp32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFp32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")

LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
"for complex arithmetic operations">>;

defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;


// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24099,6 +24099,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
.Default(0);
} else {
// The function has a clang builtin. Create an attribute for it
Expand Down Expand Up @@ -24200,7 +24201,8 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
// a TU fp-accuracy requested.
const LangOptions &LangOpts = getLangOpts();
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
!LangOpts.FPAccuracyVal.empty()) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFp32PrecDiv ||
!LangOpts.OffloadFp32PrecSqrt) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFp32PrecDiv ||
!LangOpts.OffloadFp32PrecSqrt) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
!LangOpts.OffloadFP32PrecSqrt) {

llvm::Function *Func =
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
Expand Down
26 changes: 20 additions & 6 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1889,15 +1889,23 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
}
}
if (FuncAttrs.attrs().size() == 0)
if (FuncAttrs.attrs().size() == 0) {
StringRef FPAccuracyVal;
if (!getLangOpts().FPAccuracyVal.empty()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
}
if (Name == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt)
FPAccuracyVal = "3.0";
if (Name == "fdiv" && !getLangOpts().OffloadFp32PrecDiv)
FPAccuracyVal = "2.5";
if (!FPAccuracyVal.empty())
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is the combination supposed to work? If the condition in 1894 was true, would two fpbuiltin-max-error attributes get added? Once in 1898 and again in 1907?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the condition in 1894 is satisfied, then the FuncAttrs.size() != 0); we will not get into this code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, maybe I wasn't clear before. Let me type out what I am asking:

  if (FuncAttrs.attrs().size() == 0) {
    StringRef FPAccuracyVal;
    if (!getLangOpts().FPAccuracyVal.empty()) {
      ...
      FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(...);
      FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);  // #Attr here 1
      ... 
    }
    if (Name == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt)
      FPAccuracyVal = "3.0";
    if (Name == "fdiv" && !getLangOpts().OffloadFp32PrecDiv)
      FPAccuracyVal = "2.5";
    if (!FPAccuracyVal.empty())
      FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);    // #Attr here 2

Couldn't you get into the size == 0 block, set FPAccuracyVal, add the attribute (#1), and if name is one of sqrt or fdiv, set FPAccuracyVal again, and then add the attribute again (#2)?

Is this combination supposed to work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! good catch. I think this will fix it.

}
}

/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
Expand Down Expand Up @@ -5790,10 +5798,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
StringRef FuncName = FD->getName();
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty() ||
(FuncName == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt &&
IsFloat32Type) ||
(FuncName == "fdiv" && !getLangOpts().OffloadFp32PrecDiv &&
IsFloat32Type)) {
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
FD->getName(), FD->getBuiltinID());
if (CI)
Expand Down
45 changes: 43 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1721,6 +1721,39 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
}
}

static void EmitAccuracyDiag(const Driver &D, const JobAction &JA,
StringRef AccuracValStr, StringRef TargetPrecStr) {
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
D.Diag(clang::diag::warn_acuracy_conflicts_with_explicit_target_prec_option)
<< AccuracValStr << TargetPrecStr;
}
}

void Clang::AddSPIRTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
const JobAction &JA, const Driver &D) const {
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ)) {
if (!strcmp(A->getValue(), "fast")) {
CmdArgs.push_back("-fno-offload-fp32-prec-div");
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
mdtoguchi marked this conversation as resolved.
Show resolved Hide resolved
} else {
if (Arg *A = Args.getLastArg(options::OPT_ffp_accuracy_EQ)) {
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_div))
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-div");
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_sqrt))
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-sqrt");
}
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_div,
options::OPT_fno_offload_fp32_prec_div, true))
CmdArgs.push_back("-fno-offload-fp32-prec-div");
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_sqrt,
options::OPT_fno_offload_fp32_prec_sqrt, true))
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
}
}

void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs);
Expand Down Expand Up @@ -1771,8 +1804,10 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,

void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const ArgList &Args, bool KernelOrKext,
ArgStringList &CmdArgs) const {
ArgStringList &CmdArgs,
const JobAction &JA) const {
const ToolChain &TC = getToolChain();
const Driver &D = TC.getDriver();

// Add the target features
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false);
Expand All @@ -1782,6 +1817,12 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
default:
break;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inadvertent line removal?

case llvm::Triple::spir:
case llvm::Triple::spir64:
mdtoguchi marked this conversation as resolved.
Show resolved Hide resolved
case llvm::Triple::spirv32:
case llvm::Triple::spirv64:
AddSPIRTargetArgs(Args, CmdArgs, JA, D);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
Expand Down Expand Up @@ -6806,7 +6847,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(CPU));
}

RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs, JA);

// Add clang-cl arguments.
types::ID InputType = Input.getType();
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,18 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool {

void RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const llvm::opt::ArgList &Args, bool KernelOrKext,
llvm::opt::ArgStringList &CmdArgs) const;
llvm::opt::ArgStringList &CmdArgs,
const JobAction &JA) const;

void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
void AddARMTargetArgs(const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
bool KernelOrKext) const;
void AddSPIRTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA,
const Driver &D) const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes not needed anymore?

void AddARM64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args,
Expand Down
156 changes: 156 additions & 0 deletions clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// DEFINE: %{common_opts_spirv32} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown

// DEFINE: %{common_opts_spirv64} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv64-unknown-unknown


// DEFINE: %{common_opts_spir} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spir32-unknown-unknown


// DEFINE: %{common_opts_spir64} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spir64-unknown-unknown


// RUN: %clang_cc1 %{common_opts_spirv32} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \
// RUN:-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \
// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spirv64} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spir} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spir64} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

#include "sycl.hpp"

extern "C" SYCL_EXTERNAL float sqrt(float);
extern "C" SYCL_EXTERNAL float fdiv(float, float);

using namespace sycl;

int main() {
const unsigned array_size = 4;
range<1> numOfItems{array_size};
float Value1 = .5f;
float Value2 = .9f;
queue deviceQueue;

deviceQueue.submit([&](handler& cgh) {
cgh.parallel_for<class KernelSqrt>(numOfItems,
[=](id<1> wiID) {
// PREC-SQRT: call spir_func float @sqrt(float noundef {{.*}})
// ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]]
// ROUNDED-DIV: call spir_func float @sqrt(float noundef {{.*}})
(void)sqrt(Value1);
});
});

deviceQueue.submit([&](handler& cgh) {
cgh.parallel_for<class KernelFdiv>(numOfItems,
[=](id<1> wiID) {
// PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
// ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})

// ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]]

// PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
// ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
// ROUNDED-DIV-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
(void)fdiv(Value1, Value1);
});
});

return 0;
}

// ROUNDED-SQRT: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0"
// ROUNDED-SQRT-FAST: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0"
// ROUNDED-DIV: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5"
// ROUNDED-DIV-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5"
Loading
Loading