Skip to content

Commit b78453c

Browse files
authored
Merge branch 'sycl' into bf16_builtins
2 parents bc6e32a + a33f9c8 commit b78453c

File tree

21 files changed

+933
-59
lines changed

21 files changed

+933
-59
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
1818
#endif
1919

20+
#pragma push_macro("SM_53")
2021
#pragma push_macro("SM_70")
2122
#pragma push_macro("SM_72")
2223
#pragma push_macro("SM_75")
@@ -30,7 +31,9 @@
3031

3132
#pragma push_macro("SM_60")
3233
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
34+
// Feature-string alternatives for sm_53 and every newer SM level: the literal
// "sm_53|" is concatenated in front of whatever SM_60 expands to.
#define SM_53 "sm_53|" SM_60
3335

36+
#pragma push_macro("PTX42")
3437
#pragma push_macro("PTX60")
3538
#pragma push_macro("PTX61")
3639
#pragma push_macro("PTX63")
@@ -53,6 +56,7 @@
5356
#define PTX63 "ptx63|" PTX64
5457
#define PTX61 "ptx61|" PTX63
5558
#define PTX60 "ptx60|" PTX61
59+
// Feature-string alternatives for PTX 4.2 and every newer PTX level: the
// literal "ptx42|" is concatenated in front of whatever PTX60 expands to.
#define PTX42 "ptx42|" PTX60
5660

5761
#pragma push_macro("AND")
5862
#define AND(a, b) "(" a "),(" b ")"
@@ -110,13 +114,89 @@ BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
110114

111115
// Min Max
112116

113-
BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
114-
BUILTIN(__nvvm_fmax_f, "fff", "")
115-
BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
116-
BUILTIN(__nvvm_fmin_f, "fff", "")
117+
TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
118+
TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
119+
TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
120+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
121+
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
122+
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
123+
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
124+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
125+
AND(SM_86, PTX72))
126+
TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
127+
TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
128+
TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
129+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
130+
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
131+
AND(SM_86, PTX72))
132+
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
133+
AND(SM_86, PTX72))
134+
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
135+
AND(SM_86, PTX72))
136+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
137+
AND(SM_86, PTX72))
138+
TARGET_BUILTIN(__nvvm_fmin_bf16, "UsUsUs", "", AND(SM_80, PTX70))
139+
TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
140+
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
141+
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "UsUsUs", "",
142+
AND(SM_86, PTX72))
143+
TARGET_BUILTIN(__nvvm_fmin_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
144+
TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
145+
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
146+
AND(SM_86, PTX72))
147+
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
148+
AND(SM_86, PTX72))
149+
BUILTIN(__nvvm_fmin_f, "fff", "")
150+
BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
151+
TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
152+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
153+
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
154+
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
155+
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
156+
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
157+
BUILTIN(__nvvm_fmin_d, "ddd", "")
117158

159+
TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
160+
TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
161+
TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
162+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
163+
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
164+
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
165+
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
166+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
167+
AND(SM_86, PTX72))
168+
TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
169+
TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
170+
TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
171+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
172+
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
173+
AND(SM_86, PTX72))
174+
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
175+
AND(SM_86, PTX72))
176+
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
177+
AND(SM_86, PTX72))
178+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
179+
AND(SM_86, PTX72))
180+
TARGET_BUILTIN(__nvvm_fmax_bf16, "UsUsUs", "", AND(SM_80, PTX70))
181+
TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
182+
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
183+
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "UsUsUs", "",
184+
AND(SM_86, PTX72))
185+
TARGET_BUILTIN(__nvvm_fmax_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
186+
TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
187+
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
188+
AND(SM_86, PTX72))
189+
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
190+
AND(SM_86, PTX72))
191+
BUILTIN(__nvvm_fmax_f, "fff", "")
192+
BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
193+
TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
194+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
195+
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
196+
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
197+
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
198+
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
118199
BUILTIN(__nvvm_fmax_d, "ddd", "")
119-
BUILTIN(__nvvm_fmin_d, "ddd", "")
120200

121201
// Multiplication
122202

@@ -228,6 +308,22 @@ TARGET_BUILTIN(__nvvm_tanh_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
228308

229309
// Fma
230310

311+
TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
312+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
313+
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
314+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
315+
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
316+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
317+
TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
318+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
319+
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
320+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
321+
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
322+
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
323+
TARGET_BUILTIN(__nvvm_fma_rn_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
324+
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
325+
TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
326+
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
231327
BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
232328
BUILTIN(__nvvm_fma_rn_f, "ffff", "")
233329
BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
@@ -2309,15 +2405,24 @@ TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
23092405
TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
23102406
TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
23112407

2408+
2409+
// bf16, bf16x2 abs, neg
2410+
TARGET_BUILTIN(__nvvm_abs_bf16, "UsUs", "", AND(SM_80,PTX70))
2411+
TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
2412+
TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
2413+
TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
2414+
23122415
#undef BUILTIN
23132416
#undef TARGET_BUILTIN
23142417
#pragma pop_macro("AND")
2418+
#pragma pop_macro("SM_53")
23152419
#pragma pop_macro("SM_60")
23162420
#pragma pop_macro("SM_70")
23172421
#pragma pop_macro("SM_72")
23182422
#pragma pop_macro("SM_75")
23192423
#pragma pop_macro("SM_80")
23202424
#pragma pop_macro("SM_86")
2425+
#pragma pop_macro("PTX42")
23212426
#pragma pop_macro("PTX60")
23222427
#pragma pop_macro("PTX61")
23232428
#pragma pop_macro("PTX63")

clang/lib/Driver/Driver.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3013,10 +3013,19 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30133013
// TODO: The static archive processing for SYCL is done in a different
30143014
// manner than the OpenMP processing. We should try and refactor this
30153015
// to use the OpenMP flow (adding -l<name> to the llvm-link step)
3016-
auto resolveStaticLib = [&](StringRef LibName) -> bool {
3016+
auto resolveStaticLib = [&](StringRef LibName, bool IsStatic) -> bool {
30173017
if (!LibName.startswith("-l"))
30183018
return false;
30193019
for (auto LPath : LibPaths) {
3020+
if (!IsStatic) {
3021+
// Current linking state is dynamic. We will first check for the
3022+
// shared object and not pull in the static library if it is found.
3023+
SmallString<128> SoLibName(LPath);
3024+
llvm::sys::path::append(SoLibName,
3025+
Twine("lib" + LibName.substr(2) + ".so").str());
3026+
if (llvm::sys::fs::exists(SoLibName))
3027+
return false;
3028+
}
30203029
SmallString<128> FullName(LPath);
30213030
llvm::sys::path::append(FullName,
30223031
Twine("lib" + LibName.substr(2) + ".a").str());
@@ -3029,6 +3038,7 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30293038
};
30303039
for (const auto *A : Args) {
30313040
std::string FileName = A->getAsString(Args);
3041+
// Tracks whether the linker is currently in "static" mode. It starts from
// whether -static was passed and is later flipped by -Bstatic/-dn/-non_shared/
// -static and -Bdynamic/-dy/-call_shared values seen while walking the args.
// NOTE(review): this is a function-local `static` declared inside the loop
// body. `static` keeps the state alive across loop iterations, but it is also
// initialized only the first time control reaches this line, so a later call to
// getLinkerArgs() would not re-read `Args` and would inherit whatever state the
// previous call left behind. Consider hoisting a plain `bool` above the
// `for (const auto *A : Args)` loop instead -- confirm first that every lambda
// touching it captures by reference.
static bool IsLinkStateStatic(Args.hasArg(options::OPT_static));
30323042
auto addLibArg = [&](StringRef LibName) -> bool {
30333043
if (isStaticArchiveFile(LibName) ||
30343044
(IncludeObj && isObjectFile(LibName.str()))) {
@@ -3088,7 +3098,20 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30883098
LibArgs.push_back(Args.MakeArgString(V));
30893099
return;
30903100
}
3091-
resolveStaticLib(V);
3101+
if (optionMatches("-Bstatic", V.str()) ||
3102+
optionMatches("-dn", V.str()) ||
3103+
optionMatches("-non_shared", V.str()) ||
3104+
optionMatches("-static", V.str())) {
3105+
IsLinkStateStatic = true;
3106+
return;
3107+
}
3108+
if (optionMatches("-Bdynamic", V.str()) ||
3109+
optionMatches("-dy", V.str()) ||
3110+
optionMatches("-call_shared", V.str())) {
3111+
IsLinkStateStatic = false;
3112+
return;
3113+
}
3114+
resolveStaticLib(V, IsLinkStateStatic);
30923115
};
30933116
if (Value[0] == '@') {
30943117
// Found a response file, we want to expand contents to try and
@@ -3128,7 +3151,7 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
31283151
continue;
31293152
}
31303153
if (A->getOption().matches(options::OPT_l))
3131-
resolveStaticLib(A->getAsString(Args));
3154+
resolveStaticLib(A->getAsString(Args), IsLinkStateStatic);
31323155
}
31333156
return LibArgs;
31343157
}

0 commit comments

Comments
 (0)