Skip to content

Commit 6da4d2e

Browse files
[SYCL][NATIVECPU][LIBCLC] Use libclc for SYCL Native CPU (#10970)
This PR allows linking to libclc when compiling for SYCL Native CPU. Currently only the `x86_64-unknown-linux-gnu` target triple is supported; additional target triples (and possibly a more versatile way of setting them) will come in follow-up PRs. Some useful information for reviewing: * We start using an `AddrSpaceMap` (set in `TargetInfo.cpp`) because the mangled names emitted by the device compiler need to match the names provided by `libclc`. The AddressSpaceMap is taken from the `PTX` Target. * Changes in `Driver` are needed to find and link to `libclc`. * `libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll` has been split into 4 modules, one for each memory ordering constraint. Copies of these modules have been added in `generic` (because some functions in `generic/libspirv/atomic` needed them), and the module split allows specializing the files for targets that may not support some orderings. Currently only a couple of functions for `acquire` and `seq_cst` have been implemented for `generic`, but the others will be implemented in a follow-up PR. * We've added a target in `libclc` for `x86_64-unknown-linux`. 
This has been done because some math builtins in `generic` have been defined as ``` typedef char vec __attribute__((ext_vector_type(8))); __attribute__((overloadable)) vec __clc_native_popcount(vec x) __asm("llvm.ctpop" ".v8i" "8"); vec call(vec x) { return __clc_native_popcount(x); } ``` While this approach conveniently allows calling LLVM intrinsics directly, it does not seem to play well with the ABI for `x86_64-unknown-linux`, since it leads to this IR: ``` define dso_local double @call(double noundef %x.coerce) #0 { entry: %0 = bitcast double %x.coerce to <8 x i8> %1 = bitcast <8 x i8> %0 to double %call = call double @llvm.ctpop.v8i8(double noundef %1) #8 %2 = bitcast double %call to <8 x i8> %3 = bitcast <8 x i8> %2 to double ret double %3 } ``` This IR is invalid because `llvm.ctpop.v8i8` expects a vector of `i8` and not a `double`, leading to failing asserts in the compiler that prevented `libclc` from building. As a temporary workaround we have added empty files that override the files in `generic` when building for `x86_64-unknown-linux`, allowing the build to complete, even though the corresponding builtins will be missing from the library. We are working on a proper solution for this. --------- Co-authored-by: Uwe Dolinsky <uwe@codeplay.com>
1 parent a162179 commit 6da4d2e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+810
-389
lines changed

buildbot/configure.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def do_configure(args):
6565
if args.enable_esimd_emulator:
6666
sycl_enabled_plugins.append("esimd_emulator")
6767

68-
if args.cuda or args.hip:
68+
if args.cuda or args.hip or args.native_cpu:
6969
llvm_enable_projects += ';libclc'
7070

7171
if args.cuda:
@@ -87,6 +87,12 @@ def do_configure(args):
8787
sycl_build_pi_hip_platform = args.hip_platform
8888
sycl_enabled_plugins.append("hip")
8989

90+
if args.native_cpu:
91+
#Todo: we should set whatever targets we support for native cpu
92+
libclc_targets_to_build += ';x86_64-unknown-linux-gnu'
93+
sycl_enabled_plugins.append("native_cpu")
94+
95+
9096
# all llvm compiler targets don't require 3rd party dependencies, so can be
9197
# built/tested even if specific runtimes are not available
9298
if args.enable_all_llvm_targets:
@@ -234,6 +240,7 @@ def main():
234240
parser.add_argument("-t", "--build-type",
235241
metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release")
236242
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
243+
parser.add_argument("--native_cpu", action='store_true', help="Enable SYCL Native CPU")
237244
parser.add_argument("--hip", action='store_true', help="switch from OpenCL to HIP")
238245
parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend")
239246
parser.add_argument("--host-target", default='X86',

clang/lib/Basic/TargetInfo.cpp

+32
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,38 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
516516

517517
if (Opts.FakeAddressSpaceMap)
518518
AddrSpaceMap = &FakeAddrSpaceMap;
519+
520+
if (Opts.SYCLIsDevice && Opts.SYCLIsNativeCPU) {
521+
// For SYCL Native CPU we use the NVPTXAddrSpaceMap because
522+
// we need builtins to be mangled with AS information
523+
524+
static const unsigned SYCLNativeCPUASMap[] = {
525+
0, // Default
526+
1, // opencl_global
527+
3, // opencl_local
528+
4, // opencl_constant
529+
0, // opencl_private
530+
0, // opencl_generic
531+
1, // opencl_global_device
532+
1, // opencl_global_host
533+
1, // cuda_device
534+
4, // cuda_constant
535+
3, // cuda_shared
536+
1, // sycl_global
537+
1, // sycl_global_device
538+
1, // sycl_global_host
539+
3, // sycl_local
540+
0, // sycl_private
541+
0, // ptr32_sptr
542+
0, // ptr32_uptr
543+
0, // ptr64
544+
0, // hlsl_groupshared
545+
20, // wasm_funcref
546+
};
547+
548+
AddrSpaceMap = &SYCLNativeCPUASMap;
549+
UseAddrSpaceMapMangling = true;
550+
}
519551
}
520552

521553
bool TargetInfo::initFeatureMap(

clang/lib/Driver/Driver.cpp

+50
Original file line numberDiff line numberDiff line change
@@ -5274,6 +5274,53 @@ class OffloadingActionBuilder final {
52745274
return needLibs;
52755275
}
52765276

5277+
/// Finds the libclc "libspirv" bitcode file to link for SYCL Native CPU and,
/// when one is selected, appends it to \p DeviceLinkObjects as an input
/// action of type TY_LLVM_BC.
///
/// The file comes from the value of OPT_fsycl_libspirv_path_EQ when that
/// option is present; otherwise two directories relative to the driver's
/// ResourceDir are probed for "builtins.link.libspirv-<triple>.bc".
///
/// \param TC toolchain whose triple string is used to build the file name.
/// \param DeviceLinkObjects list that receives the new input action.
/// \returns true if an input action was appended, false otherwise.
bool addSYCLNativeCPULibs(const ToolChain *TC,
5278+
ActionList &DeviceLinkObjects) {
5279+
std::string LibSpirvFile;
5280+
if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) {
5281+
auto ProvidedPath =
5282+
Args.getLastArgValue(options::OPT_fsycl_libspirv_path_EQ).str();
5283+
// NOTE(review): a user-provided path that does not exist is dropped
// silently: LibSpirvFile stays empty, the default locations are not
// searched, and the function returns false without a diagnostic.
if (llvm::sys::fs::exists(ProvidedPath))
5284+
LibSpirvFile = ProvidedPath;
5285+
} else {
5286+
SmallVector<StringRef, 8> LibraryPaths;
5287+
5288+
// Candidate directory used when running from a build tree (no install):
// <ResourceDir>/../../clc
5289+
SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir);
5290+
llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
5291+
LibraryPaths.emplace_back(WithoutInstallPath.c_str());
5292+
5293+
// Candidate directory used after installation:
// <ResourceDir>/../../../share/clc
5294+
SmallString<256> WithInstallPath(C.getDriver().ResourceDir);
5295+
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
5296+
LibraryPaths.emplace_back(WithInstallPath.c_str());
5297+
5298+
// Select the libclc variant based on the target triple: the file name is
// "builtins.link.libspirv-" + <triple string> + ".bc".
5299+
std::string LibSpirvTargetName = "builtins.link.libspirv-";
5300+
LibSpirvTargetName.append(TC->getTripleString() + ".bc");
5301+
5302+
for (StringRef LibraryPath : LibraryPaths) {
5303+
SmallString<128> LibSpirvTargetFile(LibraryPath);
5304+
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
5305+
// The first candidate that exists wins. When OPT__HASH_HASH_HASH is
// present the existence check is bypassed, so the first candidate is
// taken even if the file is missing.
if (llvm::sys::fs::exists(LibSpirvTargetFile) ||
5306+
Args.hasArg(options::OPT__HASH_HASH_HASH)) {
5307+
LibSpirvFile = std::string(LibSpirvTargetFile.str());
5308+
break;
5309+
}
5310+
}
5311+
}
5312+
5313+
// Only create the extra link input when a library file was selected above.
if (!LibSpirvFile.empty()) {
5314+
Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(),
5315+
Args.MakeArgString(LibSpirvFile));
5316+
auto *SYCLLibClcInputAction =
5317+
C.MakeAction<InputAction>(*LibClcInputArg, types::TY_LLVM_BC);
5318+
DeviceLinkObjects.push_back(SYCLLibClcInputAction);
5319+
return true;
5320+
}
5321+
return false;
5322+
}
5323+
52775324
bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects,
52785325
bool isSpirvAOT, bool isMSVCEnv) {
52795326
struct DeviceLibOptInfo {
@@ -5684,6 +5731,9 @@ class OffloadingActionBuilder final {
56845731
TC, DeviceLibs, UseAOTLink,
56855732
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment());
56865733
}
5734+
if (isSYCLNativeCPU) {
5735+
SYCLDeviceLibLinked |= addSYCLNativeCPULibs(TC, DeviceLibs);
5736+
}
56875737
JobAction *LinkSYCLLibs =
56885738
C.MakeAction<LinkJobAction>(DeviceLibs, types::TY_LLVM_BC);
56895739
for (Action *FullLinkObject : FullLinkObjects) {

clang/lib/Driver/ToolChains/SYCL.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88
#include "SYCL.h"
99
#include "CommonArgs.h"
10+
#include "clang/Driver/Action.h"
1011
#include "clang/Driver/Compilation.h"
1112
#include "clang/Driver/Driver.h"
1213
#include "clang/Driver/DriverDiagnostic.h"
@@ -170,6 +171,8 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
170171
// instead of the original object.
171172
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
172173
bool IsRDC = !shouldDoPerObjectFileLinking(C);
174+
const bool IsSYCLNativeCPU = isSYCLNativeCPU(
175+
this->getToolChain(), *C.getSingleOffloadToolChain<Action::OFK_Host>());
173176
auto isNoRDCDeviceCodeLink = [&](const InputInfo &II) {
174177
if (IsRDC)
175178
return false;
@@ -190,12 +193,14 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
190193

191194
std::string FileName = this->getToolChain().getInputFilename(II);
192195
StringRef InputFilename = llvm::sys::path::filename(FileName);
193-
if (this->getToolChain().getTriple().isNVPTX()) {
196+
const bool IsNVPTX = this->getToolChain().getTriple().isNVPTX();
197+
if (IsNVPTX || IsSYCLNativeCPU) {
194198
// Linking SYCL Device libs requires libclc as well as libdevice
195199
if ((InputFilename.find("libspirv") != InputFilename.npos ||
196200
InputFilename.find("libdevice") != InputFilename.npos))
197201
return true;
198-
LibPostfix = ".cubin";
202+
if (IsNVPTX)
203+
LibPostfix = ".cubin";
199204
}
200205
StringRef LibSyclPrefix("libsycl-");
201206
if (!InputFilename.startswith(LibSyclPrefix) ||
+17-29
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
//RUN: %clang -fsycl -fsycl-targets=native_cpu -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS
2-
//RUN: %clang -fsycl -fsycl-targets=native_cpu -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK_BINDINGS
3-
//RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_INVO
4-
//RUN: %clang -fsycl -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS-AARCH64
1+
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS
2+
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK_BINDINGS
3+
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_INVO
4+
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -target aarch64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS-AARCH64
55

66

77
//CHECK_ACTIONS: +- 0: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (host-sycl)
@@ -15,20 +15,23 @@
1515
//CHECK_ACTIONS: +- 8: backend, {7}, assembler, (host-sycl)
1616
//CHECK_ACTIONS: +- 9: assembler, {8}, object, (host-sycl)
1717
//CHECK_ACTIONS:+- 10: linker, {9}, image, (host-sycl)
18+
//CHECK_ACTIONS: +- 11: linker, {5}, ir, (device-sycl)
19+
//CHECK_ACTIONS: |- 12: input, "{{.*}}libspirv{{.*}}", ir, (device-sycl)
20+
//CHECK_ACTIONS: +- 13: linker, {11, 12}, ir, (device-sycl)
1821
//this is where we compile the device code to a shared lib, and we link the host shared lib and the device shared lib
19-
//CHECK_ACTIONS:| +- 11: linker, {5}, ir, (device-sycl)
20-
//CHECK_ACTIONS:| +- 12: backend, {11}, assembler, (device-sycl)
21-
//CHECK_ACTIONS:|- 13: assembler, {12}, object, (device-sycl)
22+
//CHECK_ACTIONS:| +- 14: backend, {13}, assembler, (device-sycl)
23+
//CHECK_ACTIONS:|- 15: assembler, {14}, object, (device-sycl)
2224
//call sycl-post-link and clang-offload-wrapper
23-
//CHECK_ACTIONS:| +- 14: sycl-post-link, {11}, tempfiletable, (device-sycl)
24-
//CHECK_ACTIONS:|- 15: clang-offload-wrapper, {14}, object, (device-sycl)
25-
//CHECK_ACTIONS:16: offload, "host-sycl ({{.*}})" {10}, "device-sycl ({{.*}})" {13}, "device-sycl ({{.*}})" {15}, image
25+
//CHECK_ACTIONS:| +- 16: sycl-post-link, {13}, tempfiletable, (device-sycl)
26+
//CHECK_ACTIONS:|- 17: clang-offload-wrapper, {16}, object, (device-sycl)
27+
//CHECK_ACTIONS:18: offload, "host-sycl ({{.*}})" {10}, "device-sycl ({{.*}})" {15}, "device-sycl ({{.*}})" {17}, image
2628

2729

2830
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["{{.*}}sycl-native-cpu-fsycl.cpp"], output: "[[KERNELIR:.*]].bc"
2931
//CHECK_BINDINGS:# "{{.*}}" - "SYCL::Linker", inputs: ["[[KERNELIR]].bc"], output: "[[KERNELLINK:.*]].bc"
30-
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[KERNELLINK]].bc"], output: "[[KERNELOBJ:.*]].o"
31-
//CHECK_BINDINGS:# "{{.*}}" - "SYCL post link", inputs: ["[[KERNELLINK]].bc"], output: "[[TABLEFILE:.*]].table"
32+
//CHECK_BINDINGS:# "{{.*}}" - "SYCL::Linker", inputs: ["[[KERNELLINK]].bc", "{{.*}}.bc"], output: "[[KERNELLINKWLIB:.*]].bc"
33+
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[KERNELLINKWLIB]].bc"], output: "[[KERNELOBJ:.*]].o"
34+
//CHECK_BINDINGS:# "{{.*}}" - "SYCL post link", inputs: ["[[KERNELLINKWLIB]].bc"], output: "[[TABLEFILE:.*]].table"
3235
//CHECK_BINDINGS:# "{{.*}}" - "offload wrapper", inputs: ["[[TABLEFILE]].table"], output: "[[WRAPPEROBJ:.*]].o"
3336
//CHECK_BINDINGS:# "{{.*}}" - "Append Footer to source", inputs: ["{{.*}}sycl-native-cpu-fsycl.cpp"], output: "[[SRCWFOOTER:.*]].cpp"
3437
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[SRCWFOOTER]].cpp", "[[KERNELIR]].bc"], output: "[[HOSTOBJ:.*]].o"
@@ -38,21 +41,6 @@
3841
//CHECK_INVO:{{.*}}clang{{.*}}"-x" "ir"
3942
//CHECK_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}}
4043

41-
// checkes that the device and host triple is correct in the generated actions when it is set explicitly
42-
//CHECK_ACTIONS-AARCH64: +- 0: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (host-sycl)
43-
//CHECK_ACTIONS-AARCH64: +- 1: append-footer, {0}, c++, (host-sycl)
44-
//CHECK_ACTIONS-AARCH64: +- 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
45-
//CHECK_ACTIONS-AARCH64: | +- 3: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (device-sycl)
46-
//CHECK_ACTIONS-AARCH64: | +- 4: preprocessor, {3}, c++-cpp-output, (device-sycl)
47-
//CHECK_ACTIONS-AARCH64: |- 5: compiler, {4}, ir, (device-sycl)
44+
// checks that the device and host triple is correct in the generated actions when it is set explicitly
4845
//CHECK_ACTIONS-AARCH64: +- 6: offload, "host-sycl (aarch64-unknown-linux-gnu)" {2}, "device-sycl (aarch64-unknown-linux-gnu)" {5}, c++-cpp-output
49-
//CHECK_ACTIONS-AARCH64: +- 7: compiler, {6}, ir, (host-sycl)
50-
//CHECK_ACTIONS-AARCH64: +- 8: backend, {7}, assembler, (host-sycl)
51-
//CHECK_ACTIONS-AARCH64: +- 9: assembler, {8}, object, (host-sycl)
52-
//CHECK_ACTIONS-AARCH64:+- 10: linker, {9}, image, (host-sycl)
53-
//CHECK_ACTIONS-AARCH64:| +- 11: linker, {5}, ir, (device-sycl)
54-
//CHECK_ACTIONS-AARCH64:| +- 12: backend, {11}, assembler, (device-sycl)
55-
//CHECK_ACTIONS-AARCH64:|- 13: assembler, {12}, object, (device-sycl)
56-
//CHECK_ACTIONS-AARCH64:| +- 14: sycl-post-link, {11}, tempfiletable, (device-sycl)
57-
//CHECK_ACTIONS-AARCH64:|- 15: clang-offload-wrapper, {14}, object, (device-sycl)
58-
//CHECK_ACTIONS-AARCH64:16: offload, "host-sycl (aarch64-unknown-linux-gnu)" {10}, "device-sycl (aarch64-unknown-linux-gnu)" {13}, "device-sycl (aarch64-unknown-linux-gnu)" {15}, image
46+
//CHECK_ACTIONS-AARCH64:{{[0-9]*}}: offload, "host-sycl (aarch64-unknown-linux-gnu)" {{{[0-9]*}}}, "device-sycl (aarch64-unknown-linux-gnu)" {{{[0-9]*}}}, "device-sycl (aarch64-unknown-linux-gnu)" {{{[0-9]*}}}, image

libclc/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ set( nvptx--nvidiacl_devices none )
181181
set( nvptx64--nvidiacl_devices none )
182182
set( spirv-mesa3d-_devices none )
183183
set( spirv64-mesa3d-_devices none )
184+
# TODO: Does this need to be set for each possible triple?
185+
set( x86_64-unknown-linux-gnu_devices none )
184186

185187
# Setup aliases
186188
set( cedar_aliases palm sumo sumo2 redwood juniper )
@@ -342,6 +344,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
342344
# AMDGCN needs libclc to be compiled to high bc version since all atomic
343345
# clang builtins need to be accessible
344346
set( flags "SHELL:-mcpu=gfx940")
347+
elseif( ${ARCH} STREQUAL x86_64)
348+
# TODO: This is used by Native CPU; we should define an option to set these flags
349+
set( flags "SHELL:-Xclang -target-feature -Xclang +avx"
350+
"SHELL:-Xclang -target-feature -Xclang +avx512f")
345351
else()
346352
set ( flags )
347353
endif()

libclc/generic/libspirv/SOURCES

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
atomic/loadstore_helpers_unordered.ll
2+
atomic/loadstore_helpers_release.ll
3+
atomic/loadstore_helpers_acquire.ll
4+
atomic/loadstore_helpers_seq_cst.ll
15
float16.cl
26
subnormal_config.cl
37
subnormal_helper_func.ll
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#if __clang_major__ >= 7
2+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
3+
#else
4+
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
5+
#endif
6+
; This file contains helper functions for the acquire memory ordering constraint.
7+
; Other targets can specialize this file to account for unsupported features in their backend.
;
; Status of the helpers in this file: only
; @__clc__atomic_uload_global_4_acquire performs a load. Every other helper
; below is a placeholder whose body calls @llvm.trap() and is terminated by
; `unreachable`, so it never returns a value.
8+
9+
declare void @llvm.trap()
10+
11+
; Placeholder: traps instead of loading (i32, addrspace(1)).
define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
12+
entry:
13+
tail call void @llvm.trap()
14+
unreachable
15+
}
16+
17+
; Placeholder: traps instead of loading (i32, addrspace(3)).
define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
18+
entry:
19+
tail call void @llvm.trap()
20+
unreachable
21+
}
22+
23+
; Placeholder: traps instead of loading (i64, addrspace(1)).
define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
24+
entry:
25+
tail call void @llvm.trap()
26+
unreachable
27+
}
28+
29+
; Placeholder: traps instead of loading (i64, addrspace(3)).
define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
30+
entry:
31+
tail call void @llvm.trap()
32+
unreachable
33+
}
34+
35+
; The only helper here with a real body: an atomic volatile load of an i32
; from addrspace(1) with acquire ordering and 4-byte alignment; the loaded
; value is returned.
define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
36+
entry:
37+
%0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
38+
ret i32 %0
39+
}
40+
41+
; Placeholder: traps instead of loading (i32, addrspace(3)).
define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
42+
entry:
43+
tail call void @llvm.trap()
44+
unreachable
45+
}
46+
47+
; Placeholder: traps instead of loading (i64, addrspace(1)).
define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
48+
entry:
49+
tail call void @llvm.trap()
50+
unreachable
51+
}
52+
53+
; Placeholder: traps instead of loading (i64, addrspace(3)).
define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
54+
entry:
55+
tail call void @llvm.trap()
56+
unreachable
57+
}
58+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#if __clang_major__ >= 7
2+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
3+
#else
4+
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
5+
#endif
6+
; This file contains helper functions for the release memory ordering constraint.
7+
; Other targets can specialize this file to account for unsupported features in their backend.
;
; Status of the helpers in this file: every function below is a placeholder.
; Each body calls @llvm.trap() and is terminated by `unreachable`; none of
; them performs a store, and the %value argument is never used.
8+
9+
declare void @llvm.trap()
10+
11+
; Placeholder: traps instead of storing (i32, addrspace(1)).
define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
12+
entry:
13+
tail call void @llvm.trap()
14+
unreachable
15+
}
16+
17+
; Placeholder: traps instead of storing (i32, addrspace(3)).
define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
18+
entry:
19+
tail call void @llvm.trap()
20+
unreachable
21+
}
22+
23+
; Placeholder: traps instead of storing (i64, addrspace(1)).
define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
24+
entry:
25+
tail call void @llvm.trap()
26+
unreachable
27+
}
28+
29+
; Placeholder: traps instead of storing (i64, addrspace(3)).
define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
30+
entry:
31+
tail call void @llvm.trap()
32+
unreachable
33+
}
34+
35+
; Placeholder: traps instead of storing (i32, addrspace(1)).
define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
36+
entry:
37+
tail call void @llvm.trap()
38+
unreachable
39+
}
40+
41+
; Placeholder: traps instead of storing (i32, addrspace(3)).
define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
42+
entry:
43+
tail call void @llvm.trap()
44+
unreachable
45+
}
46+
47+
; Placeholder: traps instead of storing (i64, addrspace(1)).
define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
48+
entry:
49+
tail call void @llvm.trap()
50+
unreachable
51+
}
52+
53+
; Placeholder: traps instead of storing (i64, addrspace(3)).
define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
54+
entry:
55+
tail call void @llvm.trap()
56+
unreachable
57+
}
58+

0 commit comments

Comments
 (0)