-
Notifications
You must be signed in to change notification settings - Fork 12.3k
/
openmp-offload-gpu.c
404 lines (329 loc) · 27.5 KB
/
openmp-offload-gpu.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
///
/// Perform several driver tests for OpenMP offloading
///
/// ###########################################################################
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
// RUN: -Xopenmp-target -march=sm_52 -Xopenmp-target -march=sm_60 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
/// ###########################################################################
/// Check -Xopenmp-target -march=sm_52 works as expected when two triples are present.
// RUN: %clang -### -fopenmp=libomp \
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
// RUN: -nogpulib -nogpuinc -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_52"
/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c"
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
/// ###########################################################################
/// Check that error is not thrown by toolchain when no cuda lib flag is used.
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
// RUN: -nogpulib -nogpuinc -fopenmp-relocatable-target -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s
// CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60
/// ###########################################################################
/// Check that error is not thrown by toolchain when no cuda lib device is found when using -S.
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -S -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NOLIBDEVICE %s
// CHK-NOLIBDEVICE-NOT: error:{{.*}}sm_60
/// ###########################################################################
/// Check that the runtime bitcode library is part of the compile line.
/// Create a bogus bitcode library and specify it with libomptarget-nvptx-bc-path
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB %s
/// Specify the directory containing the bitcode lib, check clang picks the right one
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \
// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps \
// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s
/// Create a bogus bitcode library and find it with LIBRARY_PATH
// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps \
// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s
// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx-sm_52.bc
// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx-sm_52.bc
// CHK-BCLIB-NOT: {{error:|warning:}}
/// ###########################################################################
/// Check that the error is thrown when the libomptarget bitcode library does not exist.
// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=not-exist.bc \
// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s
// CHK-BCLIB-ERROR: bitcode library 'not-exist.bc' does not exist
/// ###########################################################################
/// Check that the error is thrown when CUDA 9.1 or lower version is used.
// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s
// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected
/// Check that debug info is emitted in dwarf-2
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// DEBUG_DIRECTIVES-NOT: warning: debug
// NO_DEBUG-NOT: warning: debug
// NO_DEBUG: "-fopenmp-is-target-device"
// NO_DEBUG-NOT: "-debug-info-kind=
// NO_DEBUG: ptxas
// DEBUG_DIRECTIVES: "-triple" "nvptx64-nvidia-cuda"
// DEBUG_DIRECTIVES-SAME: "-debug-info-kind=line-directives-only"
// DEBUG_DIRECTIVES-SAME: "-fopenmp-is-target-device"
// DEBUG_DIRECTIVES: ptxas
// DEBUG_DIRECTIVES: "-lineinfo"
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// HAS_DEBUG-NOT: warning: debug
// HAS_DEBUG: "-triple" "nvptx64-nvidia-cuda"
// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}"
// HAS_DEBUG-SAME: "-dwarf-version=2"
// HAS_DEBUG-SAME: "-fopenmp-is-target-device"
// HAS_DEBUG: ptxas
// HAS_DEBUG-SAME: "-g"
// HAS_DEBUG-SAME: "--dont-merge-basicblocks"
// HAS_DEBUG-SAME: "--return-at-end"
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// CUDA_MODE: "-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
// CUDA_MODE-SAME: "-fopenmp-cuda-mode"
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s
// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048"
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
// OPENMP_NVPTX_WRAPPERS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
// OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
// RUN: --offload-arch=sm_52 -save-temps -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_52 \
// RUN: -nogpulib -nogpuinc -save-temps %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=TRIPLE %s
// TRIPLE: "-triple" "nvptx64-nvidia-cuda"
// TRIPLE: "-target-cpu" "sm_52"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck %s
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out"
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp)
// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp)
// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp)
// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {5}, ir
// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda)" {8}, object
// CHECK-PHASES: 10: clang-offload-packager, {9}, image
// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir
// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp)
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]"
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.+]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.+.out]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS
// CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60,sm_70,sm_80 --no-offload-arch=sm_60,sm_80 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]"
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]"
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[BINARY:.*]]"
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
// RUN: %clang -x ir -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp --offload-arch=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-IR
// CHECK-IR: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT_IR:.+]]"], output: "[[OBJECT:.+]]"
// CHECK-IR: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJECT]]"], output: "a.out"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp --offload-device-only \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR-BC
// CHECK-EMIT-LLVM-IR-BC: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm-bc"
// RUN: %clang -### -fopenmp=libomp --offload-arch=sm_89 \
// RUN: --no-cuda-version-check \
// RUN: -nogpulib %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s
// CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-NO-LTO-LIBRARY %s
// CHECK-NO-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-NO-LIBRARY %s
// CHECK-NO-LIBRARY-NOT: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
// RUN: -Xoffload-linker a -Xoffload-linker-nvptx64-nvidia-cuda b -Xoffload-linker-nvptx64 c \
// RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s
// CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}--
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda --cuda-feature=+ptx64 -foffload-lto %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-SET-FEATURES %s
// CHECK-SET-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx64
//
// Check that `-Xarch_host` works for OpenMP offloading.
//
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --offload-arch=sm_52 -nogpulib -nogpuinc -Xarch_host -O3 %s 2>&1 \
// RUN: | FileCheck --check-prefix=XARCH-HOST %s
// XARCH-HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-O3"
// XARCH-HOST-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-O3"
//
// Check that `-Xarch_device` works for OpenMP offloading.
//
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --offload-arch=sm_52 -nogpulib -nogpuinc -Xarch_device -O3 %s 2>&1 \
// RUN: | FileCheck --check-prefix=XARCH-DEVICE %s
// XARCH-DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-O3"
// XARCH-DEVICE-NOT: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-O3"
//
// Check that `-gpulibc` includes the LLVM C libraries for the GPU.
//
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: --offload-arch=sm_52,gfx803 -gpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=LIBC-GPU %s
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -gpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=LIBC-GPU %s
// LIBC-GPU-DAG: "-lcgpu-amdgpu"
// LIBC-GPU-DAG: "-lmgpu-amdgpu"
// LIBC-GPU-DAG: "-lcgpu-nvptx"
// LIBC-GPU-DAG: "-lmgpu-nvptx"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --offload-arch=sm_52 -nogpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=NO-LIBC-GPU %s
// NO-LIBC-GPU-NOT: -lmgpu{{.*}}-lcgpu