Skip to content

[SYCLomatic] Enable the SYCL free function queries by default and mark the "free-function-queries" experimental option as deprecated. #2794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged — 12 commits merged on Apr 27, 2025
12 changes: 10 additions & 2 deletions clang/include/clang/DPCT/DPCTOptions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,13 @@ DPCT_ENUM_OPTION(
"enqueued_barriers",
int(DPCPPExtensionsDefaultEnabled::ExtDE_EnqueueBarrier),
"Disable the enqueued barriers extension.", false),
DPCT_OPTION_ENUM_VALUE(
"free-function-queries",
int(DPCPPExtensionsDefaultEnabled::ExtDE_FreeQueries),
"Disable the free function query experimental extension that "
"allows getting 'id', 'item', 'nd_item', 'group', and 'sub_group' "
"instances globally.",
false),
DPCT_OPTION_ENUM_VALUE(
"peer_access", int(DPCPPExtensionsDefaultEnabled::ExtDE_PeerAccess),
"Disable the peer access extension.", false),
Expand Down Expand Up @@ -723,9 +730,10 @@ DPCT_ENUM_OPTION(
DPCT_OPTION_VALUES(
DPCT_OPTION_ENUM_VALUE(
"free-function-queries", int(ExperimentalFeatures::Exp_FreeQueries),
"Experimental extension that allows getting 'id', "
"DEPRECATED : Experimental extension that allows getting 'id', "
"'item', 'nd_item', 'group', and\n"
"'sub_group' instances globally.",
"'sub_group' instances globally. Deprecate this option as the "
"feature is turned on by default.",
false),
DPCT_OPTION_ENUM_VALUE(
"local-memory-kernel-scope-allocation",
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,9 @@ class DpctGlobalInfo {
return getUsingExperimental<ExperimentalFeatures::Exp_RootGroup>();
}
static bool useFreeQueries() {
return getUsingExperimental<ExperimentalFeatures::Exp_FreeQueries>();
return getUsingExperimental<ExperimentalFeatures::Exp_FreeQueries>() ||
getUsingExtensionDE(
DPCPPExtensionsDefaultEnabled::ExtDE_FreeQueries);
}
static bool useGroupLocalMemory() {
return getUsingExperimental<ExperimentalFeatures::Exp_GroupSharedMemory>();
Expand Down
1 change: 1 addition & 0 deletions clang/lib/DPCT/CommandOption/ValidateArguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ enum class DPCPPExtensionsDefaultEnabled : unsigned int {
ExtDE_DeviceInfo,
ExtDE_BFloat16,
ExtDE_PeerAccess,
ExtDE_FreeQueries,
ExtDE_Assert,
ExtDE_QueueEmpty,
ExtDE_DPCPPExtensionsDefaultEnabledEnumSize,
Expand Down
4 changes: 2 additions & 2 deletions clang/test/dpct/a_vcxproj_test/a_kernel.cu
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
// UNSUPPORTED: system-linux
// RUN: cat %S/DemoCudaProj.vcxproj > %T/DemoCudaProj.vcxproj
// RUN: cd %T
// RUN: dpct --format-range=none --vcxprojfile=%T/DemoCudaProj.vcxproj -in-root=%S -out-root=%T %s --cuda-include-path="%cuda-path/include"
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none --vcxprojfile=%T/DemoCudaProj.vcxproj -in-root=%S -out-root=%T %s --cuda-include-path="%cuda-path/include"

// RUN: cat %S/check_compilation_ref.txt >%T/check_compilation_db.txt
// RUN: cat %T/compile_commands.json >> %T/check_compilation_db.txt

// RUN: FileCheck --match-full-lines --input-file %T/check_compilation_db.txt %T/check_compilation_db.txt

// RUN: dpct --format-range=none -p=%S -in-root=%S -out-root=%T/2 --process-all --cuda-include-path="%cuda-path/include"
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -p=%S -in-root=%S -out-root=%T/2 --process-all --cuda-include-path="%cuda-path/include"

// RUN: FileCheck --input-file %T/2/a_kernel.dp.cpp --match-full-lines %S/a_kernel.cu
// RUN: %if build_lit %{icpx -c -fsycl %T/2/a_kernel.dp.cpp -o %T/2/a_kernel.dp.o %}
Expand Down
4 changes: 2 additions & 2 deletions clang/test/dpct/anonymous_shared_var_macro.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// CHECK: float a1d[7]; \
// CHECK: }; \
// CHECK: type_ct2 *atoms = (type_ct2 *)atoms_ct1; \
// CHECK: if (item_ct1.get_local_id(2) < 7) { \
// CHECK: bspline_coeffs.a1d[item_ct1.get_local_id(2)] = 0; \
// CHECK: if (sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2) < 7) { \
// CHECK: bspline_coeffs.a1d[sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2)] = 0; \
// CHECK: };

#define BSPLINE_DEFS \
Expand Down
15 changes: 7 additions & 8 deletions clang/test/dpct/asm/cvta.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2
// RUN: dpct --format-range=none -out-root %T/cvta %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
// RUN: dpct --format-range=none -out-root %T/cvta %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
// RUN: FileCheck %s --match-full-lines --input-file %T/cvta/cvta.dp.cpp
// RUN: %if build_lit %{icpx -c -fsycl %T/cvta/cvta.dp.cpp -o %T/cvta/cvta.dp.o %}

Expand All @@ -9,13 +9,12 @@
#include <cuda_runtime.h>


// CHECK: void test_cvta_to_shared_u64(uint64_t* output, const sycl::nd_item<3> &item_ct1,
// CHECK-NEXT: int *shared_data) {
// CHECK: void test_cvta_to_shared_u64(uint64_t* output, int *shared_data) {
// CHECK-NEXT: // Shared memory
// CHECK-NEXT: shared_data[0] = 0;
// CHECK-NEXT: uint64_t shared_addr = 0;
// CHECK-NEXT: shared_addr = (uint64_t)(&shared_data[0]);
// CHECK-NEXT: output[item_ct1.get_local_id(2)] = shared_addr;
// CHECK-NEXT: output[sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2)] = shared_addr;
// CHECK-NEXT:}
__global__ void test_cvta_to_shared_u64(uint64_t* output) {
__shared__ int shared_data[1]; // Shared memory
Expand All @@ -27,8 +26,8 @@ __global__ void test_cvta_to_shared_u64(uint64_t* output) {


#define N 128
// CHECK: void testKernel(unsigned int *addr_out, const sycl::nd_item<3> &item_ct1,
// CHECK-NEXT: int *B_shared) {
// CHECK: void testKernel(unsigned int *addr_out, int *B_shared) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: // Shared memory
// CHECK-NEXT: unsigned int addr1;
// CHECK-NEXT: int k_0_1 = item_ct1.get_group(2);
Expand All @@ -55,8 +54,8 @@ __global__ void testKernel(unsigned int *addr_out) {
}


// CHECK: void read_shared_value(int *output, const sycl::nd_item<3> &item_ct1,
// CHECK-NEXT: int *shared_data) {
// CHECK: void read_shared_value(int *output, int *shared_data) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: // Shared memory allocation
// CHECK-NEXT: if (item_ct1.get_local_id(2) == 0) {
// CHECK-NEXT: shared_data[0] = 42;
Expand Down
59 changes: 24 additions & 35 deletions clang/test/dpct/asm/red.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,107 +8,96 @@
#include <cstdint>
#include <cuda_runtime.h>

// CHECK: void atomicAddKernel(int* lock, int val, const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
// CHECK: void atomicAddKernel(int* lock, int val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernel(int* lock, int val) {
asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
// CHECK-NEXT: }
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>());
// CHECK-NEXT: }
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicAddKernelRelease(int* lock, int val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
// CHECK: void atomicAddKernelRelease(int* lock, int val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernelRelease(int* lock, int val) {
asm volatile("red.release.gpu.global.add.s32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicOrKernelRelease(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
// CHECK: void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
asm volatile("red.release.gpu.global.or.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicXorKernelRelease(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
// CHECK: void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
asm volatile("red.release.gpu.global.xor.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicAndKernelRelease(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
// CHECK: void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
// CHECK-NEXT: }
__global__ void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
asm volatile("red.release.gpu.global.and.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMaxKernelRelease(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
// CHECK: void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
asm volatile("red.release.gpu.global.max.u32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>());
// CHECK-NEXT: }
__global__ void atomicMinKernelRelease(uint32_t* lock, uint32_t val) {
asm volatile("red.release.gpu.global.min.u32 [%0], %1;\n"
Expand Down
25 changes: 12 additions & 13 deletions clang/test/dpct/atomic_functions.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

template <typename T>
__global__ void test(T *data) {
// CHECK: T tid = item_ct1.get_local_id(2);
// CHECK: T tid = sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2);
T tid = threadIdx.x;

// CHECK: dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&data[0], tid);
Expand Down Expand Up @@ -108,7 +108,7 @@ void InvokeKernel() {
// CHECK-NEXT: cgh.parallel_for<dpct_kernel_name<class test_{{[a-f0-9]+}}, T>>(
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, k_threads_per_block), sycl::range<3>(1, 1, k_threads_per_block)),
// CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) {
// CHECK-NEXT: test<T>(dev_ptr_acc_ct0.get_raw_pointer(), item_ct1);
// CHECK-NEXT: test<T>(dev_ptr_acc_ct0.get_raw_pointer());
// CHECK-NEXT: });
// CHECK-NEXT: });
test<T><<<1, k_threads_per_block>>>(dev_ptr);
Expand Down Expand Up @@ -139,8 +139,8 @@ int main() {
InvokeKernel<double>();
}

// CHECK: void foo(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
// CHECK-NEXT: uint32_t &share_v) {
// CHECK: void foo(uint8_t *dpct_local, uint32_t &share_v) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: auto share_array = (uint32_t *)dpct_local;
// CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
// CHECK-NEXT: dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&share_array[b], 1);
Expand All @@ -159,8 +159,8 @@ __shared__ uint32_t share_v;
atomicAdd(&share_v, 1);
}

// CHECK: void foo_2(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
// CHECK-NEXT: uint32_t &share_v) {
// CHECK: void foo_2(uint8_t *dpct_local, uint32_t &share_v) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: auto share_array = (uint32_t *)dpct_local;
// CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
// CHECK-NEXT: uint32_t *p_1 = &share_array[b];
Expand Down Expand Up @@ -454,8 +454,8 @@ __global__ void k() {
atomicAdd(&f, f);
}

// CHECK: void mykernel(unsigned int *dev, const sycl::nd_item<3> &item_ct1,
// CHECK-NEXT: uint8_t *dpct_local) {
// CHECK: void mykernel(unsigned int *dev, uint8_t *dpct_local) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: auto sm = (unsigned int *)dpct_local;
// CHECK-NEXT: unsigned int* as= (unsigned int*)sm;
// CHECK-NEXT: const int kc=item_ct1.get_local_id(2);
Expand All @@ -474,9 +474,8 @@ __global__ void mykernel(unsigned int *dev) {

// TODO: Further refine the analysis of barrier to support this case.
// CHECK: void mykernel_1(unsigned char *buffer, long size,
// CHECK-NEXT: unsigned int *histo,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1,
// CHECK-NEXT: unsigned int *temp) {
// CHECK-NEXT: unsigned int *histo, unsigned int *temp) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-EMPTY:
// CHECK-NEXT: temp[item_ct1.get_local_id(2)] = 0;
// CHECK-NEXT: /*
Expand Down Expand Up @@ -552,8 +551,8 @@ __device__ void __gpu_sync(int blocks_to_synch) {
while(g_mutex < blocks_to_synch);
}

//CHECK:void atomicInc_foo(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
//CHECK-NEXT: unsigned int &share_v) {
//CHECK:void atomicInc_foo(uint8_t *dpct_local, unsigned int &share_v) {
//CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
//CHECK-NEXT: auto share_array = (unsigned int *)dpct_local;
//CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
//CHECK-NEXT: dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&share_array[b], 1);
Expand Down
3 changes: 2 additions & 1 deletion clang/test/dpct/atomic_functions_system_wide.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
#include <iostream>
#include <memory>

// CHECK:void atomic_kernel(int *atomic_array, const sycl::nd_item<3> &item_ct1) {
// CHECK:void atomic_kernel(int *atomic_array) {
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
// CHECK-NEXT: unsigned int tid = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
// CHECK-NEXT: dpct::atomic_fetch_add<sycl::access::address_space::generic_space, sycl::memory_order::relaxed, sycl::memory_scope::system>(&atomic_array[0], 10);
// CHECK-NEXT: dpct::atomic_exchange<sycl::access::address_space::generic_space, sycl::memory_order::relaxed, sycl::memory_scope::system>(&atomic_array[1], tid);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/dpct/b_vcxproj_test/b_kernel.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// UNSUPPORTED: system-linux
// RUN: cat %S/SVMbenchmark.vcxproj > %T/SVMbenchmark.vcxproj
// RUN: dpct -output-file=b_kernel_outputfile_win.txt --format-range=none --vcxprojfile=%T/SVMbenchmark.vcxproj -in-root=%S -out-root=%T %s -extra-arg="-I %S" --cuda-include-path="%cuda-path/include"
// RUN: dpct --no-dpcpp-extensions=free-function-queries -output-file=b_kernel_outputfile_win.txt --format-range=none --vcxprojfile=%T/SVMbenchmark.vcxproj -in-root=%S -out-root=%T %s -extra-arg="-I %S" --cuda-include-path="%cuda-path/include"

// RUN: cat %S/check_compilation_ref.txt >%T/check_compilation_db.txt
// RUN: cat %T/compile_commands.json >>%T/check_compilation_db.txt
Expand All @@ -10,7 +10,7 @@
// RUN: cat %T/b_kernel_outputfile_win.txt >>%T/check_b_kernel_outputfile_windows.txt
// RUN: FileCheck --match-full-lines --input-file %T/check_b_kernel_outputfile_windows.txt %T/check_b_kernel_outputfile_windows.txt

// RUN: dpct --format-range=none -output-file=output-file.txt -in-root=%S -out-root=%T/2 %s --process-all --cuda-include-path="%cuda-path/include"
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -output-file=output-file.txt -in-root=%S -out-root=%T/2 %s --process-all --cuda-include-path="%cuda-path/include"
// RUN: cat %S/readme_2_ref.txt > %T/2/readme_2.txt
// RUN: cat %S/readme_2.txt > %T/2/check_output-file.txt
// RUN: cat %T/2/output-file.txt >>%T/2/check_output-file.txt
Expand Down
Loading
Loading