Skip to content

Commit aa01a1d

Browse files
committed
[SYCL] Refactor inline asm tests to reduce code duplication
Signed-off-by: Alexey Sachkov <alexey.sachkov@intel.com>
1 parent 7179d3a commit aa01a1d

23 files changed

+729
-1127
lines changed

sycl/test/inline-asm/asm_16_empty.cpp

Lines changed: 20 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2,72 +2,39 @@
22
// REQUIRES: gpu,linux
33
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
44
// RUN: %t.out
5+
// RUN: %clangxx -fsycl %s -o %t.ref.out
6+
// RUN: %t.ref.out
57

6-
#include "include/asmcheck.h"
8+
#include "include/asmhelper.h"
79
#include <CL/sycl.hpp>
810
#include <iostream>
9-
#include <string>
1011
#include <vector>
1112

12-
constexpr int LIST_SIZE = 1024;
13-
using arr_t = std::vector<cl::sycl::cl_int>;
14-
constexpr auto sycl_write = cl::sycl::access::mode::write;
13+
using dataType = cl::sycl::cl_int;
1514

16-
// class is used for kernel name
17-
template <typename T>
18-
class no_opts;
15+
template <typename T = dataType>
16+
struct KernelFunctor : WithOutputBuffer<T> {
17+
KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
1918

20-
template <typename T>
21-
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
22-
cl::sycl::range<1> numOfItems{sz};
23-
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
24-
25-
deviceQueue.submit([&](cl::sycl::handler &cgh) {
26-
auto C = bufferC.template get_access<sycl_write>(cgh);
27-
28-
auto kern = [C](cl::sycl::id<1> wiID)
29-
[[cl::intel_reqd_sub_group_size(16)]] {
30-
C[wiID] = 43;
19+
void operator()(cl::sycl::handler &cgh) {
20+
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
21+
cgh.parallel_for<KernelFunctor<T>>(
22+
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
23+
C[wiID] = 43;
3124
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
32-
asm volatile("");
25+
asm volatile("");
3326
#endif
34-
};
35-
cgh.parallel_for<class no_opts<T>>(numOfItems, kern);
36-
});
27+
});
28+
}
3729
};
3830

3931
int main() {
40-
arr_t C(LIST_SIZE);
41-
42-
cl::sycl::gpu_selector gpsel;
43-
cl::sycl::queue deviceQueue(gpsel);
44-
45-
sycl::device Device = deviceQueue.get_device();
46-
47-
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
48-
std::cout << "Skipping test\n";
32+
KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
33+
if (!launchInlineASMTest(f))
4934
return 0;
50-
}
5135

52-
for (int i = 0; i < LIST_SIZE; i++) {
53-
C[i] = 0;
54-
}
55-
56-
process_buffers(deviceQueue, C.data(), LIST_SIZE);
57-
58-
bool all_right = true;
59-
60-
for (int i = 0; i < LIST_SIZE; ++i)
61-
if (C[i] != 43) {
62-
std::cerr << "At index: " << i << ". ";
63-
std::cerr << C[i] << " != " << 43 << "\n";
64-
all_right = false;
65-
break;
66-
}
67-
if (all_right) {
68-
std::cout << "Pass" << std::endl;
36+
if (verify_all_the_same(f.getOutputBufferData(), 43))
6937
return 0;
70-
}
71-
std::cout << "Error" << std::endl;
72-
return -1;
38+
39+
return 1;
7340
}

sycl/test/inline-asm/asm_16_matrix_mult.cpp

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,72 +2,43 @@
22
// REQUIRES: gpu,linux
33
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
44
// RUN: %t.out
5+
// RUN: %clangxx -fsycl %s -o %t.ref.out
6+
// RUN: %t.ref.out
57

6-
#include "include/asmcheck.h"
8+
#include "include/asmhelper.h"
79
#include <CL/sycl.hpp>
810
#include <iostream>
911
#include <vector>
1012

11-
constexpr int LIST_SIZE = 8;
12-
using arr_t = std::vector<cl::sycl::cl_int>;
13-
constexpr auto sycl_write = cl::sycl::access::mode::write;
13+
using dataType = cl::sycl::cl_int;
1414

15-
// class is used for kernel name
16-
template <typename T>
17-
class simple_vector_add;
15+
template <typename T = dataType>
16+
struct KernelFunctor : WithOutputBuffer<T> {
17+
KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
1818

19-
template <typename T>
20-
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21-
cl::sycl::range<1> numOfItems{sz};
22-
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23-
24-
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25-
auto C = bufferC.template get_access<sycl_write>(cgh);
26-
27-
auto kern = [C](cl::sycl::id<1> wiID)
28-
[[cl::intel_reqd_sub_group_size(16)]] {
29-
volatile int output = 0;
19+
void operator()(cl::sycl::handler &cgh) {
20+
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
21+
cgh.parallel_for<KernelFunctor<T>>(
22+
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
23+
volatile int output = 0;
3024
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31-
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
32-
: "=rw"(output));
25+
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
26+
: "=rw"(output));
3327
#else
34-
output = 7;
28+
output = 7;
3529
#endif
36-
C[wiID] = output;
37-
};
38-
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
39-
});
30+
C[wiID] = output;
31+
});
32+
}
4033
};
4134

4235
int main() {
43-
arr_t C(LIST_SIZE);
44-
45-
cl::sycl::gpu_selector gpsel;
46-
cl::sycl::queue deviceQueue(gpsel);
47-
sycl::device Device = deviceQueue.get_device();
48-
49-
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
50-
std::cout << "Skipping test\n";
36+
KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
37+
if (!launchInlineASMTest(f))
5138
return 0;
52-
}
53-
for (int i = 0; i < LIST_SIZE; i++) {
54-
C[i] = 0;
55-
}
56-
57-
process_buffers(deviceQueue, C.data(), LIST_SIZE);
5839

59-
bool all_right = true;
60-
for (int i = 0; i < LIST_SIZE; ++i)
61-
if (C[i] != 7) {
62-
std::cerr << "At index: " << i << ". ";
63-
std::cerr << C[i] << " != " << 7 << "\n";
64-
all_right = false;
65-
break;
66-
}
67-
if (all_right) {
68-
std::cout << "Pass" << std::endl;
40+
if (verify_all_the_same(f.getOutputBufferData(), 7))
6941
return 0;
70-
}
71-
std::cout << "Error" << std::endl;
72-
return -1;
42+
43+
return 1;
7344
}

sycl/test/inline-asm/asm_16_no_input_int.cpp

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,72 +2,43 @@
22
// REQUIRES: gpu,linux
33
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
44
// RUN: %t.out
5+
// RUN: %clangxx -fsycl %s -o %t.ref.out
6+
// RUN: %t.ref.out
57

6-
#include "include/asmcheck.h"
8+
#include "include/asmhelper.h"
79
#include <CL/sycl.hpp>
810
#include <iostream>
911
#include <vector>
1012

11-
constexpr int LIST_SIZE = 8;
12-
using arr_t = std::vector<cl::sycl::cl_int>;
13-
constexpr auto sycl_write = cl::sycl::access::mode::write;
13+
using dataType = cl::sycl::cl_int;
1414

15-
// class is used for kernel name
16-
template <typename T>
17-
class simple_vector_add;
15+
template <typename T = dataType>
16+
struct KernelFunctor : WithOutputBuffer<T> {
17+
KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
1818

19-
template <typename T>
20-
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21-
cl::sycl::range<1> numOfItems{sz};
22-
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23-
24-
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25-
auto C = bufferC.template get_access<sycl_write>(cgh);
26-
27-
auto kern = [C](cl::sycl::id<1> wiID)
28-
[[cl::intel_reqd_sub_group_size(16)]] {
29-
volatile int output = 0;
19+
void operator()(cl::sycl::handler &cgh) {
20+
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
21+
cgh.parallel_for<KernelFunctor<T>>(
22+
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
23+
volatile int output = 0;
3024
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31-
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
32-
: "=rw"(output));
25+
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
26+
: "=rw"(output));
3327
#else
34-
output = 7;
28+
output = 7;
3529
#endif
36-
C[wiID] = output;
37-
};
38-
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
39-
});
30+
C[wiID] = output;
31+
});
32+
}
4033
};
4134

4235
int main() {
43-
arr_t C(LIST_SIZE);
44-
45-
cl::sycl::gpu_selector gpsel;
46-
cl::sycl::queue deviceQueue(gpsel);
47-
sycl::device Device = deviceQueue.get_device();
48-
49-
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
50-
std::cout << "Skipping test\n";
36+
KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
37+
if (!launchInlineASMTest(f))
5138
return 0;
52-
}
53-
for (int i = 0; i < LIST_SIZE; i++) {
54-
C[i] = 0;
55-
}
56-
57-
process_buffers(deviceQueue, C.data(), LIST_SIZE);
5839

59-
bool all_right = true;
60-
for (int i = 0; i < LIST_SIZE; ++i)
61-
if (C[i] != 7) {
62-
std::cerr << "At index: " << i << ". ";
63-
std::cerr << C[i] << " != " << 7 << "\n";
64-
all_right = false;
65-
break;
66-
}
67-
if (all_right) {
68-
std::cout << "Pass" << std::endl;
40+
if (verify_all_the_same(f.getOutputBufferData(), 7))
6941
return 0;
70-
}
71-
std::cout << "Error" << std::endl;
72-
return -1;
42+
43+
return 1;
7344
}

sycl/test/inline-asm/asm_16_no_opts.cpp

Lines changed: 23 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2,74 +2,44 @@
22
// REQUIRES: gpu,linux
33
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
44
// RUN: %t.out
5+
// RUN: %clangxx -fsycl %s -o %t.ref.out
6+
// RUN: %t.ref.out
57

6-
#include "include/asmcheck.h"
8+
#include "include/asmhelper.h"
79
#include <CL/sycl.hpp>
810
#include <iostream>
911
#include <vector>
1012

11-
constexpr int LIST_SIZE = 1024;
12-
using arr_t = std::vector<cl::sycl::cl_int>;
13-
constexpr auto sycl_write = cl::sycl::access::mode::write;
13+
using dataType = cl::sycl::cl_int;
1414

15-
// class is used for kernel name
16-
template <typename T>
17-
class simple_vector_add;
15+
template <typename T = dataType>
16+
struct KernelFunctor : WithOutputBuffer<T> {
17+
KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
1818

19-
template <typename T>
20-
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21-
cl::sycl::range<1> numOfItems{sz};
22-
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23-
24-
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25-
auto C = bufferC.template get_access<sycl_write>(cgh);
26-
27-
auto kern = [C](cl::sycl::id<1> wiID)
28-
[[cl::intel_reqd_sub_group_size(16)]] {
29-
for (int i = 0; i < 10; ++i) {
19+
void operator()(cl::sycl::handler &cgh) {
20+
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
21+
cgh.parallel_for<KernelFunctor<T>>(
22+
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
23+
for (int i = 0; i < 10; ++i) {
3024
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31-
asm("fence_sw");
32-
C[wiID] += i;
25+
asm("fence_sw");
26+
C[wiID] += i;
3327

3428
#else
35-
C[wiID] += i;
29+
C[wiID] += i;
3630
#endif
37-
}
38-
};
39-
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
40-
});
31+
}
32+
});
33+
}
4134
};
4235

4336
int main() {
44-
arr_t C(LIST_SIZE);
45-
46-
cl::sycl::gpu_selector gpsel;
47-
cl::sycl::queue deviceQueue(gpsel);
48-
sycl::device Device = deviceQueue.get_device();
49-
50-
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
51-
std::cout << "Skipping test\n";
37+
KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
38+
if (!launchInlineASMTest(f))
5239
return 0;
53-
}
54-
for (int i = 0; i < LIST_SIZE; i++) {
55-
C[i] = 0;
56-
}
57-
58-
process_buffers(deviceQueue, C.data(), LIST_SIZE);
59-
60-
bool all_right = true;
6140

62-
for (int i = 0; i < LIST_SIZE; ++i)
63-
if (C[i] != 45) {
64-
std::cerr << "At index: " << i << ". ";
65-
std::cerr << C[i] << " != " << 45 << "\n";
66-
all_right = false;
67-
break;
68-
}
69-
if (all_right) {
70-
std::cout << "Pass" << std::endl;
41+
if (verify_all_the_same(f.getOutputBufferData(), 45))
7142
return 0;
72-
}
73-
std::cout << "Error" << std::endl;
74-
return -1;
43+
44+
return 1;
7545
}

0 commit comments

Comments
 (0)