intel
diff --git a/sycl/test/inline-asm/asm_16_empty.cpp b/sycl/test/inline-asm/asm_16_empty.cpp
Lines changed: 20 additions & 53 deletions
diff --git a/sycl/test/inline-asm/asm_16_matrix_mult.cpp b/sycl/test/inline-asm/asm_16_matrix_mult.cpp
Lines changed: 23 additions & 52 deletions
diff --git a/sycl/test/inline-asm/asm_16_no_input_int.cpp b/sycl/test/inline-asm/asm_16_no_input_int.cpp
Lines changed: 23 additions & 52 deletions
diff --git a/sycl/test/inline-asm/asm_16_no_opts.cpp b/sycl/test/inline-asm/asm_16_no_opts.cpp
Lines changed: 23 additions & 53 deletions
@@ -2,72 +2,39 @@
 // REQUIRES: gpu,linux
 // RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
 // RUN: %t.out
+// RUN: %clangxx -fsycl %s -o %t.ref.out
+// RUN: %t.ref.out
 
-#include "include/asmcheck.h"
+#include "include/asmhelper.h"
 #include <CL/sycl.hpp>
 #include <iostream>
-#include <string>
 #include <vector>
 
-constexpr int LIST_SIZE = 1024;
-using arr_t = std::vector<cl::sycl::cl_int>;
-constexpr auto sycl_write = cl::sycl::access::mode::write;
+using dataType = cl::sycl::cl_int;
 
-// class is used for kernel name
-template <typename T>
-class no_opts;
+template <typename T = dataType>
+struct KernelFunctor : WithOutputBuffer<T> {
+  KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
 
-template <typename T>
-void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
-  cl::sycl::range<1> numOfItems{sz};
-  cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
-
-  deviceQueue.submit([&](cl::sycl::handler &cgh) {
-    auto C = bufferC.template get_access<sycl_write>(cgh);
-
-    auto kern = [C](cl::sycl::id<1> wiID)
-        [[cl::intel_reqd_sub_group_size(16)]] {
-      C[wiID] = 43;
+  void operator()(cl::sycl::handler &cgh) {
+    auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
+    cgh.parallel_for<KernelFunctor<T>>(
+        cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
+          C[wiID] = 43;
 #if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
-      asm volatile("");
+          asm volatile("");
 #endif
-    };
-    cgh.parallel_for<class no_opts<T>>(numOfItems, kern);
-  });
+        });
+  }
 };
 
 int main() {
-  arr_t C(LIST_SIZE);
-
-  cl::sycl::gpu_selector gpsel;
-  cl::sycl::queue deviceQueue(gpsel);
-
-  sycl::device Device = deviceQueue.get_device();
-
-  if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
-    std::cout << "Skipping test\n";
+  KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
+  if (!launchInlineASMTest(f))
     return 0;
-  }
 
-  for (int i = 0; i < LIST_SIZE; i++) {
-    C[i] = 0;
-  }
-
-  process_buffers(deviceQueue, C.data(), LIST_SIZE);
-
-  bool all_right = true;
-
-  for (int i = 0; i < LIST_SIZE; ++i)
-    if (C[i] != 43) {
-      std::cerr << "At index: " << i << ". ";
-      std::cerr << C[i] << " != " << 43 << "\n";
-      all_right = false;
-      break;
-    }
-  if (all_right) {
-    std::cout << "Pass" << std::endl;
+  if (verify_all_the_same(f.getOutputBufferData(), 43))
     return 0;
-  }
-  std::cout << "Error" << std::endl;
-  return -1;
+
+  return 1;
 }
@@ -2,72 +2,43 @@
 // REQUIRES: gpu,linux
 // RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
 // RUN: %t.out
+// RUN: %clangxx -fsycl %s -o %t.ref.out
+// RUN: %t.ref.out
 
-#include "include/asmcheck.h"
+#include "include/asmhelper.h"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <vector>
 
-constexpr int LIST_SIZE = 8;
-using arr_t = std::vector<cl::sycl::cl_int>;
-constexpr auto sycl_write = cl::sycl::access::mode::write;
+using dataType = cl::sycl::cl_int;
 
-// class is used for kernel name
-template <typename T>
-class simple_vector_add;
+template <typename T = dataType>
+struct KernelFunctor : WithOutputBuffer<T> {
+  KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
 
-template <typename T>
-void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
-  cl::sycl::range<1> numOfItems{sz};
-  cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
-
-  deviceQueue.submit([&](cl::sycl::handler &cgh) {
-    auto C = bufferC.template get_access<sycl_write>(cgh);
-
-    auto kern = [C](cl::sycl::id<1> wiID)
-        [[cl::intel_reqd_sub_group_size(16)]] {
-      volatile int output = 0;
+  void operator()(cl::sycl::handler &cgh) {
+    auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
+    cgh.parallel_for<KernelFunctor<T>>(
+        cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
+          volatile int output = 0;
 #if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
-      asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
-                   : "=rw"(output));
+          asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
+                       : "=rw"(output));
 #else
-      output = 7;
+          output = 7;
 #endif
-      C[wiID] = output;
-    };
-    cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
-  });
+          C[wiID] = output;
+        });
+  }
 };
 
 int main() {
-  arr_t C(LIST_SIZE);
-
-  cl::sycl::gpu_selector gpsel;
-  cl::sycl::queue deviceQueue(gpsel);
-  sycl::device Device = deviceQueue.get_device();
-
-  if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
-    std::cout << "Skipping test\n";
+  KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
+  if (!launchInlineASMTest(f))
     return 0;
-  }
-  for (int i = 0; i < LIST_SIZE; i++) {
-    C[i] = 0;
-  }
-
-  process_buffers(deviceQueue, C.data(), LIST_SIZE);
 
-  bool all_right = true;
-  for (int i = 0; i < LIST_SIZE; ++i)
-    if (C[i] != 7) {
-      std::cerr << "At index: " << i << ". ";
-      std::cerr << C[i] << " != " << 7 << "\n";
-      all_right = false;
-      break;
-    }
-  if (all_right) {
-    std::cout << "Pass" << std::endl;
+  if (verify_all_the_same(f.getOutputBufferData(), 7))
     return 0;
-  }
-  std::cout << "Error" << std::endl;
-  return -1;
+
+  return 1;
 }
@@ -2,72 +2,43 @@
 // REQUIRES: gpu,linux
 // RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
 // RUN: %t.out
+// RUN: %clangxx -fsycl %s -o %t.ref.out
+// RUN: %t.ref.out
 
-#include "include/asmcheck.h"
+#include "include/asmhelper.h"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <vector>
 
-constexpr int LIST_SIZE = 8;
-using arr_t = std::vector<cl::sycl::cl_int>;
-constexpr auto sycl_write = cl::sycl::access::mode::write;
+using dataType = cl::sycl::cl_int;
 
-// class is used for kernel name
-template <typename T>
-class simple_vector_add;
+template <typename T = dataType>
+struct KernelFunctor : WithOutputBuffer<T> {
+  KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
 
-template <typename T>
-void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
-  cl::sycl::range<1> numOfItems{sz};
-  cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
-
-  deviceQueue.submit([&](cl::sycl::handler &cgh) {
-    auto C = bufferC.template get_access<sycl_write>(cgh);
-
-    auto kern = [C](cl::sycl::id<1> wiID)
-        [[cl::intel_reqd_sub_group_size(16)]] {
-      volatile int output = 0;
+  void operator()(cl::sycl::handler &cgh) {
+    auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
+    cgh.parallel_for<KernelFunctor<T>>(
+        cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
+          volatile int output = 0;
 #if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
-      asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
-                   : "=rw"(output));
+          asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
+                       : "=rw"(output));
 #else
-      output = 7;
+          output = 7;
 #endif
-      C[wiID] = output;
-    };
-    cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
-  });
+          C[wiID] = output;
+        });
+  }
 };
 
 int main() {
-  arr_t C(LIST_SIZE);
-
-  cl::sycl::gpu_selector gpsel;
-  cl::sycl::queue deviceQueue(gpsel);
-  sycl::device Device = deviceQueue.get_device();
-
-  if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
-    std::cout << "Skipping test\n";
+  KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
+  if (!launchInlineASMTest(f))
     return 0;
-  }
-  for (int i = 0; i < LIST_SIZE; i++) {
-    C[i] = 0;
-  }
-
-  process_buffers(deviceQueue, C.data(), LIST_SIZE);
 
-  bool all_right = true;
-  for (int i = 0; i < LIST_SIZE; ++i)
-    if (C[i] != 7) {
-      std::cerr << "At index: " << i << ". ";
-      std::cerr << C[i] << " != " << 7 << "\n";
-      all_right = false;
-      break;
-    }
-  if (all_right) {
-    std::cout << "Pass" << std::endl;
+  if (verify_all_the_same(f.getOutputBufferData(), 7))
     return 0;
-  }
-  std::cout << "Error" << std::endl;
-  return -1;
+
+  return 1;
 }
@@ -2,74 +2,44 @@
 // REQUIRES: gpu,linux
 // RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
 // RUN: %t.out
+// RUN: %clangxx -fsycl %s -o %t.ref.out
+// RUN: %t.ref.out
 
-#include "include/asmcheck.h"
+#include "include/asmhelper.h"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <vector>
 
-constexpr int LIST_SIZE = 1024;
-using arr_t = std::vector<cl::sycl::cl_int>;
-constexpr auto sycl_write = cl::sycl::access::mode::write;
+using dataType = cl::sycl::cl_int;
 
-// class is used for kernel name
-template <typename T>
-class simple_vector_add;
+template <typename T = dataType>
+struct KernelFunctor : WithOutputBuffer<T> {
+  KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
 
-template <typename T>
-void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
-  cl::sycl::range<1> numOfItems{sz};
-  cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
-
-  deviceQueue.submit([&](cl::sycl::handler &cgh) {
-    auto C = bufferC.template get_access<sycl_write>(cgh);
-
-    auto kern = [C](cl::sycl::id<1> wiID)
-        [[cl::intel_reqd_sub_group_size(16)]] {
-      for (int i = 0; i < 10; ++i) {
+  void operator()(cl::sycl::handler &cgh) {
+    auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
+    cgh.parallel_for<KernelFunctor<T>>(
+        cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
+          for (int i = 0; i < 10; ++i) {
 #if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
-        asm("fence_sw");
-        C[wiID] += i;
+            asm("fence_sw");
+            C[wiID] += i;
 
 #else
-        C[wiID] += i;
+            C[wiID] += i;
 #endif
-      }
-    };
-    cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
-  });
+          }
+        });
+  }
 };
 
 int main() {
-  arr_t C(LIST_SIZE);
-
-  cl::sycl::gpu_selector gpsel;
-  cl::sycl::queue deviceQueue(gpsel);
-  sycl::device Device = deviceQueue.get_device();
-
-  if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
-    std::cout << "Skipping test\n";
+  KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
+  if (!launchInlineASMTest(f))
     return 0;
-  }
-  for (int i = 0; i < LIST_SIZE; i++) {
-    C[i] = 0;
-  }
-
-  process_buffers(deviceQueue, C.data(), LIST_SIZE);
-
-  bool all_right = true;
 
-  for (int i = 0; i < LIST_SIZE; ++i)
-    if (C[i] != 45) {
-      std::cerr << "At index: " << i << ". ";
-      std::cerr << C[i] << " != " << 45 << "\n";
-      all_right = false;
-      break;
-    }
-  if (all_right) {
-    std::cout << "Pass" << std::endl;
+  if (verify_all_the_same(f.getOutputBufferData(), 45))
     return 0;
-  }
-  std::cout << "Error" << std::endl;
-  return -1;
+
+  return 1;
 }