# Patch overview (commentary placed ahead of the first "diff --git" header).
#
# Files touched:
#   1. sycl/include/CL/sycl/handler.hpp (modified)
#      Removes the local `DisableRounding` flag and the `KI` alias from the
#      hunk. The removed flag was true when any of these held:
#        - the environment variable SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING
#          was set,
#        - the string "SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" occurred in
#          KName,
#        - KI::getName() was null or an empty string,
#        - KI::callsThisItem() returned true.
#      After the change the rounding branch is guarded only by
#        (NumWorkItems[0] >= MinRangeX) && (NumWorkItems[0] % MinFactorX != 0).
#      NOTE(review): the context line `std::string KName = ...` is kept, but
#      the only use of KName visible in this hunk is removed -- it may now be
#      unused; confirm against the rest of the function.
#      NOTE(review): the context comment still says "if rounding-up is
#      enabled" although the enable/disable test is removed here, and the
#      context comment above it spells "kernal"; both are context lines and
#      must match the target file, so they are left untouched in this patch.
#
#   2. sycl/test/on-device/range-rounding.cpp (new file)
#      Adds try_item1/2/3 and try_id1/2/3. Each submits a parallel_for over a
#      1-, 2- or 3-dimensional range built from `size`, increments a counter
#      once per invocation, and then calls check(), which prints the value
#      and asserts it equals the reference. main() runs all six with
#      x = 10 and then x = 1025; the CHECK / CHECK-NEXT lines list the
#      expected output in that order. The RUN line sets
#      SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1.
#      NOTE(review): try_item1 is declared `int` and returns 0, while the
#      other try_* functions are `void`; main() ignores the result.
#
#   3. sycl/test/regression/unnamed-lambda.cpp (new file)
#      Compile-only test (-fsycl-unnamed-lambda -fsycl-device-only) that
#      submits a parallel_for over range<1>{1} with an empty item<1> lambda.
#
# NOTE(review): this copy of the patch appears to have lost its line breaks
# and some angle-bracket text (e.g. `#include` with no header name,
# `buffer, 1>`, `get_access(cgh)` without an access mode) -- presumably an
# extraction artifact. Restore it from the original commit before applying.
#
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 0642f36588b6c..ce8c25268c717 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -796,19 +796,11 @@ class __SYCL_EXPORT handler { // Get the kernal name to check condition 3. std::string KName = typeid(NameT *).name(); - using KI = detail::KernelInfo; - bool DisableRounding = - (getenv("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != nullptr) || - (KName.find("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != - std::string::npos) || - (KI::getName() == nullptr || KI::getName()[0] == '\0') || - (KI::callsThisItem()); // Perform range rounding if rounding-up is enabled // and there are sufficient work-items to need rounding // and the user-specified range is not a multiple of a "good" value. - if (!DisableRounding && (NumWorkItems[0] >= MinRangeX) && - (NumWorkItems[0] % MinFactorX != 0)) { + if ((NumWorkItems[0] >= MinRangeX) && (NumWorkItems[0] % MinFactorX != 0)) { // It is sufficient to round up just the first dimension. 
// Multiplying the rounded-up value of the first dimension // by the values of the remaining dimensions (if any) diff --git a/sycl/test/on-device/range-rounding.cpp b/sycl/test/on-device/range-rounding.cpp new file mode 100644 index 0000000000000..fe8194ce5b738 --- /dev/null +++ b/sycl/test/on-device/range-rounding.cpp @@ -0,0 +1,187 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// REQUIRES: cpu +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER + +#include + +using namespace sycl; + +range<1> Range1 = {0}; +range<2> Range2 = {0, 0}; +range<3> Range3 = {0, 0, 0}; + +void check(const char *msg, size_t v, size_t ref) { + std::cout << msg << v << std::endl; + assert(v == ref); +} + +int try_item1(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<1> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0] = this_item<1>().get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range1.get(0), size); + check("Counter = ", Counter, size); + return 0; +} + +void try_item2(size_t size) { + range<2> Size{size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range2, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<2> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = this_item<2>().get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range2.get(0), size); + check("Counter = ", Counter, size * size); +} + +void try_item3(size_t size) { + range<3> Size{size, size, size}; + int Counter = 0; + { + buffer, 1> 
BufRange(&Range3, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<3> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = this_item<3>().get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range3.get(0), size); + check("Counter = ", Counter, size * size * size); +} + +void try_id1(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<1> ID) { + AccCounter[0].fetch_add(1); + AccRange[0] = this_id<1>()[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size); +} + +void try_id2(size_t size) { + range<2> Size{size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range2, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<2> ID) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = this_id<2>()[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size * size); +} + +void try_id3(size_t size) { + range<3> Size{size, size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range3, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<3> ID) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = this_id<3>()[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size * size * size); +} + +int main() { + int x; + + x = 10; + try_item1(x); + 
try_item2(x); + try_item3(x); + try_id1(x); + try_id2(x); + try_id3(x); + + x = 1025; + try_item1(x); + try_item2(x); + try_item3(x); + try_id1(x); + try_id2(x); + try_id3(x); + + return 0; +} + +// CHECK: Size seen by user = 10 +// CHECK-NEXT: Counter = 10 +// CHECK-NEXT: Size seen by user = 10 +// CHECK-NEXT: Counter = 100 +// CHECK-NEXT: Size seen by user = 10 +// CHECK-NEXT: Counter = 1000 +// CHECK-NEXT: Counter = 10 +// CHECK-NEXT: Counter = 100 +// CHECK-NEXT: Counter = 1000 +// CHECK-NEXT: Size seen by user = 1025 +// CHECK-NEXT: Counter = 1025 +// CHECK-NEXT: Size seen by user = 1025 +// CHECK-NEXT: Counter = 1050625 +// CHECK-NEXT: Size seen by user = 1025 +// CHECK-NEXT: Counter = 1076890625 +// CHECK-NEXT: Counter = 1025 +// CHECK-NEXT: Counter = 1050625 +// CHECK-NEXT: Counter = 1076890625 diff --git a/sycl/test/regression/unnamed-lambda.cpp b/sycl/test/regression/unnamed-lambda.cpp new file mode 100644 index 0000000000000..162bf86189dbb --- /dev/null +++ b/sycl/test/regression/unnamed-lambda.cpp @@ -0,0 +1,10 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-only -c %s -o %t.temp + +#include "CL/sycl.hpp" + +void foo(cl::sycl::queue queue) { + cl::sycl::event queue_event2 = queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for(cl::sycl::range<1>{1}, + [=](cl::sycl::item<1> id) {}); + }); +}