diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b13492fb85152..891f82f962641 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10700,6 +10700,12 @@ def err_sycl_non_trivially_copy_ctor_dtor_type "constructible|destructible}0 class/struct type %1">; def err_sycl_non_std_layout_type : Error< "kernel parameter has non-standard layout class/struct type %0">; +def err_sycl_illegal_memory_reference : Error< + "Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.">; +def note_unknown_memory_reference : Note< + "Unknown memory reference in SYCL device kernel. Be sure memory was allocated with USM (malloc_shared, etc).">; +def note_sycl_capture_declared_here : Note< + "Declared here.">; def err_conflicting_sycl_kernel_attributes : Error< "conflicting attributes applied to a SYCL kernel">; def err_conflicting_sycl_function_attributes : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e5ef7433d7a50..b8f241b785186 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12456,6 +12456,7 @@ class Sema final { bool isKnownGoodSYCLDecl(const Decl *D); void checkSYCLDeviceVarDecl(VarDecl *Var); + void checkSYCLDevicePointerCapture(VarDecl *Var, SourceLocation CaptureLoc); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void MarkDevice(); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bd7db4a9aca18..7bcf0911fffac 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16320,6 +16320,10 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, S.Diag(Loc, diag::err_opencl_block_ref_block); return false; } + // SYCL: Emit diagnostics for any illegal pointer derefs. + if (Diagnose && S.getLangOpts().SYCLIsDevice && + Var->getType()->isAnyPointerType()) + S.checkSYCLDevicePointerCapture(Var, Loc); return true; } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 72a1551edd9d9..a979f40258838 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -283,6 +283,69 @@ void Sema::checkSYCLDeviceVarDecl(VarDecl *Var) { checkSYCLVarType(*this, Ty, Loc, Visited); } +void Sema::checkSYCLDevicePointerCapture(VarDecl *Var, + SourceLocation CaptureLoc) { + // Any pointer captured into the SYCL kernel lambda will fail when + // dereferenced...except USM. If it weren't for USM we could just emit a + // deferred diagnostic for every pointer capture. Instead, we attempt to + // identify which pointers are USM, and which are definitely not. For those + // that are definitely not, we emit an error. For those that are unknown, we + // emit a gentle note suggesting the user ensure they are using USM. For USM + // pointers, we do nothing. + assert(getLangOpts().SYCLIsDevice && + "Should only be called during SYCL compilation"); + assert(Var->getType()->isAnyPointerType() && + "Should only be called for pointer types being captured."); + + enum ExprAllocation { Unknown, USM, Not_USM }; + ExprAllocation howAllocated = Unknown; + + SourceLocation DecLoc = SourceLocation(); + + // Try to determine provenance of this Var. + const Expr *Init = Var->getAnyInitializer(); + if (Init) { + Init = Init->IgnoreCasts(); + DecLoc = Init->getExprLoc(); + + const CallExpr *CE = dyn_cast(Init); + if (CE) { + // Captured pointer is result of function call. + const FunctionDecl *func = CE->getDirectCallee(); + auto FullName = func->getQualifiedNameAsString(); + // Check to see if this function call is one of the USM allocators. + if ((FullName.rfind("cl::sycl::malloc", 0) == 0) || + (FullName.rfind("cl::sycl::aligned_alloc", 0) == 0)) + howAllocated = USM; + else if ((FullName.compare("malloc") == 0) || + (FullName.compare("calloc") == 0)) + howAllocated = Not_USM; + + } else { + // Var has initialization, but not as return result of a function, + // disqualify any other obvious bad initialization expressions. + const StringLiteral *SL = dyn_cast(Init); + if (SL) + howAllocated = Not_USM; + + if (Init->isRValue()) // pr-value + howAllocated = Not_USM; + } + } + // else: Var does not have local initialization, might be parameter, or + // initialized via ref, etc. + // Nothing more we can determine at this time. + + // diagnostics + if (howAllocated == Not_USM) + SYCLDiagIfDeviceCode(CaptureLoc, diag::err_sycl_illegal_memory_reference); + else if (howAllocated == Unknown) + SYCLDiagIfDeviceCode(CaptureLoc, diag::note_unknown_memory_reference); + + if (howAllocated != USM && DecLoc.isValid()) + SYCLDiagIfDeviceCode(DecLoc, diag::note_sycl_capture_declared_here); +} + class MarkDeviceFunction : public RecursiveASTVisitor { public: MarkDeviceFunction(Sema &S) diff --git a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp index 82cd21bf01552..734830dc531d6 100644 --- a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp +++ b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -21,10 +21,12 @@ void test(const int some_const) { }); } +int *unknownFunc(); + int main() { int data = 5; - int* data_addr = &data; - int* new_data_addr = nullptr; + int *data_addr = unknownFunc(); //#decl_data_addr + int *new_data_addr = unknownFunc(); //#decl_new_data_addr test_struct s; s.data = data; kernel( @@ -38,6 +40,9 @@ int main() { }); kernel( [=]() { + // expected-note@#decl_new_data_addr {{Declared here.}} + // expected-note@#decl_data_addr {{Declared here.}} + // expected-note@+1 2{{Unknown memory reference in SYCL device kernel. Be sure memory was allocated with USM (malloc_shared, etc).}} new_data_addr[0] = data_addr[0]; }); const int some_const = 10; diff --git a/clang/test/SemaSYCL/sycl-pointer-capture-diagnostics.cpp b/clang/test/SemaSYCL/sycl-pointer-capture-diagnostics.cpp new file mode 100644 index 0000000000000..f5bb06b6180f2 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-pointer-capture-diagnostics.cpp @@ -0,0 +1,271 @@ +// RUN: %clang_cc1 -fsycl -triple spir64 -fsycl-is-device -verify -fsyntax-only %s +// +// Pointer variables captured by kernel lambda are checked. +// Ensure those diagnostics are working correctly. + +namespace std { +class type_info; +typedef __typeof__(sizeof(int)) size_t; +} // namespace std + +inline namespace cl { +namespace sycl { + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); //#call_kernelFunc +} + +typedef int device; +typedef int context; +typedef double queue; + +//-- Mock USM memory allocation functions. +namespace usm { +enum class alloc { host, + device, + shared, + unknown }; +} // namespace usm + +void *malloc(std::size_t sz, const device &dev, const context &ctxt, cl::sycl::usm::alloc kind); +void *malloc(std::size_t sz, const queue &q, cl::sycl::usm::alloc kind); +void *malloc_device(std::size_t sz, const device &dev, const context &ctxt); +void *malloc_device(std::size_t sz, const queue &q); +void *malloc_shared(std::size_t sz, const device &dev, const context &ctxt); +void *malloc_shared(std::size_t sz, const queue &q); +void *malloc_host(std::size_t sz, const context &ctxt); +void *malloc_host(std::size_t sz, const queue &q); +void *aligned_alloc(std::size_t alignment, std::size_t sz, const device &dev, const context &ctxt, cl::sycl::usm::alloc kind); +void *aligned_alloc(std::size_t alignment, std::size_t sz, const queue &q, cl::sycl::usm::alloc kind); +void *aligned_alloc_device(std::size_t alignment, std::size_t sz, const device &dev, const context &ctxt); +void *aligned_alloc_device(std::size_t alignment, std::size_t sz, const queue &q); +void *aligned_alloc_shared(std::size_t alignment, std::size_t sz, const device &dev, const context &ctxt); +void *aligned_alloc_shared(std::size_t alignment, std::size_t sz, const queue &q); +void *aligned_alloc_host(std::size_t alignment, std::size_t sz, const context &ctxt); +void *aligned_alloc_host(std::size_t alignment, std::size_t sz, const queue &q); + +//template form +template +T *malloc_shared(std::size_t Count, const device &Dev, const context &Ctxt) { + return static_cast(malloc_shared(Count * sizeof(T), Dev, Ctxt)); +} + +} // namespace sycl +} // namespace cl + +void *malloc(std::size_t sz); +void *calloc(std::size_t num, std::size_t sz); +//-- End Mocks + +// User functions that might allocate memory in some way unknown to us. +float *unknownFunc(); + +void allocateUSMByHandle(float **pointerHandle, std::size_t sz, sycl::device &dev, sycl::context &ctxt) { + float *mem = sycl::malloc_shared(sz, dev, ctxt); + *pointerHandle = mem; +} + +struct Mesh { + float a; +}; + +float calledFromLambda(float *first) { + return first[0]; +} + +int main(int argc, char **argv) { + + int device = 0, context = 0; + double queue = 0; + + //-- Declarations + + //-- various bad pointers + float stackFloat = 20.0; + float *stackFloatP = &stackFloat; //#decl_stackFloatP + + float *frenemy = stackFloatP; //#decl_frenemy + frenemy++; + + // expected-warning@+1 {{cast to 'float *' from smaller integer type 'int'}} + float *fromParam = (float *)(argc); //#decl_fromParam + + // std::string is already caught by 'non-trivially copy constructible' check. + // so we only worry about literal strings. + auto stringLiteral = "omgwtf"; //#decl_stringLiteral + + float *apocryphal = unknownFunc(); //#decl_apocryphal + float *usmByHandle; + allocateUSMByHandle(&usmByHandle, 10, device, context); + + //-- structs + Mesh stackMesh; + stackMesh.a = 31.0; + Mesh *stackMeshP = &stackMesh; //#decl_stackMeshP + Mesh *stackMeshP2; + Mesh *mallocMeshP = static_cast(malloc(sizeof(Mesh))); //#decl_mallocMeshP + Mesh *mallocMeshP2 = static_cast(malloc(sizeof(Mesh))); //#decl_mallocMeshP2 + Mesh *usmMeshP = static_cast(sycl::malloc_shared(sizeof(Mesh), device, context)); //#decl_usmMeshP + Mesh *usmMeshP2 = sycl::malloc_shared(1, device, context); //#decl_usmMeshP2 + + //-- malloc + float *mallocFloatP = static_cast(malloc(sizeof(float) * 2)); //#decl_mallocFloatP + float *mallocFloatP2 = static_cast(malloc(sizeof(float) * 2)); //#decl_mallocFloatP2 + float *callocFloatP = static_cast(calloc(2, sizeof(float))); //#decl_callocFloatP + float *callocFloatP2 = static_cast(calloc(2, sizeof(float))); //#decl_callocFloatP2 + + //usm + float *usmSharedP = static_cast(sycl::malloc_shared(sizeof(float), device, context)); + float *usmSharedP2 = static_cast(sycl::malloc_shared(sizeof(float), queue)); + float *usmSharedP3 = static_cast(sycl::malloc(sizeof(float), device, context, cl::sycl::usm::alloc::shared)); + float *usmSharedP4 = static_cast(sycl::malloc(sizeof(float), queue, cl::sycl::usm::alloc::shared)); + float *usmSharedP5 = sycl::malloc_shared(1, device, context); + + float *usmShAlignP = static_cast(sycl::aligned_alloc_shared(1, sizeof(float), device, context)); + float *usmShAlignP2 = static_cast(sycl::aligned_alloc_shared(1, sizeof(float), queue)); + float *usmShAlignP3 = static_cast(sycl::aligned_alloc(1, sizeof(float), device, context, cl::sycl::usm::alloc::shared)); + float *usmShAlignP4 = static_cast(sycl::aligned_alloc(1, sizeof(float), queue, cl::sycl::usm::alloc::shared)); + + float *usmHostP = static_cast(sycl::malloc_host(sizeof(float), context)); + float *usmHostP2 = static_cast(sycl::malloc_host(sizeof(float), queue)); + float *usmHostP3 = static_cast(sycl::malloc(sizeof(float), device, context, cl::sycl::usm::alloc::host)); + float *usmHostP4 = static_cast(sycl::malloc(sizeof(float), queue, cl::sycl::usm::alloc::host)); + + float *usmHoAlignP = static_cast(sycl::aligned_alloc_host(1, sizeof(float), context)); + float *usmHoAlignP2 = static_cast(sycl::aligned_alloc_host(1, sizeof(float), queue)); + float *usmHoAlignP3 = static_cast(sycl::aligned_alloc(1, sizeof(float), device, context, cl::sycl::usm::alloc::host)); + float *usmHoAlignP4 = static_cast(sycl::aligned_alloc(1, sizeof(float), queue, cl::sycl::usm::alloc::host)); + + float *usmDeviceP = static_cast(sycl::malloc_device(sizeof(float), device, context)); + float *usmDeviceP2 = static_cast(sycl::malloc_device(sizeof(float), queue)); + float *usmDeviceP3 = static_cast(sycl::malloc(sizeof(float), device, context, cl::sycl::usm::alloc::device)); + float *usmDeviceP4 = static_cast(sycl::malloc(sizeof(float), queue, cl::sycl::usm::alloc::device)); + + cl::sycl::kernel_single_task([=]() { + // --- Captures + + //-- various bad pointers + + // expected-note@#call_kernelFunc {{called by 'kernel_single_taska; + + // expected-note@+1 {{Unknown memory reference in SYCL device kernel. Be sure memory was allocated with USM (malloc_shared, etc).}} + stackMeshP2->a = 34.0; + + // expected-note@#decl_mallocMeshP {{Declared here.}} + // expected-error@+1 {{Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.}} + float mmpa = mallocMeshP->a; + + // expected-note@#decl_mallocMeshP2 {{Declared here.}} + // expected-error@+1 {{Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.}} + mallocMeshP2->a = 45.0; + + //-- malloc + + // expected-note@#decl_mallocFloatP2 {{Declared here.}} + // expected-error@+1 {{Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.}} + mallocFloatP2[0] = 80; + + // expected-note@#decl_callocFloatP {{Declared here.}} + // expected-error@+1 {{Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.}} + callocFloatP[0] = 80; + + // expected-note@#decl_callocFloatP2 {{Declared here.}} + // expected-error@+1 {{Illegal memory reference in SYCL device kernel. Use USM (malloc_shared, etc) instead.}} + float someValue = *callocFloatP2; + + // --- Only the first capture of a pointer emits anything. So these accesses will NOT emit redundant diagnostics. + calledFromLambda(mallocFloatP); + stackFloatP[0] = 31.0; + frenemy[0] = 41.0; + fromParam[0] = 71.0; + char y = stringLiteral[0]; + apocryphal[0] = 88.1; + float smpa2 = stackMeshP->a; + stackMeshP2->a = 34.2; + float mmpa2 = mallocMeshP->a; + mallocMeshP2->a = 45.2; + mallocFloatP2[0] = 81; + callocFloatP[0] = 81; + float someOtherValue = *callocFloatP2; + + // --- These captures all use USM, and should pass without any notes or errors. + calledFromLambda(usmSharedP); + float umpa = usmMeshP->a; + usmMeshP2->a = 61.0; + usmSharedP[0] = 1; + usmSharedP2[0] = 1; + usmSharedP3[0] = 1; + usmSharedP4[0] = 1; + usmSharedP5[0] = 1; + usmShAlignP[0] = 1; + usmShAlignP2[0] = 1; + usmShAlignP3[0] = 1; + usmShAlignP4[0] = 1; + usmHostP[0] = 1; + usmHostP2[0] = 1; + usmHostP3[0] = 1; + usmHostP4[0] = 1; + usmHoAlignP[0] = 1; + usmHoAlignP2[0] = 1; + usmHoAlignP3[0] = 1; + usmHoAlignP4[0] = 1; + usmDeviceP[0] = 1; + usmDeviceP2[0] = 1; + usmDeviceP3[0] = 1; + usmDeviceP4[0] = 1; + }); + + auto noProblemLambda = [=]() { + // --- Outside a SYCL context no errors are emitted. + calledFromLambda(mallocFloatP); + calledFromLambda(usmSharedP); + stackFloatP[0] = 30.0; + frenemy[0] = 40.0; + fromParam[0] = 70.0; + char x = stringLiteral[0]; + apocryphal[0] = 89.0; + usmByHandle[0] = 90.0; + float smpa = stackMeshP->a; + stackMeshP2->a = 34.0; + float mmpa = mallocMeshP->a; + mallocMeshP2->a = 45.0; + mallocFloatP2[0] = 80; + callocFloatP[0] = 80; + float someValue = *callocFloatP2; + }; + noProblemLambda(); + + return 0; +} \ No newline at end of file