Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,48 @@ class BinaryWrapper {
appendToGlobalDtors(M, Func, /*Priority*/ 1);
}

void createSyclRegisterWithAtexitUnregister(GlobalVariable *BinDesc) {
auto *UnregFuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *UnregFunc =
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg.atexit", &M);
UnregFunc->setSection(".text.startup");

// Declaration for __sycl_unregister_lib(void*).
auto *UnregTargetTy =
FunctionType::get(Type::getVoidTy(C), getPtrTy(), /*isVarArg=*/false);
FunctionCallee UnregTargetC =
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);

IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
UnregBuilder.CreateCall(UnregTargetC, BinDesc);
UnregBuilder.CreateRetVoid();

auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_reg", &M);
RegFunc->setSection(".text.startup");

auto *RegTargetTy =
FunctionType::get(Type::getVoidTy(C), getPtrTy(), false);
FunctionCallee RegTargetC =
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);

// `atexit` takes a `void(*)()` function pointer arg and returns an i32.
FunctionType *AtExitTy =
FunctionType::get(Type::getInt32Ty(C), getPtrTy(), false);
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);

IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
RegBuilder.CreateCall(RegTargetC, BinDesc);
RegBuilder.CreateCall(AtExitC, UnregFunc);
RegBuilder.CreateRetVoid();

// Add this function to global destructors.
appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
}

public:
BinaryWrapper(StringRef Target, StringRef ToolName,
StringRef SymPropBCFiles = "")
Expand Down Expand Up @@ -1370,8 +1412,13 @@ class BinaryWrapper {

if (EmitRegFuncs) {
GlobalVariable *Desc = *DescOrErr;
createRegisterFunction(Kind, Desc);
createUnregisterFunction(Kind, Desc);
if (Kind == OffloadKind::SYCL &&
Triple(M.getTargetTriple()).isOSWindows()) {
createSyclRegisterWithAtexitUnregister(Desc);
} else {
createRegisterFunction(Kind, Desc);
createUnregisterFunction(Kind, Desc);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't necessarily have to be part of this patch, but could you please document this approach in a design document?

}
}
return &M;
Expand Down
53 changes: 51 additions & 2 deletions llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/PropertySetIO.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <memory>
#include <string>
Expand Down Expand Up @@ -734,6 +735,50 @@ struct Wrapper {
// Add this function to global destructors.
appendToGlobalDtors(M, Func, /*Priority*/ 1);
}

void createSyclRegisterWithAtexitUnregister(GlobalVariable *FatbinDesc) {
auto *UnregFuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *UnregFunc =
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg.atexit", &M);
UnregFunc->setSection(".text.startup");

// Declaration for __sycl_unregister_lib(void*).
auto *UnregTargetTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
FunctionCallee UnregTargetC =
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);

// Body of the unregister wrapper.
IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
UnregBuilder.CreateCall(UnregTargetC, FatbinDesc);
UnregBuilder.CreateRetVoid();

auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_reg", &M);
RegFunc->setSection(".text.startup");

auto *RegTargetTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
FunctionCallee RegTargetC =
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);

// `atexit` takes a `void(*)()` function pointer arg and returns an i32.
FunctionType *AtExitTy = FunctionType::get(
Type::getInt32Ty(C), PointerType::getUnqual(C), false);
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);

IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
RegBuilder.CreateCall(RegTargetC, FatbinDesc);
RegBuilder.CreateCall(AtExitC, UnregFunc);
RegBuilder.CreateRetVoid();

// Finally, add to global constructors.
appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
}

}; // end of Wrapper

} // anonymous namespace
Expand All @@ -747,7 +792,11 @@ Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M,
return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");

W.createRegisterFatbinFunction(Desc);
W.createUnregisterFunction(Desc);
if (Triple(M.getTargetTriple()).isOSWindows()) {
W.createSyclRegisterWithAtexitUnregister(Desc);
} else {
W.createRegisterFatbinFunction(Desc);
W.createUnregisterFunction(Desc);
}
return Error::success();
}
3 changes: 2 additions & 1 deletion sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ context_impl::~context_impl() {
DeviceGlobalMapEntry *DGEntry =
detail::ProgramManager::getInstance().getDeviceGlobalEntry(
DeviceGlobal);
DGEntry->removeAssociatedResources(this);
if (DGEntry != nullptr)
DGEntry->removeAssociatedResources(this);
}
MCachedLibPrograms.clear();
// TODO catch an exception and put it to list of asynchronous exceptions
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/device_global_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class DeviceGlobalMap {
});
if (findDevGlobalByValue != MPtr2DeviceGlobal.end())
MPtr2DeviceGlobal.erase(findDevGlobalByValue);

MDeviceGlobals.erase(DevGlobalIt);
}
}
Expand All @@ -112,8 +113,7 @@ class DeviceGlobalMap {
DeviceGlobalMapEntry *getEntry(const void *DeviceGlobalPtr) {
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
auto Entry = MPtr2DeviceGlobal.find(DeviceGlobalPtr);
assert(Entry != MPtr2DeviceGlobal.end() && "Device global entry not found");
return Entry->second;
return (Entry != MPtr2DeviceGlobal.end()) ? Entry->second : nullptr;
}

DeviceGlobalMapEntry *
Expand Down
24 changes: 16 additions & 8 deletions sycl/source/detail/device_global_map_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ DeviceGlobalUSMMem::~DeviceGlobalUSMMem() {
// removeAssociatedResources is expected to have cleaned up both the pointer
// and the event. When asserts are enabled the values are set, so we check
// these here.
auto ContextImplPtr = MAllocatingContext.lock();
if (ContextImplPtr) {
if (MPtr != nullptr) {
detail::usm::freeInternal(MPtr, ContextImplPtr.get());
MPtr = nullptr;
}
if (MInitEvent != nullptr) {
ContextImplPtr->getAdapter().call<UrApiKind::urEventRelease>(MInitEvent);
MInitEvent = nullptr;
}
}

assert(MPtr == nullptr && "MPtr has not been cleaned up.");
assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up.");
}
Expand Down Expand Up @@ -63,6 +75,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
assert(NewAllocIt.second &&
"USM allocation for device and context already happened.");
DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second;
NewAlloc.MAllocatingContext = CtxImpl.shared_from_this();

// Initialize here and save the event.
{
Expand Down Expand Up @@ -120,6 +133,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
assert(NewAllocIt.second &&
"USM allocation for device and context already happened.");
DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second;
NewAlloc.MAllocatingContext = CtxImpl.shared_from_this();

if (MDeviceGlobalPtr) {
// C++ guarantees members appear in memory in the order they are declared,
Expand Down Expand Up @@ -161,12 +175,9 @@ void DeviceGlobalMapEntry::removeAssociatedResources(
if (USMMem.MInitEvent != nullptr)
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(
USMMem.MInitEvent);
#ifndef NDEBUG
// For debugging we set the event and memory to some recognizable values
// to allow us to check that this cleanup happens before erasure.
// Set to nullptr to avoid double free.
USMMem.MPtr = nullptr;
USMMem.MInitEvent = nullptr;
#endif
MDeviceToUSMPtrMap.erase(USMPtrIt);
}
}
Expand All @@ -185,12 +196,9 @@ void DeviceGlobalMapEntry::cleanup() {
detail::usm::freeInternal(USMMem.MPtr, CtxImpl);
if (USMMem.MInitEvent != nullptr)
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(USMMem.MInitEvent);
#ifndef NDEBUG
// For debugging we set the event and memory to some recognizable values
// to allow us to check that this cleanup happens before erasure.
// Set to nullptr to avoid double free.
USMMem.MPtr = nullptr;
USMMem.MInitEvent = nullptr;
#endif
}
MDeviceToUSMPtrMap.clear();
}
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/device_global_map_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct DeviceGlobalUSMMem {
std::mutex MInitEventMutex;
ur_event_handle_t MInitEvent = nullptr;

std::weak_ptr<context_impl> MAllocatingContext;
friend struct DeviceGlobalMapEntry;
};

Expand Down
5 changes: 0 additions & 5 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3886,10 +3886,5 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) {

// Executed as a part of current module's (.exe, .dll) static initialization
extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) {
// Partial cleanup is not necessary at shutdown
#ifndef _WIN32
if (!sycl::detail::GlobalHandler::instance().isOkToDefer())
return;
sycl::detail::ProgramManager::getInstance().removeImages(desc);
#endif
}
3 changes: 3 additions & 0 deletions sycl/test-e2e/Basic/stream/zero_buffer_size.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// UNSUPPORTED: hip
// UNSUPPORTED-TRACKER: CMPLRLLVM-69478

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

Expand Down
38 changes: 38 additions & 0 deletions sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include <sycl/detail/core.hpp>

#if defined(_WIN32)
#define API_EXPORT __declspec(dllexport)
#else
#define API_EXPORT
#endif

#ifndef INC
#define INC 1
#endif

#ifndef CLASSNAME
#define CLASSNAME same
#endif

#ifdef WITH_DEVICE_GLOBALS
// Using device globals within the shared libraries only
// works if the names do not collide. Note that we cannot
// load a library multiple times if it has a device global.
#define CONCAT_HELPER(a, b) a##b
#define CONCAT(a, b) CONCAT_HELPER(a, b)

using SomeProperties = decltype(sycl::ext::oneapi::experimental::properties{});
sycl::ext::oneapi::experimental::device_global<int, SomeProperties>
CONCAT(DGVar, CLASSNAME) __attribute__((visibility("default")));

#endif // WITH_DEVICE_GLOBALS

extern "C" API_EXPORT void performIncrementation(sycl::queue &q,
sycl::buffer<int, 1> &buf) {
sycl::range<1> r = buf.get_range();
q.submit([&](sycl::handler &cgh) {
auto acc = buf.get_access<sycl::access::mode::write>(cgh);
cgh.parallel_for<class CLASSNAME>(
r, [=](sycl::id<1> idx) { acc[idx] += INC; });
});
}
Loading