Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/intel-llvm-mirror-base-commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
25323c85d7091f92bea2c057202612ff941a36d2
22ee417ae6191b8e3940b82d19ae587092021224
2 changes: 1 addition & 1 deletion source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_LOW_POWER_EVENTS_SUPPORT_EXP:
return ReturnValue(false);
case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP:
return ReturnValue(false);
return ReturnValue(true);
case UR_DEVICE_INFO_USE_NATIVE_ASSERT:
return ReturnValue(true);
case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
Expand Down
8 changes: 5 additions & 3 deletions source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,9 @@ urUSMPoolTrimToExp(ur_context_handle_t hContext, ur_device_handle_t hDevice,
}

UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t,
void *, const void *,
size_t) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
void *pDst,
const void *pSrc,
size_t Size) {
UR_CHECK_ERROR(cuMemcpy((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size));
return UR_RESULT_SUCCESS;
}
10 changes: 9 additions & 1 deletion source/adapters/level_zero/v2/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@ populateP2PDevices(size_t maxDevices,
return p2pDevices;
}

static std::vector<ur_device_handle_t>
uniqueDevices(uint32_t numDevices, const ur_device_handle_t *phDevices) {
std::vector<ur_device_handle_t> devices(phDevices, phDevices + numDevices);
std::sort(devices.begin(), devices.end());
devices.erase(std::unique(devices.begin(), devices.end()), devices.end());
return devices;
}

ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
uint32_t numDevices,
const ur_device_handle_t *phDevices,
bool ownZeContext)
: hContext(hContext, ownZeContext),
hDevices(phDevices, phDevices + numDevices),
hDevices(uniqueDevices(numDevices, phDevices)),
commandListCache(hContext,
{phDevices[0]->Platform->ZeCopyOffloadExtensionSupported,
phDevices[0]->Platform->ZeMutableCmdListExt.Supported}),
Expand Down
15 changes: 9 additions & 6 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -807,11 +807,14 @@ ur_result_t AsanInterceptor::prepareLaunch(
LocalWorkSize[Dim];
}

uint64_t NumWI = 1;
uint64_t NumWILocal = 1;
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
NumWI *= LaunchInfo.GlobalWorkSize[Dim];
NumWILocal *= LocalWorkSize[Dim];
}

size_t SGSize = GetSubGroupSize(Kernel, DeviceInfo->Handle);
uint32_t NumSG = ((NumWILocal + SGSize - 1) / SGSize) * NumWG;

// Prepare asan runtime data
LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
Expand Down Expand Up @@ -841,20 +844,20 @@ ur_result_t AsanInterceptor::prepareLaunch(
// Write shadow memory offset for private memory
if (getContext()->Options.DetectPrivates) {
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWI, NumWG, LaunchInfo.Data.Host.PrivateBase,
Queue, NumSG, LaunchInfo.Data.Host.PrivateBase,
LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
UR_LOG_L(getContext()->logger, WARN,
"Failed to allocate shadow memory for private memory, "
"maybe the number of workgroup ({}) is too large",
NumWG);
"maybe the number of subgroup ({}) is too large",
NumSG);
UR_LOG_L(getContext()->logger, WARN,
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
LaunchInfo.Data.Host.PrivateShadowOffset = 0;
} else {
UR_LOG_L(getContext()->logger, INFO,
"ShadowMemory(Private, WorkGroup={}, {} - {})", NumWG,
"ShadowMemory(Private, SubGroup={}, {} - {})", NumSG,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
Expand Down
4 changes: 4 additions & 0 deletions source/loader/layers/sanitizer/asan/asan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ struct LocalArgsInfo {
uint64_t SizeWithRedZone = 0;
};

// Upper bound on the number of work-groups used when sizing the local-memory
// shadow: ShadowMemoryGPU::AllocLocalShadow clamps NumWG to this value.
constexpr uint32_t ASAN_MAX_WG_LOCAL = 8192;

// Upper bound on the number of sub-groups used when sizing the private-memory
// base array and shadow: ShadowMemoryGPU::AllocPrivateShadow clamps NumSG to
// this value.
constexpr uint32_t ASAN_MAX_SG_PRIVATE = 256;

constexpr uint64_t ASAN_MAX_NUM_REPORTS = 10;

struct AsanRuntimeData {
Expand Down
14 changes: 8 additions & 6 deletions source/loader/layers/sanitizer/asan/asan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
uptr &End) {
const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
const size_t RequiredShadowSize =
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
(std::min(ASAN_MAX_WG_LOCAL, NumWG) * LocalMemorySize) >>
ASAN_SHADOW_SCALE;
static size_t LastAllocedSize = 0;
if (RequiredShadowSize > LastAllocedSize) {
ur_context_handle_t QueueContext = GetContext(Queue);
Expand Down Expand Up @@ -285,16 +286,17 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
}

ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
uint64_t NumWI, uint32_t NumWG,
uptr *&Base, uptr &Begin,
uptr &End) {
uint32_t NumSG, uptr *&Base,
uptr &Begin, uptr &End) {
// Trying to allocate private base array and private shadow, and any one of
// them fail to allocate would be a failure
static size_t LastPrivateBaseAllocedSize = 0;
static size_t LastPrivateShadowAllocedSize = 0;

NumSG = std::min(NumSG, ASAN_MAX_SG_PRIVATE);

try {
const size_t NewPrivateBaseSize = NumWI * sizeof(uptr);
const size_t NewPrivateBaseSize = NumSG * sizeof(uptr);
if (NewPrivateBaseSize > LastPrivateBaseAllocedSize) {
if (PrivateBasePtr) {
UR_CALL_THROWS(getContext()->urDdiTable.USM.pfnFree(
Expand All @@ -317,7 +319,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
}

const size_t NewPrivateShadowSize =
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
(NumSG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
if (NewPrivateShadowSize > LastPrivateShadowAllocedSize) {
ur_context_handle_t QueueContext = GetContext(Queue);
auto ContextInfo = getAsanInterceptor()->getContextInfo(QueueContext);
Expand Down
11 changes: 5 additions & 6 deletions source/loader/layers/sanitizer/asan/asan_shadow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ struct ShadowMemory {
uptr &Begin, uptr &End) = 0;

virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
uint64_t NumWI, uint32_t NumWG,
uptr *&Base, uptr &Begin,
uptr &End) = 0;
uint32_t NumSG, uptr *&Base,
uptr &Begin, uptr &End) = 0;

ur_context_handle_t Context{};

Expand Down Expand Up @@ -90,7 +89,7 @@ struct ShadowMemoryCPU final : public ShadowMemory {
return UR_RESULT_SUCCESS;
}

ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint64_t, uint32_t, uptr *&,
ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr *&,
uptr &Begin, uptr &End) override {
Begin = ShadowBegin;
End = ShadowEnd;
Expand All @@ -110,8 +109,8 @@ struct ShadowMemoryGPU : public ShadowMemory {
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
uptr &Begin, uptr &End) override final;

ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint64_t NumWI,
uint32_t NumWG, uptr *&Base, uptr &Begin,
ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumSG,
uptr *&Base, uptr &Begin,
uptr &End) override final;

ur_mutex VirtualMemMapsMutex;
Expand Down
17 changes: 10 additions & 7 deletions source/loader/layers/sanitizer/msan/msan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,11 +530,14 @@ ur_result_t MsanInterceptor::prepareLaunch(
LocalWorkSize[Dim];
}

uint64_t NumWI = 1;
uint64_t NumWILocal = 1;
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
NumWI *= LaunchInfo.GlobalWorkSize[Dim];
NumWILocal *= LocalWorkSize[Dim];
}

size_t SGSize = GetSubGroupSize(Kernel, DeviceInfo->Handle);
uint32_t NumSG = ((NumWILocal + SGSize - 1) / SGSize) * NumWG;

// Write shadow memory offset for local memory
if (KernelInfo.IsCheckLocals) {
if (DeviceInfo->Shadow->AllocLocalShadow(
Expand All @@ -558,22 +561,22 @@ ur_result_t MsanInterceptor::prepareLaunch(
// Write shadow memory offset for private memory
if (KernelInfo.IsCheckPrivates) {
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWI, NumWG, LaunchInfo.Data.Host.PrivateBase,
Queue, NumSG, LaunchInfo.Data.Host.PrivateBase,
LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
UR_LOG_L(getContext()->logger, WARN,
"Failed to allocate shadow memory for private memory, "
"maybe the number of workgroup ({}) is too large",
NumWG);
"maybe the number of subgroup ({}) is too large",
NumSG);
UR_LOG_L(getContext()->logger, WARN,
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
LaunchInfo.Data.Host.PrivateShadowOffset = 0;
} else {
UR_LOG_L(
getContext()->logger, DEBUG,
"ShadowMemory(Private, WorkGroup={}, PrivateBase={}, Shadow={} - {})",
NumWG, (void *)LaunchInfo.Data.Host.PrivateBase,
"ShadowMemory(Private, SubGroup={}, PrivateBase={}, Shadow={} - {})",
NumSG, (void *)LaunchInfo.Data.Host.PrivateBase,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
Expand Down
4 changes: 4 additions & 0 deletions source/loader/layers/sanitizer/msan/msan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ namespace ur_sanitizer_layer {

constexpr unsigned MSAN_ORIGIN_GRANULARITY = 4U;

// Upper bound on the number of work-groups used when sizing the local-memory
// shadow: MsanShadowMemoryGPU::AllocLocalShadow clamps NumWG to this value.
constexpr uint32_t MSAN_MAX_WG_LOCAL = 1024;

// Upper bound on the number of sub-groups used when sizing the private-memory
// base array and shadow: MsanShadowMemoryGPU::AllocPrivateShadow clamps NumSG
// to this value.
constexpr uint32_t MSAN_MAX_SG_PRIVATE = 32;

struct MsanErrorReport {
int Flag = 0;

Expand Down
12 changes: 7 additions & 5 deletions source/loader/layers/sanitizer/msan/msan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
uint32_t NumWG, uptr &Begin,
uptr &End) {
const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
const size_t RequiredShadowSize = NumWG * LocalMemorySize;
const size_t RequiredShadowSize =
std::min(NumWG, MSAN_MAX_WG_LOCAL) * LocalMemorySize;
static size_t LastAllocedSize = 0;
if (RequiredShadowSize > LastAllocedSize) {
auto ContextInfo = getMsanInterceptor()->getContextInfo(Context);
Expand Down Expand Up @@ -414,16 +415,17 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
}

ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
uint64_t NumWI,
uint32_t NumWG, uptr *&Base,
uint32_t NumSG, uptr *&Base,
uptr &Begin, uptr &End) {
// Trying to allocate private base array and private shadow, and any one of
// them fail to allocate would be a failure
static size_t LastPrivateBaseAllocedSize = 0;
static size_t LastPrivateShadowAllocedSize = 0;

NumSG = std::min(NumSG, MSAN_MAX_SG_PRIVATE);

try {
const size_t NewPrivateBaseSize = NumWI * sizeof(uptr);
const size_t NewPrivateBaseSize = NumSG * sizeof(uptr);
if (NewPrivateBaseSize > LastPrivateBaseAllocedSize) {
if (PrivateBasePtr) {
UR_CALL_THROWS(getContext()->urDdiTable.USM.pfnFree(
Expand All @@ -445,7 +447,7 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
LastPrivateBaseAllocedSize = NewPrivateBaseSize;
}

const size_t NewPrivateShadowSize = NumWG * MSAN_PRIVATE_SIZE;
const size_t NewPrivateShadowSize = NumSG * MSAN_PRIVATE_SIZE;
if (NewPrivateShadowSize > LastPrivateShadowAllocedSize) {

if (PrivateShadowOffset) {
Expand Down
11 changes: 5 additions & 6 deletions source/loader/layers/sanitizer/msan/msan_shadow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ struct MsanShadowMemory {
uptr &Begin, uptr &End) = 0;

virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
uint64_t NumWI, uint32_t NumWG,
uptr *&Base, uptr &Begin,
uptr &End) = 0;
uint32_t NumSG, uptr *&Base,
uptr &Begin, uptr &End) = 0;

ur_context_handle_t Context{};

Expand Down Expand Up @@ -113,7 +112,7 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory {
return UR_RESULT_SUCCESS;
}

ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint64_t, uint32_t, uptr *&,
ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr *&,
uptr &Begin, uptr &End) override {
// This is necessary as msan_rtl use it to check whether detecting private
// is enabled
Expand Down Expand Up @@ -147,8 +146,8 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory {
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
uptr &Begin, uptr &End) override final;

ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint64_t NumWI,
uint32_t NumWG, uptr *&Base, uptr &Begin,
// Allocates the private-memory base array and shadow for a launch.
// NumSG is the number of sub-groups (not work-groups); the definition clamps
// it to MSAN_MAX_SG_PRIVATE before sizing either allocation.
ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumSG,
uptr *&Base, uptr &Begin,
uptr &End) override final;

virtual size_t GetShadowSize() = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,33 @@ std::string GetKernelName(ur_kernel_handle_t Kernel) {
return std::string(KernelNameBuf.data(), KernelNameSize - 1);
}

/// Returns the sub-group size to assume for \p Kernel on \p Device.
///
/// The kernel is asked first (UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL).
/// If it reports 0, i.e. no size was requested, the smallest non-zero entry of
/// the device's UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL list is used instead.
///
/// The result is never 0: the sanitizer interceptors divide by it when
/// computing the number of sub-groups. If neither query yields a usable size
/// (for example a query fails in a build where assert() is compiled out, or
/// the device reports an empty list), 1 is returned, which treats every
/// work-item as its own sub-group.
size_t GetSubGroupSize(ur_kernel_handle_t Kernel, ur_device_handle_t Device) {
  uint32_t SubGroupSize = 0;
  [[maybe_unused]] auto Result =
      getContext()->urDdiTable.Kernel.pfnGetSubGroupInfo(
          Kernel, Device, UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL,
          sizeof(uint32_t), &SubGroupSize, nullptr);
  assert(Result == UR_RESULT_SUCCESS && "getSubGroupSize() failed");
  if (SubGroupSize != 0) {
    return SubGroupSize;
  }

  // If user doesn't require the subgroup size, choose device supported smallest
  // one.
  size_t PropertySize = 0;
  Result = getContext()->urDdiTable.Device.pfnGetInfo(
      Device, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, nullptr, &PropertySize);
  assert(Result == UR_RESULT_SUCCESS && "getDeviceInfo() failed");

  // Value-initialized to zeros, so entries left unwritten by a failed query are
  // ignored by the selection loop below.
  std::vector<uint32_t> SupportedSubGroupSize(PropertySize / sizeof(uint32_t));
  if (!SupportedSubGroupSize.empty()) {
    Result = getContext()->urDdiTable.Device.pfnGetInfo(
        Device, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PropertySize,
        SupportedSubGroupSize.data(), nullptr);
    assert(Result == UR_RESULT_SUCCESS && "getDeviceInfo() failed");
  }

  // Pick the smallest non-zero size explicitly rather than relying on the
  // adapter returning the list in ascending order.
  uint32_t Smallest = 0;
  for (const uint32_t Candidate : SupportedSubGroupSize) {
    if (Candidate != 0 && (Smallest == 0 || Candidate < Smallest)) {
      Smallest = Candidate;
    }
  }

  // Never hand a zero divisor back to the caller.
  return Smallest != 0 ? Smallest : 1;
}

bool IsUSM(ur_context_handle_t Context, const void *MemPtr) {
ur_usm_type_t USMType = GetUSMType(Context, MemPtr);
return USMType != UR_USM_TYPE_UNKNOWN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ ur_device_handle_t GetParentDevice(ur_device_handle_t Device);
bool GetDeviceUSMCapability(ur_device_handle_t Device,
ur_device_info_t Feature);
std::string GetKernelName(ur_kernel_handle_t Kernel);
size_t GetSubGroupSize(ur_kernel_handle_t Kernel, ur_device_handle_t Device);
size_t GetDeviceLocalMemorySize(ur_device_handle_t Device);
ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel);
bool IsUSM(ur_context_handle_t Context, const void *MemPtr);
Expand Down
2 changes: 1 addition & 1 deletion test/conformance/adapter/urAdapterRelease.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ TEST_P(urAdapterReleaseTest, Success) {
&referenceCountBefore, nullptr));

uint32_t referenceCountAfter = 0;
EXPECT_SUCCESS(urAdapterRelease(adapter));
ASSERT_SUCCESS(urAdapterRelease(adapter));
ASSERT_SUCCESS(urAdapterGetInfo(adapter, UR_ADAPTER_INFO_REFERENCE_COUNT,
sizeof(referenceCountAfter),
&referenceCountAfter, nullptr));
Expand Down
2 changes: 1 addition & 1 deletion test/conformance/adapter/urAdapterRetain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ TEST_P(urAdapterRetainTest, Success) {
&referenceCountBefore, nullptr));

uint32_t referenceCountAfter = 0;
EXPECT_SUCCESS(urAdapterRetain(adapter));
ASSERT_SUCCESS(urAdapterRetain(adapter));
ASSERT_SUCCESS(urAdapterGetInfo(adapter, UR_ADAPTER_INFO_REFERENCE_COUNT,
sizeof(referenceCountAfter),
&referenceCountAfter, nullptr));
Expand Down
2 changes: 1 addition & 1 deletion test/conformance/context/urContextRetain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ TEST_P(urContextRetainTest, Success) {

ASSERT_LT(prevRefCount, refCount);

EXPECT_SUCCESS(urContextRelease(context));
ASSERT_SUCCESS(urContextRelease(context));
}

TEST_P(urContextRetainTest, InvalidNullHandleContext) {
Expand Down
6 changes: 3 additions & 3 deletions test/conformance/device/urDeviceRelease.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ TEST_P(urDeviceReleaseTest, Success) {
uint32_t prevRefCount = 0;
ASSERT_SUCCESS(uur::GetObjectReferenceCount(device, prevRefCount));

EXPECT_SUCCESS(urDeviceRelease(device));
ASSERT_SUCCESS(urDeviceRelease(device));

uint32_t refCount = 0;
ASSERT_SUCCESS(uur::GetObjectReferenceCount(device, refCount));
Expand Down Expand Up @@ -46,14 +46,14 @@ TEST_P(urDeviceReleaseTest, SuccessSubdevices) {
uint32_t prevRefCount = 0;
ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, prevRefCount));

EXPECT_SUCCESS(urDeviceRelease(sub_device));
ASSERT_SUCCESS(urDeviceRelease(sub_device));

uint32_t refCount = 0;
ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, refCount));

ASSERT_GT(prevRefCount, refCount);

EXPECT_SUCCESS(urDeviceRelease(sub_device));
ASSERT_SUCCESS(urDeviceRelease(sub_device));
}

TEST_P(urDeviceReleaseTest, InvalidNullHandle) {
Expand Down
Loading