Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions source/adapters/native_cpu/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,141 @@

#pragma once

#include <mutex>
#include <set>
#include <ur_api.h>

#include "common.hpp"
#include "device.hpp"
#include "ur/ur.hpp"

namespace native_cpu {
struct usm_alloc_info {
ur_usm_type_t type;
const void *base_ptr;
size_t size;
ur_device_handle_t device;
ur_usm_pool_handle_t pool;

// We store a pointer to the actual allocation because it is needed when
// freeing memory.
void *base_alloc_ptr;
constexpr usm_alloc_info(ur_usm_type_t type, const void *base_ptr,
size_t size, ur_device_handle_t device,
ur_usm_pool_handle_t pool, void *base_alloc_ptr)
: type(type), base_ptr(base_ptr), size(size), device(device), pool(pool),
base_alloc_ptr(base_alloc_ptr) {}
};

// Sentinel entry (type UR_USM_TYPE_UNKNOWN, every other field null/zero) used
// to describe a pointer for which no allocation record exists.
constexpr usm_alloc_info usm_alloc_info_null_entry{
    UR_USM_TYPE_UNKNOWN, nullptr, 0, nullptr, nullptr, nullptr};

// Number of bytes occupied by the usm_alloc_info header that precedes every
// pointer returned to the user.
constexpr size_t alloc_header_size{sizeof(usm_alloc_info)};

// Returns the number of filler bytes that must be added to the header size so
// that (alloc_header_size + padding) is a multiple of `alignment`. With an
// allocation whose start is aligned to `alignment`, this makes the pointer
// handed to the user aligned as requested.
static size_t get_padding(uint32_t alignment) {
  assert(alignment >= alignof(usm_alloc_info) &&
         "memory not aligned to usm_alloc_info");
  if (alignment == 0)
    return 0;
  // How far the header reaches past the last alignment boundary. When
  // `alignment` is larger than the header this is the header size itself, so
  // the single expression below covers both the small- and large-alignment
  // cases.
  const size_t overshoot = alloc_header_size % alignment;
  return overshoot == 0 ? 0 : alignment - overshoot;
}

// Allocates the raw storage backing one USM allocation.
//
// To answer the MemAllocInfo queries, every allocation carries a
// native_cpu::usm_alloc_info header in front of the user-visible pointer, so
// the request is enlarged by alloc_header_size plus get_padding(alignment);
// the padding guarantees that the pointer later handed to the user satisfies
// (ptr % alignment) == 0.
//
// `alignment` must be non-zero and at least alignof(usm_alloc_info); `size`
// is the number of bytes requested by the user.
//
// Returns the start of the raw allocation (to be released with
// _aligned_free on MSVC and free elsewhere), or nullptr if the allocation
// fails, the alignment is zero, or the total size does not fit in size_t.
static inline void *malloc_impl(uint32_t alignment, size_t size) {
  assert(alignment >= alignof(usm_alloc_info) &&
         "memory not aligned to usm_alloc_info");
  // Keep release builds (where the assert is compiled out) away from the
  // modulo by zero below.
  if (alignment == 0)
    return nullptr;

  const size_t prefix = alloc_header_size + get_padding(alignment);
  size_t total = prefix + size;
  // Unsigned wrap-around means the request cannot be represented.
  if (total < size)
    return nullptr;

#ifdef _MSC_VER
  return _aligned_malloc(total, alignment);
#else
  // std::aligned_alloc requires the size to be an integral multiple of the
  // alignment; stricter C libraries fail otherwise. Round the request up to
  // the next multiple; the extra tail bytes are simply never used.
  const size_t remainder = total % alignment;
  if (remainder != 0) {
    const size_t tail = alignment - remainder;
    if (total > static_cast<size_t>(-1) - tail)
      return nullptr;
    total += tail;
  }
  return std::aligned_alloc(alignment, total);
#endif
}

// Given a pointer that was handed to the user, returns the address of the
// usm_alloc_info header, which sits alloc_header_size bytes before it.
static inline uint8_t *get_alloc_info_addr(const void *ptr) {
  uint8_t *user_bytes = static_cast<uint8_t *>(const_cast<void *>(ptr));
  return user_bytes - alloc_header_size;
}

// Returns a copy of the usm_alloc_info header stored in front of `ptr`.
// `ptr` must be a pointer that has such a header in front of it; nothing is
// validated here.
static usm_alloc_info get_alloc_info(void *ptr) {
  const usm_alloc_info &header =
      *reinterpret_cast<usm_alloc_info *>(get_alloc_info_addr(ptr));
  return header;
}

} // namespace native_cpu

struct ur_context_handle_t_ : RefCounted {
ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {}

ur_device_handle_t _device;

ur_result_t remove_alloc(void *ptr) {
std::lock_guard<std::mutex> lock(alloc_mutex);
const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr);
UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
#ifdef _MSC_VER
_aligned_free(info.base_alloc_ptr);
#else
free(info.base_alloc_ptr);
#endif
allocations.erase(ptr);
return UR_RESULT_SUCCESS;
}

const native_cpu::usm_alloc_info &
get_alloc_info_entry(const void *ptr) const {
auto it = allocations.find(ptr);
if (it == allocations.end()) {
return native_cpu::usm_alloc_info_null_entry;
}

return *(native_cpu::usm_alloc_info *)native_cpu::get_alloc_info_addr(ptr);
}

void *add_alloc(uint32_t alignment, ur_usm_type_t type, size_t size,
ur_usm_pool_handle_t pool) {
std::lock_guard<std::mutex> lock(alloc_mutex);
// We need to ensure that we align to at least alignof(usm_alloc_info),
// otherwise its start address may be unaligned.
alignment =
std::max<size_t>(alignment, alignof(native_cpu::usm_alloc_info));
void *alloc = native_cpu::malloc_impl(alignment, size);
if (!alloc)
return nullptr;
// Compute the address of the pointer that we'll return to the user.
void *ptr = native_cpu::alloc_header_size +
native_cpu::get_padding(alignment) + (uint8_t *)alloc;
uint8_t *info_addr = native_cpu::get_alloc_info_addr(ptr);
if (!info_addr)
return nullptr;
// Do a placement new of the alloc_info to avoid allocation and copy
auto info = new (info_addr)
native_cpu::usm_alloc_info(type, ptr, size, this->_device, pool, alloc);
if (!info)
return nullptr;
allocations.insert(ptr);
return ptr;
}

private:
std::mutex alloc_mutex;
std::set<const void *> allocations;
};
80 changes: 44 additions & 36 deletions source/adapters/native_cpu/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,90 +8,98 @@
//
//===----------------------------------------------------------------------===//

#include "ur/ur.hpp"
#include "ur_api.h"

#include "common.hpp"
#include "context.hpp"
#include <cstdlib>

UR_APIEXPORT ur_result_t UR_APICALL
urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
std::ignore = hContext;
std::ignore = pUSMDesc;
std::ignore = pool;
namespace native_cpu {

static ur_result_t alloc_helper(ur_context_handle_t hContext,
const ur_usm_desc_t *pUSMDesc, size_t size,
void **ppMem, ur_usm_type_t type) {
auto alignment = pUSMDesc ? pUSMDesc->align : 1u;
UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);

*ppMem = malloc(size);
auto *ptr = hContext->add_alloc(alignment, type, size, nullptr);
UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_RESOURCES);
*ppMem = ptr;

return UR_RESULT_SUCCESS;
}

} // namespace native_cpu

// Allocates `size` bytes of host USM memory and stores the pointer in
// `*ppMem`. All validation and the allocation itself are delegated to
// native_cpu::alloc_helper, whose result is returned unchanged.
UR_APIEXPORT ur_result_t UR_APICALL
urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
               ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
  // The pool handle is not used by this entry point.
  std::ignore = pool;

  const ur_usm_type_t kind = UR_USM_TYPE_HOST;
  return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem, kind);
}

// Allocates `size` bytes of device USM memory and stores the pointer in
// `*ppMem`. All validation and the allocation itself are delegated to
// native_cpu::alloc_helper, whose result is returned unchanged, so the
// allocation is recorded by the context and can be released by urUSMFree.
UR_APIEXPORT ur_result_t UR_APICALL
urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
                 const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
                 size_t size, void **ppMem) {
  // The device and pool handles are not used by this entry point.
  std::ignore = hDevice;
  std::ignore = pool;

  return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
                                  UR_USM_TYPE_DEVICE);
}

// Allocates `size` bytes of shared USM memory and stores the pointer in
// `*ppMem`. All validation and the allocation itself are delegated to
// native_cpu::alloc_helper, whose result is returned unchanged, so the
// allocation is recorded by the context and can be released by urUSMFree.
UR_APIEXPORT ur_result_t UR_APICALL
urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
                 const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
                 size_t size, void **ppMem) {
  // The device and pool handles are not used by this entry point.
  std::ignore = hDevice;
  std::ignore = pool;

  return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
                                  UR_USM_TYPE_SHARED);
}

// Releases a USM allocation.
//
// Returns UR_RESULT_ERROR_INVALID_NULL_POINTER if `pMem` or `hContext` is
// null; otherwise returns the result of the context's remove_alloc.
UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
                                              void *pMem) {
  UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_POINTER);

  // `pMem` must not be passed to free() directly: remove_alloc releases the
  // allocation through the base_alloc_ptr stored in the header in front of
  // it.
  return hContext->remove_alloc(pMem);
}

UR_APIEXPORT ur_result_t UR_APICALL
urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
ur_usm_alloc_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
std::ignore = hContext;
std::ignore = pMem;
std::ignore = propName;
std::ignore = propSize;
std::ignore = pPropValue;
std::ignore = pPropSizeRet;

UR_ASSERT(pMem != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
if (propName == UR_USM_ALLOC_INFO_BASE_PTR) {
// TODO: logic to compute base ptr given ptr
DIE_NO_IMPLEMENTATION;
}

const native_cpu::usm_alloc_info &alloc_info =
hContext->get_alloc_info_entry(pMem);
switch (propName) {
case UR_USM_ALLOC_INFO_TYPE:
// Todo implement this in context
return ReturnValue(UR_USM_TYPE_DEVICE);
return ReturnValue(alloc_info.type);
case UR_USM_ALLOC_INFO_SIZE:
return ReturnValue(alloc_info.size);
case UR_USM_ALLOC_INFO_DEVICE:
return ReturnValue(alloc_info.device);
case UR_USM_ALLOC_INFO_POOL:
return ReturnValue(alloc_info.pool);
default:
DIE_NO_IMPLEMENTATION;
}
Expand Down