Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion sycl/doc/syclcompat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ static void destroy_event(event_ptr event);
} // syclcompat
```

### Memory Allocation
### Memory Operations

This library provides interfaces to allocate memory to be accessed within kernel
functions and on the host. The `syclcompat::malloc` function allocates device
Expand Down Expand Up @@ -510,6 +510,64 @@ public:
} // syclcompat
```

The `syclcompat::experimental` namespace contains currently unsupported `memcpy` overloads which take a `syclcompat::experimental::memcpy_parameter` argument. These are included for forwards compatibility and currently throw a `std::runtime_error`.

```cpp
namespace syclcompat {
namespace experimental {
// Forward declarations for types relating to unsupported memcpy_parameter API:

enum memcpy_direction {
host_to_host,
host_to_device,
device_to_host,
device_to_device,
automatic
};

#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
class image_mem_wrapper;
#endif
class image_matrix;

/// Memory copy parameters for 2D/3D memory data.
struct memcpy_parameter {
struct data_wrapper {
pitched_data pitched{};
sycl::id<3> pos{};
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
experimental::image_mem_wrapper *image_bindless{nullptr};
#endif
image_matrix *image{nullptr};
};
data_wrapper from{};
data_wrapper to{};
sycl::range<3> size{};
syclcompat::detail::memcpy_direction direction{syclcompat::detail::memcpy_direction::automatic};
};

/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
/// The function will return after the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy(const memcpy_parameter &param,
sycl::queue q = get_default_queue());

/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
/// . The return of the function does NOT guarantee the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy_async(const memcpy_parameter &param,
sycl::queue q = get_default_queue());

} // namespace experimental
} // namespace syclcompat
```

Finally, the class `pitched_data`, which manages memory allocation for 3D
spaces, padded to avoid uncoalesced memory accesses.

Expand Down
178 changes: 143 additions & 35 deletions sycl/include/syclcompat/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ template <typename AllocT> auto *local_mem() {
return As;
}

namespace detail {
namespace experimental {
enum memcpy_direction {
host_to_host,
host_to_device,
device_to_host,
device_to_device,
automatic
};
} // namespace detail
}

enum class memory_region {
global = 0, // device global memory
Expand Down Expand Up @@ -122,6 +122,42 @@ class pitched_data {
size_t _pitch, _x, _y;
};

namespace experimental {
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
class image_mem_wrapper;
namespace detail {
static sycl::event memcpy(const image_mem_wrapper *src,
const sycl::id<3> &src_id, pitched_data &dest,
const sycl::id<3> &dest_id,
const sycl::range<3> &copy_extend, sycl::queue q);
static sycl::event memcpy(const pitched_data src, const sycl::id<3> &src_id,
image_mem_wrapper *dest, const sycl::id<3> &dest_id,
const sycl::range<3> &copy_extend, sycl::queue q);
} // namespace detail
#endif
class image_matrix;
namespace detail {
static pitched_data to_pitched_data(image_matrix *image);
}

/// Memory copy parameters for 2D/3D memory data.
struct memcpy_parameter {
struct data_wrapper {
pitched_data pitched{};
sycl::id<3> pos{};
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
experimental::image_mem_wrapper *image_bindless{nullptr};
#endif
image_matrix *image{nullptr};
};
data_wrapper from{};
data_wrapper to{};
sycl::range<3> size{};
syclcompat::experimental::memcpy_direction direction{
syclcompat::experimental::memcpy_direction::automatic};
};
} // namespace experimental

namespace detail {

template <class T, memory_region Memory, size_t Dimension> class accessor;
Expand Down Expand Up @@ -263,21 +299,16 @@ static pointer_access_attribute get_pointer_attribute(sycl::queue q,
}
}

static memcpy_direction deduce_memcpy_direction(sycl::queue q, void *to_ptr,
const void *from_ptr) {
static experimental::memcpy_direction
deduce_memcpy_direction(sycl::queue q, void *to_ptr, const void *from_ptr) {
// table[to_attribute][from_attribute]
using namespace experimental; // for memcpy_direction
static const memcpy_direction
direction_table[static_cast<unsigned>(pointer_access_attribute::end)]
[static_cast<unsigned>(pointer_access_attribute::end)] = {
{memcpy_direction::host_to_host,
memcpy_direction::device_to_host,
memcpy_direction::host_to_host},
{memcpy_direction::host_to_device,
memcpy_direction::device_to_device,
memcpy_direction::device_to_device},
{memcpy_direction::host_to_host,
memcpy_direction::device_to_device,
memcpy_direction::device_to_device}};
{host_to_host, device_to_host, host_to_host},
{host_to_device, device_to_device, device_to_device},
{host_to_host, device_to_device, device_to_device}};
return direction_table[static_cast<unsigned>(get_pointer_attribute(
q, to_ptr))][static_cast<unsigned>(get_pointer_attribute(q, from_ptr))];
}
Expand All @@ -300,35 +331,36 @@ static inline size_t get_offset(sycl::id<3> id, size_t slice, size_t pitch) {
return slice * id.get(2) + pitch * id.get(1) + id.get(0);
}

// RAII for host pointer
class host_buffer {
void *_buf;
size_t _size;
sycl::queue _q;
const std::vector<sycl::event> &_deps; // free operation depends

public:
host_buffer(size_t size, sycl::queue q, const std::vector<sycl::event> &deps)
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
void *get_ptr() const { return _buf; }
size_t get_size() const { return _size; }
~host_buffer() {
if (_buf) {
_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(_deps);
cgh.host_task([buf = _buf] { std::free(buf); });
});
}
}
};

/// copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr
/// and \p from_range to another specified by \p to_ptr and \p to_range.
static inline std::vector<sycl::event>
memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
sycl::range<3> to_range, sycl::range<3> from_range, sycl::id<3> to_id,
sycl::id<3> from_id, sycl::range<3> size,
const std::vector<sycl::event> &dep_events = {}) {
// RAII for host pointer
class host_buffer {
void *_buf;
size_t _size;
sycl::queue _q;
const std::vector<sycl::event> &_deps; // free operation depends

public:
host_buffer(size_t size, sycl::queue q,
const std::vector<sycl::event> &deps)
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
void *get_ptr() const { return _buf; }
size_t get_size() const { return _size; }
~host_buffer() {
if (_buf) {
_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(_deps);
cgh.host_task([buf = _buf] { std::free(buf); });
});
}
}
};

std::vector<sycl::event> event_list;

size_t to_slice = to_range.get(1) * to_range.get(0);
Expand All @@ -343,6 +375,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
return {memcpy(q, to_surface, from_surface, to_slice * size.get(2),
dep_events)};
}
using namespace experimental; // for memcpy_direction
memcpy_direction direction = deduce_memcpy_direction(q, to_ptr, from_ptr);
size_t size_slice = size.get(1) * size.get(0);
switch (direction) {
Expand Down Expand Up @@ -448,6 +481,56 @@ static sycl::event combine_events(std::vector<sycl::event> &events,

} // namespace detail

namespace experimental {
namespace detail {
static inline std::vector<sycl::event>
memcpy(sycl::queue q, const experimental::memcpy_parameter &param) {
auto to = param.to.pitched;
auto from = param.from.pitched;
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
if (param.to.image_bindless != nullptr &&
param.from.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
// TODO: Need change logic when sycl support image_mem to image_mem copy.
std::vector<sycl::event> event_list;
syclcompat::detail::host_buffer buf(param.size.size(), q, event_list);
to.set_data_ptr(buf.get_ptr());
experimental::detail::memcpy(param.from.image_bindless, param.from.pos, to,
sycl::id<3>(0, 0, 0), param.size, q);
from.set_data_ptr(buf.get_ptr());
event_list.push_back(experimental::detail::memcpy(
from, sycl::id<3>(0, 0, 0), param.to.image_bindless, param.to.pos,
param.size, q));
return event_list;
} else if (param.to.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
return {experimental::detail::memcpy(from, param.from.pos,
param.to.image_bindless, param.to.pos,
param.size, q)};
} else if (param.from.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
return {experimental::detail::memcpy(param.from.image_bindless,
param.from.pos, to, param.to.pos,
param.size, q)};
}
#endif
if (param.to.image != nullptr) {
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
to = experimental::detail::to_pitched_data(param.to.image);
}
if (param.from.image != nullptr) {
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
from = experimental::detail::to_pitched_data(param.from.image);
}
return syclcompat::detail::memcpy(q, to, param.to.pos, from, param.from.pos,
param.size);
}
} // namespace detail
} // namespace experimental

/// Allocate memory block on the device.
/// \param num_bytes Number of bytes to allocate.
/// \param q Queue to execute the allocate task.
Expand Down Expand Up @@ -757,6 +840,31 @@ static sycl::event inline fill_async(void *dev_ptr, const T &pattern,
return detail::fill(q, dev_ptr, pattern, count);
}

namespace experimental {

/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
/// The function will return after the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy(const memcpy_parameter &param,
sycl::queue q = get_default_queue()) {
sycl::event::wait(syclcompat::experimental::detail::memcpy(q, param));
}

/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
/// . The return of the function does NOT guarantee the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy_async(const memcpy_parameter &param,
sycl::queue q = get_default_queue()) {
syclcompat::experimental::detail::memcpy(q, param);
}
} // namespace experimental

/// Synchronously sets \p value to the first \p size bytes starting from \p
/// dev_ptr. The function will return after the memset operation is completed.
///
Expand Down
Loading