diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2182a2d..a1d5d5631 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - PR #545 Fix build to support using `clang` as the host compiler - PR #534 Fix `pool_memory_resource` failure when init and max pool sizes are equal - PR #546 Remove CUDA driver linking and correct NVTX macro. +- PR #569 Correct `device_scalar::set_value` to pass host value by reference to avoid copying from invalid value - PR #559 Fix `align_down` to only change unaligned values. diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 80c8aef1f..0d1b2639e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -114,7 +114,25 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * Does not synchronize `stream`. + * This function does not synchronize `stream` before returning. Therefore, the object + * referenced by `host_value` should not be destroyed or modified until `stream` has been + * synchronized. Otherwise, behavior is undefined. + * + * @note: This function incurs a host to device memcpy and should be used sparingly. + + * Example: + * \code{cpp} + * rmm::device_scalar s; + * + * int v{42}; + * + * // Copies 42 to device storage on `stream`. Does _not_ synchronize + * vec.set_value(v, stream); + * ... + * cudaStreamSynchronize(stream); + * // Synchronization is required before `v` can be modified + * v = 13; + * \endcode * * @throws `rmm::cuda_error` if copying `host_value` to device memory fails. * @throws `rmm::cuda_error` if synchronizing `stream` fails. @@ -122,9 +140,9 @@ class device_scalar { * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - template - auto set_value(T host_value, cudaStream_t stream = 0) - -> std::enable_if_t::value, Dummy> + template + auto set_value(T const &host_value, cudaStream_t stream = 0) + -> std::enable_if_t::value, Placeholder> { if (host_value == T{0}) { RMM_CUDA_TRY(cudaMemsetAsync(buffer.data(), 0, sizeof(T), stream)); @@ -141,7 +159,25 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * Does not synchronize `stream`. + * This function does not synchronize `stream` before returning. Therefore, the object + * referenced by `host_value` should not be destroyed or modified until `stream` has been + * synchronized. Otherwise, behavior is undefined. + * + * @note: This function incurs a host to device memcpy and should be used sparingly. + + * Example: + * \code{cpp} + * rmm::device_scalar s; + * + * int v{42}; + * + * // Copies 42 to device storage on `stream`. Does _not_ synchronize + * vec.set_value(v, stream); + * ... + * cudaStreamSynchronize(stream); + * // Synchronization is required before `v` can be modified + * v = 13; + * \endcode * * @throws `rmm::cuda_error` if copying `host_value` to device memory fails * @throws `rmm::cuda_error` if synchronizing `stream` fails @@ -149,9 +185,9 @@ class device_scalar { * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - template - auto set_value(T host_value, cudaStream_t stream = 0) - -> std::enable_if_t::value, Dummy> + template + auto set_value(T const &host_value, cudaStream_t stream = 0) + -> std::enable_if_t::value, Placeholder> { _memcpy(buffer.data(), &host_value, stream); }