From 02b1ecdf4128a09ec3b3254f8598d4bcdc1221de Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 14:19:53 -0500 Subject: [PATCH 1/6] Pass by ref instead of value. --- include/rmm/device_scalar.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 80c8aef1f..82fb72a55 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -123,7 +123,7 @@ class device_scalar { * @param stream CUDA stream on which to perform the copy */ template - auto set_value(T host_value, cudaStream_t stream = 0) + auto set_value(T const &host_value, cudaStream_t stream = 0) -> std::enable_if_t::value, Dummy> { if (host_value == T{0}) { @@ -150,7 +150,7 @@ class device_scalar { * @param stream CUDA stream on which to perform the copy */ template - auto set_value(T host_value, cudaStream_t stream = 0) + auto set_value(T const &host_value, cudaStream_t stream = 0) -> std::enable_if_t::value, Dummy> { _memcpy(buffer.data(), &host_value, stream); From 44bece4c5acec71abaa3851b8409059ed27f22aa Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 14:20:06 -0500 Subject: [PATCH 2/6] Improve docs for device_scalar asynchony. --- include/rmm/device_scalar.hpp | 40 +++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 82fb72a55..5cb9b762f 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -114,7 +114,25 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * Does not synchronize `stream`. + * This function does not synchronize stream `stream` before returning. Therefore, the object + * referenced by `host_value` should not be destroyed or modified until `stream` has been + * synchronized. Otherwise, behavior is undefined. + * + * @note: This function incurs a host to device memcpy and should be used sparingly. + + * Example: + * \code{cpp} + * rmm::device_scalar s; + * + * int v{42}; + * + * // Copies 42 to element 0 on `stream`. Does _not_ synchronize + * vec.set_value(v, stream); + * ... + * cudaStreamSynchronize(stream); + * // Synchronization is required before `v` can be modified + * v = 13; + * \endcode * * @throws `rmm::cuda_error` if copying `host_value` to device memory fails. * @throws `rmm::cuda_error` if synchronizing `stream` fails. @@ -141,7 +159,25 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * Does not synchronize `stream`. + * This function does not synchronize stream `stream` before returning. Therefore, the object + * referenced by `host_value` should not be destroyed or modified until `stream` has been + * synchronized. Otherwise, behavior is undefined. + * + * @note: This function incurs a host to device memcpy and should be used sparingly. + + * Example: + * \code{cpp} + * rmm::device_scalar s; + * + * int v{42}; + * + * // Copies 42 to element 0 on `stream`. Does _not_ synchronize + * vec.set_value(v, stream); + * ... + * cudaStreamSynchronize(stream); + * // Synchronization is required before `v` can be modified + * v = 13; + * \endcode * * @throws `rmm::cuda_error` if copying `host_value` to device memory fails * @throws `rmm::cuda_error` if synchronizing `stream` fails From ff59e62d56032c68e2953c5f676676a247902b45 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 14:24:38 -0500 Subject: [PATCH 3/6] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74414ee32..27dacf370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - PR #545 Fix build to support using `clang` as the host compiler - PR #534 Fix `pool_memory_resource` failure when init and max pool sizes are equal - PR #546 Remove CUDA driver linking and correct NVTX macro. +- PR #569 Correct `device_scalar::set_value` to pass host value by reference to avoid copying from invalid value # RMM 0.15.0 (26 Aug 2020) From 0518779df5a3d944f6cad9e55c044b5eec72ed80 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 16:16:12 -0500 Subject: [PATCH 4/6] Remove extraneous stream. --- include/rmm/device_scalar.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 5cb9b762f..03028deda 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -114,7 +114,7 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * This function does not synchronize stream `stream` before returning. Therefore, the object + * This function does not synchronize `stream` before returning. Therefore, the object * referenced by `host_value` should not be destroyed or modified until `stream` has been * synchronized. Otherwise, behavior is undefined. * @@ -159,7 +159,7 @@ class device_scalar { * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling * this function, otherwise there may be a race condition. * - * This function does not synchronize stream `stream` before returning. Therefore, the object + * This function does not synchronize `stream` before returning. Therefore, the object * referenced by `host_value` should not be destroyed or modified until `stream` has been * synchronized. Otherwise, behavior is undefined. * From 6e4a3a7f0107e041242f9cfc66036aeaedeeba35 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 16:16:19 -0500 Subject: [PATCH 5/6] Rename Dummy to Placeholder. --- include/rmm/device_scalar.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 03028deda..13348cbd5 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -140,9 +140,9 @@ class device_scalar { * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - template + template auto set_value(T const &host_value, cudaStream_t stream = 0) - -> std::enable_if_t::value, Dummy> + -> std::enable_if_t::value, Placeholder> { if (host_value == T{0}) { RMM_CUDA_TRY(cudaMemsetAsync(buffer.data(), 0, sizeof(T), stream)); @@ -185,9 +185,9 @@ class device_scalar { * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - template + template auto set_value(T const &host_value, cudaStream_t stream = 0) - -> std::enable_if_t::value, Dummy> + -> std::enable_if_t::value, Placeholder> { _memcpy(buffer.data(), &host_value, stream); } From 5bed57812f790b5fffe35e3476199016655007d1 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 21 Sep 2020 16:22:38 -0500 Subject: [PATCH 6/6] Correct copy/paste docs. --- include/rmm/device_scalar.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 13348cbd5..0d1b2639e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -126,7 +126,7 @@ class device_scalar { * * int v{42}; * - * // Copies 42 to element 0 on `stream`. Does _not_ synchronize + * // Copies 42 to device storage on `stream`. Does _not_ synchronize * vec.set_value(v, stream); * ... * cudaStreamSynchronize(stream); @@ -171,7 +171,7 @@ class device_scalar { * * int v{42}; * - * // Copies 42 to element 0 on `stream`. Does _not_ synchronize + * // Copies 42 to device storage on `stream`. Does _not_ synchronize * vec.set_value(v, stream); * ... * cudaStreamSynchronize(stream);