From 69753c682f64d1551f5029be83be5fe85e2376e4 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 17 Nov 2020 14:13:41 +1100 Subject: [PATCH 01/10] Order arena MR before cuda MR in random allocations bench --- benchmarks/random_allocations/random_allocations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 98d6ea6ba..6dca2f7b9 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -310,7 +310,7 @@ int main(int argc, char** argv) std::string mr_name = args["resource"].as(); declare_benchmark(mr_name); } else { - std::array mrs{"pool", "binning", "cuda", "arena"}; + std::array mrs{"pool", "binning", "arena", "cuda"}; std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); }); } ::benchmark::RunSpecifiedBenchmarks(); From b42f434b87ab7fe65b9ca27310600ba3642d4a02 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 17 Nov 2020 14:13:56 +1100 Subject: [PATCH 02/10] Formatting --- include/rmm/thrust_rmm_allocator.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h index 70b867d00..5228f7e84 100644 --- a/include/rmm/thrust_rmm_allocator.h +++ b/include/rmm/thrust_rmm_allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. + * Copyright (c) 2018-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,6 @@ * limitations under the License. */ -/** - Allocator class compatible with thrust arrays that uses RMM device memory - manager. - - Author: Mark Harris - */ - #ifndef THRUST_RMM_ALLOCATOR_H #define THRUST_RMM_ALLOCATOR_H @@ -42,7 +35,6 @@ using par_t = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator using deleter_t = std::function; using exec_policy_t = std::unique_ptr; -/* --------------------------------------------------------------------------*/ /** * @brief Returns a unique_ptr to a Thrust CUDA execution policy that uses RMM * for temporary memory allocation. @@ -52,8 +44,7 @@ using exec_policy_t = std::unique_ptr; * @Returns A Thrust execution policy that will use RMM for temporary memory * allocation. */ -/* --------------------------------------------------------------------------*/ -inline exec_policy_t exec_policy(cuda_stream_view const &stream = cuda_stream_view{}) +inline exec_policy_t exec_policy(cudaStream_t stream = 0) { auto *alloc = new rmm::mr::thrust_allocator(stream); auto deleter = [alloc](par_t *pointer) { From 7f1b5df1b3e029bfc2bf5dd1101b7b74d49b7791 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 17 Nov 2020 15:10:42 +1100 Subject: [PATCH 03/10] New thrust_rmm header with improved `exec_policy` --- include/rmm/thrust_rmm.hpp | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 include/rmm/thrust_rmm.hpp diff --git a/include/rmm/thrust_rmm.hpp b/include/rmm/thrust_rmm.hpp new file mode 100644 index 000000000..0fb0a21d4 --- /dev/null +++ b/include/rmm/thrust_rmm.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + @file thrust_rmm.hpp + Allocator class compatible with thrust arrays that uses RMM device memory + manager. + */ + +#ifndef THRUST_RMM_HPP +#define THRUST_RMM_HPP + +#include +#include +#include + +#include +#include + +namespace rmm { + +/** + * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation. + */ +inline auto exec_policy(cuda_stream_view stream = cuda_stream_default) +{ + return thrust::cuda::par(rmm::mr::thrust_allocator(stream)).on(stream.value()); +} + +} // namespace rmm + +#endif // THRUST_RMM_HPP From cdb44c78bc68cd9c600c14336bf0e8f86533752f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Fri, 4 Dec 2020 14:43:44 +1100 Subject: [PATCH 04/10] Fix includes --- benchmarks/replay/replay.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 08185cee0..6fbd5f2ab 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -34,6 +35,8 @@ #include +#include + #include #include #include @@ -41,9 +44,6 @@ #include #include -#include "rmm/cuda_stream_view.hpp" -#include "spdlog/common.h" - /// MR factory functions std::shared_ptr make_cuda(std::size_t = 0) { From d706284a215ceb2dd701139d711994618d013d0e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Fri, 4 Dec 2020 14:57:38 +1100 Subject: [PATCH 05/10] readme cleanup --- README.md | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 17deb97e8..014f2422a 100644 --- a/README.md +++ b/README.md @@ -210,8 +210,8 @@ which can lead to ambiguity in APIs when it is assigned `0`.) All RMM stream-or `rmm::cuda_stream` is a simple owning wrapper around a CUDA `cudaStream_t`. This class provides RAII semantics (constructor creates the CUDA stream, destructor destroys it). An `rmm::cuda_stream` -can never represent the CUDA default stream or per-thread default stream, it only ever represents -a single non-default stream. `rmm::cuda_stream` cannot be copied but can be moved. +can never represent the CUDA default stream or per-thread default stream; it only ever represents +a single non-default stream. `rmm::cuda_stream` cannot be copied, but can be moved. ### Thread Safety @@ -377,23 +377,25 @@ See [below](#using-rmm-with-thrust) for more information on using RMM with Thrus ### `device_buffer` -An untyped, unintialized RAII class for stream ordered device memory allocation. +An untyped, uninitialized RAII class for stream ordered device memory allocation. #### Example ```c++ cuda_stream_view s{...}; -rmm::device_buffer b{100,s}; // Allocates at least 100 bytes on stream `s` using the *default* resource -void* p = b.data(); // Raw, untyped pointer to underlying device memory +// Allocates at least 100 bytes on stream `s` using the *default* resource +rmm::device_buffer b{100,s}; +void* p = b.data(); // Raw, untyped pointer to underlying device memory kernel<<<..., s.value()>>>(b.data()); // `b` is only safe to use on `s` rmm::mr::device_memory_resource * mr = new my_custom_resource{...}; -rmm::device_buffer b2{100, s, mr}; // Allocates at least 100 bytes on stream `s` using the explicitly provided resource +// Allocates at least 100 bytes on stream `s` using the resource `mr` +rmm::device_buffer b2{100, s, mr}; ``` ### `device_uvector` -A typed, unintialized RAII class for allocation of a contiguous set of elements in device memory. +A typed, uninitialized RAII class for allocation of a contiguous set of elements in device memory. Similar to a `thrust::device_vector`, but as an optimization, does not default initialize the contained elements. This optimization restricts the types `T` to trivially copyable types. @@ -401,11 +403,14 @@ contained elements. This optimization restricts the types `T` to trivially copya ```c++ cuda_stream_view s{...}; -rmm::device_uvector v(100, s); /// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default resource -thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); // Initializes the elements to 0 +// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default +rmm::device_uvector v(100, s); resource +// Initializes the elements to 0 +thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); rmm::mr::device_memory_resource * mr = new my_custom_resource{...}; -rmm::device_uvector v2{100, s, mr}; // Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the explicitly provided resource +// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the resource `mr` +rmm::device_uvector v2{100, s, mr}; ``` ### `device_scalar` @@ -416,7 +421,8 @@ modifying the value in device memory from the host, or retrieving the value from #### Example ```c++ cuda_stream_view s{...}; -rmm::device_scalar a{s}; // Allocates uninitialized storage for a single `int32_t` in device memory +// Allocates uninitialized storage for a single `int32_t` in device memory +rmm::device_scalar a{s}; a.set_value(42, s); // Updates the value in device memory to `42` on stream `s` kernel<<<...,s.value()>>>(a.data()); // Pass raw pointer to underlying element in device memory From 0998fe02a57f92c0c4ff0337e492674789a2db0b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Fri, 4 Dec 2020 15:02:17 +1100 Subject: [PATCH 06/10] Refactoring thrust support. --- include/rmm/device_vector.hpp | 31 +++++++++++++++++++ .../rmm/{thrust_rmm.hpp => exec_policy.hpp} | 17 +++------- include/rmm/thrust_rmm_allocator.h | 13 ++------ 3 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 include/rmm/device_vector.hpp rename include/rmm/{thrust_rmm.hpp => exec_policy.hpp} (75%) diff --git a/include/rmm/device_vector.hpp b/include/rmm/device_vector.hpp new file mode 100644 index 000000000..c38f06b66 --- /dev/null +++ b/include/rmm/device_vector.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace rmm { +/** + * @brief Alias for a thrust::device_vector that uses RMM for memory allocation. + * + */ +template +using device_vector = thrust::device_vector>; + +} // namespace rmm diff --git a/include/rmm/thrust_rmm.hpp b/include/rmm/exec_policy.hpp similarity index 75% rename from include/rmm/thrust_rmm.hpp rename to include/rmm/exec_policy.hpp index 0fb0a21d4..1f694f5f9 100644 --- a/include/rmm/thrust_rmm.hpp +++ b/include/rmm/exec_policy.hpp @@ -15,25 +15,20 @@ */ /** - @file thrust_rmm.hpp - Allocator class compatible with thrust arrays that uses RMM device memory - manager. + @file exec_policy.hpp + Thrust execution policy that uses RMM's Thrust Allocator Adaptor. */ -#ifndef THRUST_RMM_HPP -#define THRUST_RMM_HPP +#pragma once -#include #include #include -#include -#include - namespace rmm { /** - * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation. + * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on + * the specified stream. */ inline auto exec_policy(cuda_stream_view stream = cuda_stream_default) { @@ -41,5 +36,3 @@ inline auto exec_policy(cuda_stream_view stream = cuda_stream_default) } } // namespace rmm - -#endif // THRUST_RMM_HPP diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h index 5228f7e84..30a664afd 100644 --- a/include/rmm/thrust_rmm_allocator.h +++ b/include/rmm/thrust_rmm_allocator.h @@ -14,22 +14,15 @@ * limitations under the License. */ -#ifndef THRUST_RMM_ALLOCATOR_H -#define THRUST_RMM_ALLOCATOR_H +#pragma once #include +#include #include -#include #include namespace rmm { -/** - * @brief Alias for a thrust::device_vector that uses RMM for memory allocation. - * - */ -template -using device_vector = thrust::device_vector>; using par_t = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator()))); using deleter_t = std::function; @@ -57,5 +50,3 @@ inline exec_policy_t exec_policy(cudaStream_t stream = 0) } } // namespace rmm - -#endif // THRUST_RMM_ALLOCATOR_H From 8bb59bf24ccd1006b983a69d0d5dabb1e91f6946 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 8 Dec 2020 10:25:23 +1100 Subject: [PATCH 07/10] Reorder stream, mr --- include/rmm/mr/device/thrust_allocator_adaptor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index 77b7bd222..e7acd00fd 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -73,7 +73,7 @@ class thrust_allocator : public thrust::device_malloc_allocator { * @param mr The resource to be used for device memory allocation * @param stream The stream to be used for device memory (de)allocation */ - thrust_allocator(device_memory_resource* mr, cuda_stream_view stream) : _mr(mr), _stream{stream} + thrust_allocator(cuda_stream_view stream, device_memory_resource* mr) : _stream{stream}, _mr(mr) { } @@ -122,8 +122,8 @@ class thrust_allocator : public thrust::device_malloc_allocator { cuda_stream_view stream() const noexcept { return _stream; } private: - device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; cuda_stream_view _stream{}; + device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; }; } // namespace mr } // namespace rmm From bef943d835bc91d33c48beb6eb7680312c842686 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 8 Dec 2020 10:26:42 +1100 Subject: [PATCH 08/10] Update readme --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 014f2422a..750ab97c3 100644 --- a/README.md +++ b/README.md @@ -403,8 +403,9 @@ contained elements. This optimization restricts the types `T` to trivially copya ```c++ cuda_stream_view s{...}; -// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default -rmm::device_uvector v(100, s); resource +// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the +// default resource +rmm::device_uvector v(100, s); // Initializes the elements to 0 thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); @@ -480,12 +481,8 @@ RMM provides `rmm::mr::thrust_allocator` as a conforming Thrust allocator that u To instruct a Thrust algorithm to use `rmm::mr::thrust_allocator` to allocate temporary storage, you can use the custom Thrust CUDA device execution policy: `rmm::exec_policy(stream)`. -`rmm::exec_policy(stream)` returns a `std::unique_ptr` to a Thrust execution policy that uses -`rmm::mr::thrust_allocator` for temporary allocations. In order to specify that the Thrust algorithm -be executed on a specific stream, the usage is: - ```c++ -thrust::sort(rmm::exec_policy(stream)->on(stream), ...); +thrust::sort(rmm::exec_policy(stream, ...); ``` The first `stream` argument is the `stream` to use for `rmm::mr::thrust_allocator`. From f4108ef025de29079dcd4fdbb4632dcdfd5c946d Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 8 Dec 2020 10:27:38 +1100 Subject: [PATCH 09/10] Add optional MR parameter to exec_policy --- include/rmm/exec_policy.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp index 1f694f5f9..34af512e7 100644 --- a/include/rmm/exec_policy.hpp +++ b/include/rmm/exec_policy.hpp @@ -30,9 +30,10 @@ namespace rmm { * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on * the specified stream. */ -inline auto exec_policy(cuda_stream_view stream = cuda_stream_default) +inline auto exec_policy(cuda_stream_view stream = cuda_stream_default, + rmm::mr::device_memory_resource* mr = mr::get_current_device_resource()) { - return thrust::cuda::par(rmm::mr::thrust_allocator(stream)).on(stream.value()); + return thrust::cuda::par(rmm::mr::thrust_allocator(stream, mr)).on(stream.value()); } } // namespace rmm From fac037092d938dab55a1620e90ef050bc7ed5acf Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 8 Dec 2020 10:44:49 +1100 Subject: [PATCH 10/10] Changelog for #647 --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 479582348..00e36edd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,11 @@ ## Improvements +- PR #647 Simplify `rmm::exec_policy` and refactor Thrust support + ## Bug Fixes + # RMM 0.17.0 (Date TBD) ## New Features