diff --git a/CHANGELOG.md b/CHANGELOG.md
index 479582348..00e36edd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,8 +4,11 @@
 
 ## Improvements
 
+- PR #647 Simplify `rmm::exec_policy` and refactor Thrust support
+
 ## Bug Fixes
 
+
 # RMM 0.17.0 (Date TBD)
 
 ## New Features
diff --git a/README.md b/README.md
index 17deb97e8..750ab97c3 100644
--- a/README.md
+++ b/README.md
@@ -210,8 +210,8 @@ which can lead to ambiguity in APIs when it is assigned `0`.) All RMM stream-or
 
 `rmm::cuda_stream` is a simple owning wrapper around a CUDA `cudaStream_t`. This class provides
 RAII semantics (constructor creates the CUDA stream, destructor destroys it). An `rmm::cuda_stream`
-can never represent the CUDA default stream or per-thread default stream, it only ever represents
-a single non-default stream. `rmm::cuda_stream` cannot be copied but can be moved.
+can never represent the CUDA default stream or per-thread default stream; it only ever represents
+a single non-default stream. `rmm::cuda_stream` cannot be copied, but can be moved.
 
 ### Thread Safety
 
@@ -377,23 +377,25 @@ See [below](#using-rmm-with-thrust) for more information on using RMM with Thrus
 
 ### `device_buffer`
 
-An untyped, unintialized RAII class for stream ordered device memory allocation.
+An untyped, uninitialized RAII class for stream ordered device memory allocation.
 
 #### Example
 ```c++
 cuda_stream_view s{...};
-rmm::device_buffer b{100,s}; // Allocates at least 100 bytes on stream `s` using the *default* resource
-void* p = b.data(); // Raw, untyped pointer to underlying device memory
+// Allocates at least 100 bytes on stream `s` using the *default* resource
+rmm::device_buffer b{100,s};
+void* p = b.data(); // Raw, untyped pointer to underlying device memory
 
 kernel<<<..., s.value()>>>(b.data()); // `b` is only safe to use on `s`
 
 rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
-rmm::device_buffer b2{100, s, mr}; // Allocates at least 100 bytes on stream `s` using the explicitly provided resource
+// Allocates at least 100 bytes on stream `s` using the resource `mr`
+rmm::device_buffer b2{100, s, mr};
 ```
 
 ### `device_uvector`
 
-A typed, unintialized RAII class for allocation of a contiguous set of elements in device memory.
+A typed, uninitialized RAII class for allocation of a contiguous set of elements in device memory.
 Similar to a `thrust::device_vector`, but as an optimization, does not default initialize the
 contained elements. This optimization restricts the types `T` to trivially copyable types.
 
@@ -401,11 +403,15 @@ contained elements. This optimization restricts the types `T` to trivially copya
 ```c++
 cuda_stream_view s{...};
-rmm::device_uvector<int32_t> v(100, s); /// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default resource
-thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); // Initializes the elements to 0
+// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the
+// default resource
+rmm::device_uvector<int32_t> v(100, s);
+// Initializes the elements to 0
+thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0});
 
 rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
-rmm::device_uvector<int32_t> v2{100, s, mr}; // Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the explicitly provided resource
+// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the resource `mr`
+rmm::device_uvector<int32_t> v2{100, s, mr};
 ```
 
 ### `device_scalar`
 
@@ -416,7 +422,8 @@ modifying the value in device memory from the host, or retrieving the value from
 #### Example
 ```c++
 cuda_stream_view s{...};
-rmm::device_scalar<int32_t> a{s}; // Allocates uninitialized storage for a single `int32_t` in device memory
+// Allocates uninitialized storage for a single `int32_t` in device memory
+rmm::device_scalar<int32_t> a{s};
 a.set_value(42, s); // Updates the value in device memory to `42` on stream `s`
 
 kernel<<<...,s.value()>>>(a.data()); // Pass raw pointer to underlying element in device memory
 
@@ -474,12 +481,8 @@ RMM provides `rmm::mr::thrust_allocator` as a conforming Thrust allocator that u
 To instruct a Thrust algorithm to use `rmm::mr::thrust_allocator` to allocate temporary storage,
 you can use the custom Thrust CUDA device execution policy: `rmm::exec_policy(stream)`.
 
-`rmm::exec_policy(stream)` returns a `std::unique_ptr` to a Thrust execution policy that uses
-`rmm::mr::thrust_allocator` for temporary allocations. In order to specify that the Thrust algorithm
-be executed on a specific stream, the usage is:
-
 ```c++
-thrust::sort(rmm::exec_policy(stream)->on(stream), ...);
+thrust::sort(rmm::exec_policy(stream), ...);
 ```
 
 The first `stream` argument is the `stream` to use for `rmm::mr::thrust_allocator`.
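Taken together, the README changes above describe the new single-call pattern. A minimal end-to-end sketch of that pattern (illustrative only, not part of this diff; it assumes the `rmm::exec_policy` header added below plus the existing `rmm::cuda_stream` and `rmm::device_uvector`):

```c++
// Sketch of the new API surface, assuming <rmm/exec_policy.hpp> from this PR
// plus existing RMM headers.
#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sequence.h>
#include <thrust/sort.h>

int main()
{
  rmm::cuda_stream stream;            // owning, non-default stream (RAII)
  rmm::cuda_stream_view sv = stream;  // non-owning view used by the APIs below

  rmm::device_uvector<int> v(100, sv);  // uninitialized storage on `sv`

  // Fill with 99, 98, ..., 0, then sort. Any temporary storage the algorithms
  // need is allocated through rmm::mr::thrust_allocator on the same stream.
  thrust::sequence(rmm::exec_policy(sv), v.begin(), v.end(), 99, -1);
  thrust::sort(rmm::exec_policy(sv), v.begin(), v.end());

  stream.synchronize();
  return 0;
}
```

Compared with the old `rmm::exec_policy(stream)->on(stream)` pattern, the policy is passed by value and the stream is supplied once.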
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index 98d6ea6ba..6dca2f7b9 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -310,7 +310,7 @@ int main(int argc, char** argv)
     std::string mr_name = args["resource"].as<std::string>();
     declare_benchmark(mr_name);
   } else {
-    std::array<std::string, 4> mrs{"pool", "binning", "cuda", "arena"};
+    std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
     std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
   }
   ::benchmark::RunSpecifiedBenchmarks();
diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index 08185cee0..6fbd5f2ab 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -19,6 +19,7 @@
 #include
 #include
+#include <rmm/cuda_stream_view.hpp>
 #include
 #include
 #include
@@ -34,6 +35,8 @@
 #include
 
+#include <spdlog/common.h>
+
 #include
 #include
 #include
 
@@ -41,9 +44,6 @@
 #include
 #include
-#include "rmm/cuda_stream_view.hpp"
-#include "spdlog/common.h"
-
 
 /// MR factory functions
 std::shared_ptr<rmm::mr::device_memory_resource> make_cuda(std::size_t = 0)
 {
diff --git a/include/rmm/device_vector.hpp b/include/rmm/device_vector.hpp
new file mode 100644
index 000000000..c38f06b66
--- /dev/null
+++ b/include/rmm/device_vector.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
+
+#include <thrust/device_vector.h>
+
+namespace rmm {
+/**
+ * @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
+ *
+ */
+template <typename T>
+using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;
+
+} // namespace rmm
diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp
new file mode 100644
index 000000000..34af512e7
--- /dev/null
+++ b/include/rmm/exec_policy.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ @file exec_policy.hpp
+ Thrust execution policy that uses RMM's Thrust Allocator Adaptor.
+ */
+
+#pragma once
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
+
+namespace rmm {
+
+/**
+ * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on
+ * the specified stream.
+ */
+inline auto exec_policy(cuda_stream_view stream = cuda_stream_default,
+                        rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
+{
+  return thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream, mr)).on(stream.value());
+}
+
+} // namespace rmm
diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index 77b7bd222..e7acd00fd 100644
--- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -73,7 +73,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
    * @param mr The resource to be used for device memory allocation
    * @param stream The stream to be used for device memory (de)allocation
    */
-  thrust_allocator(device_memory_resource* mr, cuda_stream_view stream) : _mr(mr), _stream{stream}
+  thrust_allocator(cuda_stream_view stream, device_memory_resource* mr) : _stream{stream}, _mr(mr)
   {
   }
 
@@ -122,8 +122,8 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
   cuda_stream_view stream() const noexcept { return _stream; }
 
  private:
-  device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
   cuda_stream_view _stream{};
+  device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
 };
 } // namespace mr
 } // namespace rmm
diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h
index 70b867d00..30a664afd 100644
--- a/include/rmm/thrust_rmm_allocator.h
+++ b/include/rmm/thrust_rmm_allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,35 +14,20 @@
  * limitations under the License.
  */
 
-/**
-   Allocator class compatible with thrust arrays that uses RMM device memory
-   manager.
-
-   Author: Mark Harris
- */
-
-#ifndef THRUST_RMM_ALLOCATOR_H
-#define THRUST_RMM_ALLOCATOR_H
+#pragma once
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_vector.hpp>
 #include <rmm/mr/device/thrust_allocator_adaptor.hpp>
-#include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 
 namespace rmm {
-/**
- * @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
- *
- */
-template <typename T>
-using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;
 
 using par_t = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator<char>())));
 using deleter_t = std::function<void(par_t *)>;
 using exec_policy_t = std::unique_ptr<par_t, deleter_t>;
 
-/* --------------------------------------------------------------------------*/
 /**
  * @brief Returns a unique_ptr to a Thrust CUDA execution policy that uses RMM
 * for temporary memory allocation.
@@ -52,8 +37,7 @@ using exec_policy_t = std::unique_ptr<par_t, deleter_t>;
  * @Returns A Thrust execution policy that will use RMM for temporary memory
  * allocation.
  */
-/* --------------------------------------------------------------------------*/
-inline exec_policy_t exec_policy(cuda_stream_view const &stream = cuda_stream_view{})
+inline exec_policy_t exec_policy(cudaStream_t stream = 0)
 {
   auto *alloc = new rmm::mr::thrust_allocator<char>(stream);
   auto deleter = [alloc](par_t *pointer) {
@@ -66,5 +50,3 @@
 }
 
 } // namespace rmm
-
-#endif // THRUST_RMM_ALLOCATOR_H
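As a closing illustration (not part of this diff), the relocated `rmm::device_vector` alias and the new `rmm::exec_policy` compose as below; `sum_on_stream` is a hypothetical name:

```c++
// Sketch assuming the headers added above. rmm::device_vector allocates its
// storage through rmm::mr::thrust_allocator, i.e. the current device resource.
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

int sum_on_stream(rmm::cuda_stream_view stream)
{
  // Unlike device_uvector, the thrust::device_vector alias value-initializes
  // its elements; here all 100 elements are set to 1.
  rmm::device_vector<int> v(100, 1);

  // Temporary storage for the reduction is also allocated through RMM, on `stream`.
  return thrust::reduce(rmm::exec_policy(stream), v.begin(), v.end());
}
```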