Simplify rmm::exec_policy and refactor Thrust support #647

Merged
merged 10 commits on Dec 9, 2020
Changes from all commits
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -4,8 +4,11 @@

## Improvements

- PR #647 Simplify `rmm::exec_policy` and refactor Thrust support

## Bug Fixes


# RMM 0.17.0 (Date TBD)

## New Features
35 changes: 19 additions & 16 deletions README.md
@@ -210,8 +210,8 @@ which can lead to ambiguity in APIs when it is assigned `0`.) All RMM stream-or

`rmm::cuda_stream` is a simple owning wrapper around a CUDA `cudaStream_t`. This class provides
RAII semantics (constructor creates the CUDA stream, destructor destroys it). An `rmm::cuda_stream`
can never represent the CUDA default stream or per-thread default stream, it only ever represents
a single non-default stream. `rmm::cuda_stream` cannot be copied but can be moved.
can never represent the CUDA default stream or per-thread default stream; it only ever represents
a single non-default stream. `rmm::cuda_stream` cannot be copied, but can be moved.
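A minimal sketch of these semantics (the function name is illustrative; running it requires a CUDA-capable device):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_view.hpp>

#include <utility>

void stream_example()
{
  rmm::cuda_stream s;                  // constructor creates a new non-default CUDA stream
  rmm::cuda_stream_view v = s.view();  // non-owning view to pass to stream-accepting APIs

  rmm::cuda_stream s2{std::move(s)};   // OK: move transfers ownership of the stream
  // rmm::cuda_stream s3{s2};          // error: copy constructor is deleted

  s2.synchronize();                    // wait for all work enqueued on the stream
}                                      // destructor destroys the underlying cudaStream_t
```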

### Thread Safety

@@ -377,35 +377,41 @@ See [below](#using-rmm-with-thrust) for more information on using RMM with Thrus

### `device_buffer`

An untyped, unintialized RAII class for stream ordered device memory allocation.
An untyped, uninitialized RAII class for stream ordered device memory allocation.

#### Example

```c++
cuda_stream_view s{...};
rmm::device_buffer b{100,s}; // Allocates at least 100 bytes on stream `s` using the *default* resource
void* p = b.data(); // Raw, untyped pointer to underlying device memory
// Allocates at least 100 bytes on stream `s` using the *default* resource
rmm::device_buffer b{100,s};
void* p = b.data(); // Raw, untyped pointer to underlying device memory

kernel<<<..., s.value()>>>(b.data()); // `b` is only safe to use on `s`

rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
rmm::device_buffer b2{100, s, mr}; // Allocates at least 100 bytes on stream `s` using the explicitly provided resource
// Allocates at least 100 bytes on stream `s` using the resource `mr`
rmm::device_buffer b2{100, s, mr};
```

### `device_uvector<T>`
A typed, unintialized RAII class for allocation of a contiguous set of elements in device memory.
A typed, uninitialized RAII class for allocation of a contiguous set of elements in device memory.
Similar to a `thrust::device_vector`, but as an optimization, does not default initialize the
contained elements. This optimization restricts the types `T` to trivially copyable types.

#### Example

```c++
cuda_stream_view s{...};
rmm::device_uvector<int32_t> v(100, s); /// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default resource
thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); // Initializes the elements to 0
// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the
// default resource
rmm::device_uvector<int32_t> v(100, s);
// Initializes the elements to 0
thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0});

rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
rmm::device_uvector<int32_t> v2{100, s, mr}; // Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the explicitly provided resource
// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the resource `mr`
rmm::device_uvector<int32_t> v2{100, s, mr};
```

### `device_scalar`
@@ -416,7 +422,8 @@ modifying the value in device memory from the host, or retrieving the value from
#### Example
```c++
cuda_stream_view s{...};
rmm::device_scalar<int32_t> a{s}; // Allocates uninitialized storage for a single `int32_t` in device memory
// Allocates uninitialized storage for a single `int32_t` in device memory
rmm::device_scalar<int32_t> a{s};
a.set_value(42, s); // Updates the value in device memory to `42` on stream `s`

kernel<<<...,s.value()>>>(a.data()); // Pass raw pointer to underlying element in device memory
@@ -474,12 +481,8 @@ RMM provides `rmm::mr::thrust_allocator` as a conforming Thrust allocator that u
To instruct a Thrust algorithm to use `rmm::mr::thrust_allocator` to allocate temporary storage, you
can use the custom Thrust CUDA device execution policy: `rmm::exec_policy(stream)`.

`rmm::exec_policy(stream)` returns a `std::unique_ptr` to a Thrust execution policy that uses
`rmm::mr::thrust_allocator` for temporary allocations. In order to specify that the Thrust algorithm
be executed on a specific stream, the usage is:

```c++
thrust::sort(rmm::exec_policy(stream)->on(stream), ...);
thrust::sort(rmm::exec_policy(stream), ...);
```

The first `stream` argument is the `stream` to use for `rmm::mr::thrust_allocator`.
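Per the refactored signature in this PR's `include/rmm/exec_policy.hpp`, a memory resource can also be passed explicitly as a second argument. A hedged sketch (the function name is illustrative, and the caller supplies `stream` and `d_vec`):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <thrust/sort.h>

void sort_on_stream(rmm::cuda_stream_view stream, rmm::device_uvector<int>& d_vec)
{
  static rmm::mr::cuda_memory_resource cuda_mr;
  static rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};

  // Temporary storage required by the sort is allocated from `pool_mr`,
  // and the algorithm executes on `stream`.
  thrust::sort(rmm::exec_policy(stream, &pool_mr), d_vec.begin(), d_vec.end());
}
```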
2 changes: 1 addition & 1 deletion benchmarks/random_allocations/random_allocations.cpp
@@ -310,7 +310,7 @@ int main(int argc, char** argv)
std::string mr_name = args["resource"].as<std::string>();
declare_benchmark(mr_name);
} else {
std::array<std::string, 4> mrs{"pool", "binning", "cuda", "arena"};
std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
}
::benchmark::RunSpecifiedBenchmarks();
6 changes: 3 additions & 3 deletions benchmarks/replay/replay.cpp
@@ -19,6 +19,7 @@
#include <benchmarks/utilities/log_parser.hpp>
#include <benchmarks/utilities/simulated_memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
@@ -34,16 +35,15 @@

#include <benchmark/benchmark.h>

#include <spdlog/common.h>

#include <chrono>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <thread>

#include "rmm/cuda_stream_view.hpp"
#include "spdlog/common.h"

/// MR factory functions
std::shared_ptr<rmm::mr::device_memory_resource> make_cuda(std::size_t = 0)
{
31 changes: 31 additions & 0 deletions include/rmm/device_vector.hpp
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

#include <thrust/device_vector.h>

namespace rmm {
/**
* @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
*
*/
template <typename T>
using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;

} // namespace rmm
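A brief usage sketch of the new alias (allocation goes through the current device resource on the default stream; the function name and fill value are illustrative):

```cpp
#include <rmm/device_vector.hpp>

void device_vector_example()
{
  // 100 ints, each initialized to 42, allocated through RMM.
  rmm::device_vector<int> v(100, 42);

  // The familiar thrust::device_vector interface is unchanged.
  v[0] = 7;
}
```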
39 changes: 39 additions & 0 deletions include/rmm/exec_policy.hpp
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
@file exec_policy.hpp
Thrust execution policy that uses RMM's Thrust Allocator Adaptor.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

namespace rmm {

/**
* @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on
* the specified stream.
*/
inline auto exec_policy(cuda_stream_view stream = cuda_stream_default,
rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
{
return thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream, mr)).on(stream.value());
}

} // namespace rmm
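Because the policy is now returned by value rather than wrapped in a `std::unique_ptr`, call sites drop the `->on(stream)` dereference. A sketch of migrating a call site (the function name is illustrative; the caller supplies `stream` and `v`):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

int sum_on_stream(rmm::cuda_stream_view stream, rmm::device_uvector<int>& v)
{
  // Before this PR (rmm/thrust_rmm_allocator.h):
  //   return thrust::reduce(rmm::exec_policy(stream)->on(stream), v.begin(), v.end());

  // After this PR:
  return thrust::reduce(rmm::exec_policy(stream), v.begin(), v.end());
}
```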
4 changes: 2 additions & 2 deletions include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -73,7 +73,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
* @param stream The stream to be used for device memory (de)allocation
* @param mr The resource to be used for device memory allocation
*/
thrust_allocator(device_memory_resource* mr, cuda_stream_view stream) : _mr(mr), _stream{stream}
thrust_allocator(cuda_stream_view stream, device_memory_resource* mr) : _stream{stream}, _mr(mr)
{
}

@@ -122,8 +122,8 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
cuda_stream_view stream() const noexcept { return _stream; }

private:
device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
cuda_stream_view _stream{};
device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
};
} // namespace mr
} // namespace rmm
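Note the argument-order change in the constructor above: the stream now comes first, matching RMM's other stream-ordered types. A hedged sketch of constructing the allocator under the new signature (the function name is illustrative):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

rmm::mr::thrust_allocator<char> make_alloc(rmm::cuda_stream_view stream)
{
  // New order: stream first, then the memory resource.
  return rmm::mr::thrust_allocator<char>{stream, rmm::mr::get_current_device_resource()};
}
```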
26 changes: 4 additions & 22 deletions include/rmm/thrust_rmm_allocator.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION.
* Copyright (c) 2018-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,35 +14,20 @@
* limitations under the License.
*/

/**
Allocator class compatible with thrust arrays that uses RMM device memory
manager.

Author: Mark Harris
*/

#ifndef THRUST_RMM_ALLOCATOR_H
#define THRUST_RMM_ALLOCATOR_H
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>

namespace rmm {
/**
* @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
*
*/
template <typename T>
using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;

using par_t = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator<char>())));
using deleter_t = std::function<void(par_t *)>;
using exec_policy_t = std::unique_ptr<par_t, deleter_t>;

/* --------------------------------------------------------------------------*/
/**
* @brief Returns a unique_ptr to a Thrust CUDA execution policy that uses RMM
* for temporary memory allocation.
@@ -52,8 +37,7 @@ using exec_policy_t = std::unique_ptr<par_t, deleter_t>;
* @Returns A Thrust execution policy that will use RMM for temporary memory
* allocation.
*/
/* --------------------------------------------------------------------------*/
inline exec_policy_t exec_policy(cuda_stream_view const &stream = cuda_stream_view{})
inline exec_policy_t exec_policy(cudaStream_t stream = 0)
{
auto *alloc = new rmm::mr::thrust_allocator<char>(stream);
auto deleter = [alloc](par_t *pointer) {
@@ -66,5 +50,3 @@ inline exec_policy_t exec_policy(cuda_stream_view const &stream = cuda_stream_vi
}

} // namespace rmm

#endif // THRUST_RMM_ALLOCATOR_H