From 69753c682f64d1551f5029be83be5fe85e2376e4 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 17 Nov 2020 14:13:41 +1100
Subject: [PATCH 01/10] Order arena MR before cuda MR in random allocations
 bench

---
 benchmarks/random_allocations/random_allocations.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index 98d6ea6ba..6dca2f7b9 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -310,7 +310,7 @@ int main(int argc, char** argv)
       std::string mr_name = args["resource"].as<std::string>();
       declare_benchmark(mr_name);
     } else {
-      std::array<std::string, 4> mrs{"pool", "binning", "cuda", "arena"};
+      std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
       std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
     }
     ::benchmark::RunSpecifiedBenchmarks();

From b42f434b87ab7fe65b9ca27310600ba3642d4a02 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 17 Nov 2020 14:13:56 +1100
Subject: [PATCH 02/10] Formatting

---
 include/rmm/thrust_rmm_allocator.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h
index 70b867d00..5228f7e84 100644
--- a/include/rmm/thrust_rmm_allocator.h
+++ b/include/rmm/thrust_rmm_allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,6 @@
  * limitations under the License.
  */
 
-/**
- Allocator class compatible with thrust arrays that uses RMM device memory
- manager.
-
- Author: Mark Harris
- */
-
 #ifndef THRUST_RMM_ALLOCATOR_H
 #define THRUST_RMM_ALLOCATOR_H
 
@@ -42,7 +35,6 @@ using par_t         = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator
 using deleter_t     = std::function<void(par_t *)>;
 using exec_policy_t = std::unique_ptr<par_t, deleter_t>;
 
-/* --------------------------------------------------------------------------*/
 /**
  * @brief Returns a unique_ptr to a Thrust CUDA execution policy that uses RMM
  * for temporary memory allocation.
@@ -52,8 +44,7 @@ using exec_policy_t = std::unique_ptr<par_t, deleter_t>;
  * @Returns A Thrust execution policy that will use RMM for temporary memory
  * allocation.
  */
-/* --------------------------------------------------------------------------*/
-inline exec_policy_t exec_policy(cuda_stream_view const &stream = cuda_stream_view{})
+inline exec_policy_t exec_policy(cudaStream_t stream = 0)
 {
   auto *alloc  = new rmm::mr::thrust_allocator<char>(stream);
   auto deleter = [alloc](par_t *pointer) {

From 7f1b5df1b3e029bfc2bf5dd1101b7b74d49b7791 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 17 Nov 2020 15:10:42 +1100
Subject: [PATCH 03/10] New thrust_rmm header with improved `exec_policy`

---
 include/rmm/thrust_rmm.hpp | 45 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 include/rmm/thrust_rmm.hpp

diff --git a/include/rmm/thrust_rmm.hpp b/include/rmm/thrust_rmm.hpp
new file mode 100644
index 000000000..0fb0a21d4
--- /dev/null
+++ b/include/rmm/thrust_rmm.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ @file thrust_rmm.hpp
+ Allocator class compatible with thrust arrays that uses RMM device memory
+ manager.
+ */
+
+#ifndef THRUST_RMM_HPP
+#define THRUST_RMM_HPP
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
+
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+
+namespace rmm {
+
+/**
+ * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation.
+ */
+inline auto exec_policy(cuda_stream_view stream = cuda_stream_default)
+{
+  return thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream)).on(stream.value());
+}
+
+}  // namespace rmm
+
+#endif  // THRUST_RMM_HPP

From cdb44c78bc68cd9c600c14336bf0e8f86533752f Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Fri, 4 Dec 2020 14:43:44 +1100
Subject: [PATCH 04/10] Fix includes

---
 benchmarks/replay/replay.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index 08185cee0..6fbd5f2ab 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -19,6 +19,7 @@
 #include <benchmarks/utilities/log_parser.hpp>
 #include <benchmarks/utilities/simulated_memory_resource.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/binning_memory_resource.hpp>
@@ -34,6 +35,8 @@
 
 #include <benchmark/benchmark.h>
 
+#include <spdlog/common.h>
+
 #include <chrono>
 #include <iterator>
 #include <memory>
@@ -41,9 +44,6 @@
 #include <string>
 #include <thread>
 
-#include "rmm/cuda_stream_view.hpp"
-#include "spdlog/common.h"
-
 /// MR factory functions
 std::shared_ptr<rmm::mr::device_memory_resource> make_cuda(std::size_t = 0)
 {

From d706284a215ceb2dd701139d711994618d013d0e Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Fri, 4 Dec 2020 14:57:38 +1100
Subject: [PATCH 05/10] readme cleanup

---
 README.md | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 17deb97e8..014f2422a 100644
--- a/README.md
+++ b/README.md
@@ -210,8 +210,8 @@ which can lead to ambiguity in APIs when it is assigned `0`.)  All RMM stream-or
 
 `rmm::cuda_stream` is a simple owning wrapper around a CUDA `cudaStream_t`. This class provides 
 RAII semantics (constructor creates the CUDA stream, destructor destroys it). An `rmm::cuda_stream`
-can never represent the CUDA default stream or per-thread default stream, it only ever represents
-a single non-default stream. `rmm::cuda_stream` cannot be copied but can be moved.
+can never represent the CUDA default stream or per-thread default stream; it only ever represents
+a single non-default stream. `rmm::cuda_stream` cannot be copied, but can be moved.
 
 ### Thread Safety
 
@@ -377,23 +377,25 @@ See [below](#using-rmm-with-thrust) for more information on using RMM with Thrus
 
 ### `device_buffer`
 
-An untyped, unintialized RAII class for stream ordered device memory allocation.
+An untyped, uninitialized RAII class for stream ordered device memory allocation.
 
 #### Example
 
 ```c++
 cuda_stream_view s{...};
-rmm::device_buffer b{100,s}; // Allocates at least 100 bytes on stream `s` using the *default* resource
-void* p = b.data();          // Raw, untyped pointer to underlying device memory
+// Allocates at least 100 bytes on stream `s` using the *default* resource
+rmm::device_buffer b{100,s}; 
+void* p = b.data();                   // Raw, untyped pointer to underlying device memory
 
 kernel<<<..., s.value()>>>(b.data()); // `b` is only safe to use on `s`
 
 rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
-rmm::device_buffer b2{100, s, mr}; // Allocates at least 100 bytes on stream `s` using the explicitly provided resource
+// Allocates at least 100 bytes on stream `s` using the resource `mr`
+rmm::device_buffer b2{100, s, mr};
 ```
 
 ### `device_uvector<T>`
-A typed, unintialized RAII class for allocation of a contiguous set of elements in device memory.
+A typed, uninitialized RAII class for allocation of a contiguous set of elements in device memory.
 Similar to a `thrust::device_vector`, but as an optimization, does not default initialize the
 contained elements. This optimization restricts the types `T` to trivially copyable types.
 
@@ -401,11 +403,14 @@ contained elements. This optimization restricts the types `T` to trivially copya
 
 ```c++
 cuda_stream_view s{...};
-rmm::device_uvector<int32_t> v(100, s); /// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default resource
-thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); // Initializes the elements to 0
+// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default 
+rmm::device_uvector<int32_t> v(100, s); resource
+// Initializes the elements to 0
+thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); 
 
 rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
-rmm::device_uvector<int32_t> v2{100, s, mr}; // Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the explicitly provided resource
+// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the resource `mr`
+rmm::device_uvector<int32_t> v2{100, s, mr}; 
 ```
 
 ### `device_scalar`
@@ -416,7 +421,8 @@ modifying the value in device memory from the host, or retrieving the value from
 #### Example
 ```c++
 cuda_stream_view s{...};
-rmm::device_scalar<int32_t> a{s}; // Allocates uninitialized storage for a single `int32_t` in device memory
+// Allocates uninitialized storage for a single `int32_t` in device memory
+rmm::device_scalar<int32_t> a{s}; 
 a.set_value(42, s); // Updates the value in device memory to `42` on stream `s`
 
 kernel<<<...,s.value()>>>(a.data()); // Pass raw pointer to underlying element in device memory

From 0998fe02a57f92c0c4ff0337e492674789a2db0b Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Fri, 4 Dec 2020 15:02:17 +1100
Subject: [PATCH 06/10] Refactoring thrust support.

---
 include/rmm/device_vector.hpp                 | 31 +++++++++++++++++++
 .../rmm/{thrust_rmm.hpp => exec_policy.hpp}   | 17 +++-------
 include/rmm/thrust_rmm_allocator.h            | 13 ++------
 3 files changed, 38 insertions(+), 23 deletions(-)
 create mode 100644 include/rmm/device_vector.hpp
 rename include/rmm/{thrust_rmm.hpp => exec_policy.hpp} (75%)

diff --git a/include/rmm/device_vector.hpp b/include/rmm/device_vector.hpp
new file mode 100644
index 000000000..c38f06b66
--- /dev/null
+++ b/include/rmm/device_vector.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
+
+#include <thrust/device_vector.h>
+
+namespace rmm {
+/**
+ * @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
+ *
+ */
+template <typename T>
+using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;
+
+}  // namespace rmm
diff --git a/include/rmm/thrust_rmm.hpp b/include/rmm/exec_policy.hpp
similarity index 75%
rename from include/rmm/thrust_rmm.hpp
rename to include/rmm/exec_policy.hpp
index 0fb0a21d4..1f694f5f9 100644
--- a/include/rmm/thrust_rmm.hpp
+++ b/include/rmm/exec_policy.hpp
@@ -15,25 +15,20 @@
  */
 
 /**
- @file thrust_rmm.hpp
- Allocator class compatible with thrust arrays that uses RMM device memory
- manager.
+ @file exec_policy.hpp
+ Thrust execution policy that uses RMM's Thrust Allocator Adaptor.
  */
 
-#ifndef THRUST_RMM_HPP
-#define THRUST_RMM_HPP
+#pragma once
 
-#include <rmm/thrust_rmm_allocator.h>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/thrust_allocator_adaptor.hpp>
 
-#include <thrust/device_vector.h>
-#include <thrust/execution_policy.h>
-
 namespace rmm {
 
 /**
- * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation.
+ * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on
+ * the specified stream.
  */
 inline auto exec_policy(cuda_stream_view stream = cuda_stream_default)
 {
@@ -41,5 +36,3 @@ inline auto exec_policy(cuda_stream_view stream = cuda_stream_default)
 }
 
 }  // namespace rmm
-
-#endif  // THRUST_RMM_HPP
diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h
index 5228f7e84..30a664afd 100644
--- a/include/rmm/thrust_rmm_allocator.h
+++ b/include/rmm/thrust_rmm_allocator.h
@@ -14,22 +14,15 @@
  * limitations under the License.
  */
 
-#ifndef THRUST_RMM_ALLOCATOR_H
-#define THRUST_RMM_ALLOCATOR_H
+#pragma once
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_vector.hpp>
 #include <rmm/mr/device/thrust_allocator_adaptor.hpp>
 
-#include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 
 namespace rmm {
-/**
- * @brief Alias for a thrust::device_vector that uses RMM for memory allocation.
- *
- */
-template <typename T>
-using device_vector = thrust::device_vector<T, rmm::mr::thrust_allocator<T>>;
 
 using par_t         = decltype(thrust::cuda::par(*(new rmm::mr::thrust_allocator<char>())));
 using deleter_t     = std::function<void(par_t *)>;
@@ -57,5 +50,3 @@ inline exec_policy_t exec_policy(cudaStream_t stream = 0)
 }
 
 }  // namespace rmm
-
-#endif  // THRUST_RMM_ALLOCATOR_H

From 8bb59bf24ccd1006b983a69d0d5dabb1e91f6946 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 8 Dec 2020 10:25:23 +1100
Subject: [PATCH 07/10] Reorder stream, mr

---
 include/rmm/mr/device/thrust_allocator_adaptor.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index 77b7bd222..e7acd00fd 100644
--- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -73,7 +73,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
    * @param mr The resource to be used for device memory allocation
    * @param stream The stream to be used for device memory (de)allocation
    */
-  thrust_allocator(device_memory_resource* mr, cuda_stream_view stream) : _mr(mr), _stream{stream}
+  thrust_allocator(cuda_stream_view stream, device_memory_resource* mr) : _stream{stream}, _mr(mr)
   {
   }
 
@@ -122,8 +122,8 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
   cuda_stream_view stream() const noexcept { return _stream; }
 
  private:
-  device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
   cuda_stream_view _stream{};
+  device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
 };
 }  // namespace mr
 }  // namespace rmm

From bef943d835bc91d33c48beb6eb7680312c842686 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 8 Dec 2020 10:26:42 +1100
Subject: [PATCH 08/10] Update readme

---
 README.md | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 014f2422a..750ab97c3 100644
--- a/README.md
+++ b/README.md
@@ -403,8 +403,9 @@ contained elements. This optimization restricts the types `T` to trivially copya
 
 ```c++
 cuda_stream_view s{...};
-// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the default 
-rmm::device_uvector<int32_t> v(100, s); resource
+// Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the
+// default resource
+rmm::device_uvector<int32_t> v(100, s);
 // Initializes the elements to 0
 thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); 
 
@@ -480,12 +481,8 @@ RMM provides `rmm::mr::thrust_allocator` as a conforming Thrust allocator that u
 To instruct a Thrust algorithm to use `rmm::mr::thrust_allocator` to allocate temporary storage, you
 can use the custom Thrust CUDA device execution policy: `rmm::exec_policy(stream)`.
 
-`rmm::exec_policy(stream)` returns a `std::unique_ptr` to a Thrust execution policy that uses
-`rmm::mr::thrust_allocator` for temporary allocations. In order to specify that the Thrust algorithm
-be executed on a specific stream, the usage is:
-
 ```c++
-thrust::sort(rmm::exec_policy(stream)->on(stream), ...);
+thrust::sort(rmm::exec_policy(stream, ...);
 ```
 
 The first `stream` argument is the `stream` to use for `rmm::mr::thrust_allocator`.

From f4108ef025de29079dcd4fdbb4632dcdfd5c946d Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 8 Dec 2020 10:27:38 +1100
Subject: [PATCH 09/10] Add optional MR parameter to exec_policy

---
 include/rmm/exec_policy.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp
index 1f694f5f9..34af512e7 100644
--- a/include/rmm/exec_policy.hpp
+++ b/include/rmm/exec_policy.hpp
@@ -30,9 +30,10 @@ namespace rmm {
  * @brief Returns a Thrust CUDA execution policy that uses RMM for temporary memory allocation on
  * the specified stream.
  */
-inline auto exec_policy(cuda_stream_view stream = cuda_stream_default)
+inline auto exec_policy(cuda_stream_view stream             = cuda_stream_default,
+                        rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
 {
-  return thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream)).on(stream.value());
+  return thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream, mr)).on(stream.value());
 }
 
 }  // namespace rmm

From fac037092d938dab55a1620e90ef050bc7ed5acf Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Tue, 8 Dec 2020 10:44:49 +1100
Subject: [PATCH 10/10] Changelog for #647

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 479582348..00e36edd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,8 +4,11 @@
 
 ## Improvements
 
+- PR #647 Simplify `rmm::exec_policy` and refactor Thrust support
+
 ## Bug Fixes
 
+
 # RMM 0.17.0 (Date TBD)
 
 ## New Features