
Add NVTX support and RMM_FUNC_RANGE() macro #1558

Merged: 13 commits, May 23, 2024
11 changes: 10 additions & 1 deletion CMakeLists.txt
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -38,6 +38,7 @@ rapids_cmake_build_type(Release)
# ##################################################################################################
# * build options ----------------------------------------------------------------------------------

option(USE_NVTX "Build with NVTX support" ON)
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
set(RMM_LOGGING_LEVEL
@@ -46,6 +47,7 @@ set(RMM_LOGGING_LEVEL
set_property(CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR"
"CRITICAL" "OFF")

message(VERBOSE "RMM: Build with NVTX support: ${USE_NVTX}")
# Set logging level. Must go before including gtests and benchmarks. Set the possible values of
# build type for cmake-gui
message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
@@ -71,6 +73,7 @@ rapids_cpm_init()
include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_cccl.cmake)
include(cmake/thirdparty/get_nvtx.cmake)

# ##################################################################################################
# * library targets --------------------------------------------------------------------------------
@@ -93,9 +96,15 @@ target_link_libraries(rmm INTERFACE CCCL::CCCL)
target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_link_libraries(rmm INTERFACE nvtx3-cpp)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# Disable NVTX if necessary
if(NOT USE_NVTX)
target_compile_definitions(rmm PUBLIC NVTX_DISABLE)
endif()

# ##################################################################################################
# * tests and benchmarks ---------------------------------------------------------------------------

23 changes: 23 additions & 0 deletions cmake/thirdparty/get_nvtx.cmake
@@ -0,0 +1,23 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone NVTX3
function(find_and_configure_nvtx3)

include(${rapids-cmake-dir}/cpm/nvtx3.cmake)
rapids_cpm_nvtx3(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports)

endfunction()

find_and_configure_nvtx3()
Contributor:

TBH I find these extra modules to be overkill when they're so trivial and would prefer that we inline them. In a few of the more recent cases we have done so. @robertmaynard WDYT? This feels like a pattern that's been copy-pasted many times into a number of scenarios where it isn't really needed.

Specifically I'm talking about just moving lines 18/19 into CMakeLists.txt and removing this file.

Member Author:

While I agree, I also feel that this PR is consistent with the style of 3 other rapids-cmake packages used in RMM (CCCL, fmt, spdlog -- which has additional local cmake code for some reason). Therefore I would like to defer this change to a later PR, which should be applied in unison with the same style change across all of RAPIDS.

61 changes: 61 additions & 0 deletions include/rmm/detail/nvtx/ranges.hpp
Contributor:

It really makes me happy to see how simple this is :)

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <nvtx3/nvtx3.hpp>

namespace rmm {
/**
* @brief Tag type for librmm's NVTX domain.
*/
struct librmm_domain {
static constexpr char const* name{"librmm"}; ///< Name of the librmm domain
};

/**
* @brief Alias for an NVTX range in the librmm domain.
*
* Customizes an NVTX range with the given input.
*
* Example:
* ```
* void some_function(){
* rmm::scoped_range rng{"custom_name"}; // Customizes range name
* ...
* }
* ```
*/
using scoped_range = ::nvtx3::scoped_range_in<librmm_domain>;

} // namespace rmm

/**
* @brief Convenience macro for generating an NVTX range in the `librmm` domain
* from the lifetime of a function.
*
* Uses the name of the immediately enclosing function returned by `__func__` to
* name the range.
*
* Example:
* ```
* void some_function(){
* RMM_FUNC_RANGE();
* ...
* }
* ```
*/
#define RMM_FUNC_RANGE() NVTX3_FUNC_RANGE_IN(rmm::librmm_domain)
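
For reference, a minimal usage sketch (hypothetical caller code, not part of this diff) combining both utilities: `RMM_FUNC_RANGE()` names a range after the enclosing function via `__func__`, while `rmm::scoped_range` opens an explicitly named nested range; both live in the `librmm` domain and become no-ops when RMM is built with `-DUSE_NVTX=OFF` (which defines `NVTX_DISABLE`).

```cpp
#include <rmm/detail/nvtx/ranges.hpp>

#include <cstddef>
#include <cstdlib>

// Hypothetical caller, for illustration only.
void* example_allocate(std::size_t bytes)
{
  RMM_FUNC_RANGE();  // range named "example_allocate" in the librmm domain

  rmm::scoped_range zero_fill{"zero_fill"};  // nested, custom-named range
  return std::calloc(bytes, 1);              // stand-in for real allocation work
}
```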
9 changes: 9 additions & 0 deletions include/rmm/mr/device/device_memory_resource.hpp
@@ -17,6 +17,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>

@@ -117,6 +118,7 @@ class device_memory_resource {
*/
void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
{
RMM_FUNC_RANGE();
Contributor:

question: Do we want the same set of annotations for the host_memory_resource as well?

Member Author:

Yes. I added them. Note that the new pinned_host_memory_resource does not derive from host_memory_resource so I had to add ranges to it explicitly. This is the way it will have to be done for all MRs that just implement the concepts in the future, unfortunately.

return do_allocate(bytes, stream);
}
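
Relatedly, a minimal sketch (hypothetical, illustration only) of how a resource that only implements the allocate/deallocate concepts would carry these annotations: with no `device_memory_resource` or `host_memory_resource` base class to inherit from, each public entry point opens its own range.

```cpp
#include <rmm/detail/nvtx/ranges.hpp>

#include <cstddef>
#include <cstdlib>

// Hypothetical resource that does not derive from an RMM base class:
// every public entry point must call RMM_FUNC_RANGE() itself, since
// nothing is inherited.
class concept_only_resource {
 public:
  void* allocate(std::size_t bytes,
                 [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
  {
    RMM_FUNC_RANGE();  // added explicitly in each entry point
    return std::malloc(bytes);
  }

  void deallocate(void* ptr,
                  [[maybe_unused]] std::size_t bytes,
                  [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
  {
    RMM_FUNC_RANGE();
    std::free(ptr);
  }

  bool operator==(concept_only_resource const&) const { return true; }
};
```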

@@ -138,6 +140,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, stream);
}

@@ -173,6 +176,7 @@
*/
void* allocate(std::size_t bytes, std::size_t alignment)
{
RMM_FUNC_RANGE();
return do_allocate(rmm::align_up(bytes, alignment), cuda_stream_view{});
}

@@ -191,6 +195,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, rmm::align_up(bytes, alignment), cuda_stream_view{});
}

@@ -209,6 +214,7 @@
*/
void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
return do_allocate(rmm::align_up(bytes, alignment), stream);
}

@@ -226,6 +232,7 @@
*/
void* allocate_async(std::size_t bytes, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
return do_allocate(bytes, stream);
}

@@ -248,6 +255,7 @@
std::size_t alignment,
cuda_stream_view stream)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, rmm::align_up(bytes, alignment), stream);
}

@@ -266,6 +274,7 @@
*/
void deallocate_async(void* ptr, std::size_t bytes, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, stream);
}

6 changes: 5 additions & 1 deletion include/rmm/mr/host/host_memory_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +15,8 @@
*/
#pragma once

#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>

#include <cstddef>
@@ -76,6 +78,7 @@ class host_memory_resource {
*/
void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
RMM_FUNC_RANGE();
return do_allocate(bytes, alignment);
}

@@ -94,6 +97,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, alignment);
}

13 changes: 13 additions & 0 deletions include/rmm/mr/pinned_host_memory_resource.hpp
@@ -18,6 +18,7 @@
#include <rmm/aligned.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>
#include <cuda/stream_ref>
@@ -63,6 +64,8 @@ class pinned_host_memory_resource {
static void* allocate(std::size_t bytes,
[[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
{
RMM_FUNC_RANGE();

// don't allocate anything if the user requested zero bytes
if (0 == bytes) { return nullptr; }

@@ -84,6 +87,8 @@
std::size_t bytes,
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
{
RMM_FUNC_RANGE();

rmm::detail::aligned_host_deallocate(
ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
}
@@ -104,6 +109,8 @@
*/
static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
{
RMM_FUNC_RANGE();

return allocate(bytes);
}

@@ -126,6 +133,8 @@
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream)
{
RMM_FUNC_RANGE();

return allocate(bytes, alignment);
}

@@ -142,6 +151,8 @@
std::size_t bytes,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
RMM_FUNC_RANGE();

return deallocate(ptr, bytes);
}

@@ -161,6 +172,8 @@
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
RMM_FUNC_RANGE();

return deallocate(ptr, bytes, alignment);
}
// NOLINTEND(bugprone-easily-swappable-parameters)