
Add NVTX support and RMM_FUNC_RANGE() macro #1558

Merged: 13 commits, May 23, 2024
11 changes: 10 additions & 1 deletion CMakeLists.txt
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -38,6 +38,7 @@ rapids_cmake_build_type(Release)
# ##################################################################################################
# * build options ----------------------------------------------------------------------------------

option(USE_NVTX "Build with NVTX support" ON)
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
set(RMM_LOGGING_LEVEL
@@ -46,6 +47,7 @@ set(RMM_LOGGING_LEVEL
set_property(CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR"
"CRITICAL" "OFF")

message(VERBOSE "RMM: Build with NVTX support: ${USE_NVTX}")
# Set logging level. Must go before including gtests and benchmarks. Set the possible values of
# build type for cmake-gui
message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
@@ -71,6 +73,7 @@ rapids_cpm_init()
include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_cccl.cmake)
include(cmake/thirdparty/get_nvtx.cmake)

# ##################################################################################################
# * library targets --------------------------------------------------------------------------------
@@ -93,9 +96,15 @@ target_link_libraries(rmm INTERFACE CCCL::CCCL)
target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_link_libraries(rmm INTERFACE nvtx3-cpp)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# Disable NVTX if necessary
if(NOT USE_NVTX)
target_compile_definitions(rmm PUBLIC NVTX_DISABLE)
endif()

# ##################################################################################################
# * tests and benchmarks ---------------------------------------------------------------------------

23 changes: 23 additions & 0 deletions cmake/thirdparty/get_nvtx.cmake
@@ -0,0 +1,23 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone NVTX3
function(find_and_configure_nvtx3)

include(${rapids-cmake-dir}/cpm/nvtx3.cmake)
rapids_cpm_nvtx3(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports)

endfunction()

find_and_configure_nvtx3()
Contributor:

TBH I find these extra modules to be overkill when they're so trivial and would prefer that we inline them. In a few of the more recent cases we have done so. @robertmaynard WDYT? This feels like a pattern that's been copy-pasted many times into a number of scenarios where it isn't really needed.

Specifically I'm talking about just moving lines 18/19 into CMakeLists.txt and removing this file.

Member Author:

While I agree, I also feel that this PR is consistent with the style of 3 other rapids-cmake packages used in RMM (CCCL, fmt, spdlog -- which has additional local cmake code for some reason). Therefore I would like to defer this change to a later PR, which should be applied in unison with the same style change across all of RAPIDS.

61 changes: 61 additions & 0 deletions include/rmm/detail/nvtx/ranges.hpp
Contributor:

It really makes me happy to see how simple this is :)

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <nvtx3/nvtx3.hpp>

namespace rmm {
/**
* @brief Tag type for librmm's NVTX domain.
*/
struct librmm_domain {
static constexpr char const* name{"librmm"}; ///< Name of the librmm domain
};

/**
* @brief Alias for an NVTX range in the librmm domain.
*
* Customizes an NVTX range with the given input.
*
* Example:
* ```
* void some_function(){
* rmm::scoped_range rng{"custom_name"}; // Customizes range name
* ...
* }
* ```
*/
using scoped_range = ::nvtx3::scoped_range_in<librmm_domain>;

} // namespace rmm

/**
* @brief Convenience macro for generating an NVTX range in the `librmm` domain
* from the lifetime of a function.
*
* Uses the name of the immediately enclosing function returned by `__func__` to
* name the range.
*
* Example:
* ```
* void some_function(){
* RMM_FUNC_RANGE();
* ...
* }
* ```
*/
#define RMM_FUNC_RANGE() NVTX3_FUNC_RANGE_IN(rmm::librmm_domain)
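
For reference, a minimal usage sketch (hypothetical caller code, not part of this diff) combining both utilities: `RMM_FUNC_RANGE()` names a range after the enclosing function via `__func__`, while `rmm::scoped_range` opens an explicitly named nested range; both live in the `librmm` domain and become no-ops when RMM is built with `-DUSE_NVTX=OFF` (which defines `NVTX_DISABLE`).

```cpp
#include <rmm/detail/nvtx/ranges.hpp>

#include <cstddef>
#include <cstdlib>

// Hypothetical caller, for illustration only.
void* example_allocate(std::size_t bytes)
{
  RMM_FUNC_RANGE();  // range named "example_allocate" in the librmm domain

  rmm::scoped_range zero_fill{"zero_fill"};  // nested, custom-named range
  return std::calloc(bytes, 1);              // stand-in for real allocation work
}
```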
9 changes: 9 additions & 0 deletions include/rmm/mr/device/device_memory_resource.hpp
@@ -17,6 +17,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>

@@ -117,6 +118,7 @@ class device_memory_resource {
*/
void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
{
RMM_FUNC_RANGE();
Contributor:

question: Do we want the same set of annotations for the host_memory_resource as well?

Member Author:

Yes. I added them. Note that the new pinned_host_memory_resource does not derive from host_memory_resource so I had to add ranges to it explicitly. This is the way it will have to be done for all MRs that just implement the concepts in the future, unfortunately.

return do_allocate(bytes, stream);
}
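
Relatedly, a minimal sketch (hypothetical, illustration only) of how a resource that only implements the allocate/deallocate concepts would carry these annotations: with no `device_memory_resource` or `host_memory_resource` base class to inherit from, each public entry point opens its own range.

```cpp
#include <rmm/detail/nvtx/ranges.hpp>

#include <cstddef>
#include <cstdlib>

// Hypothetical resource that does not derive from an RMM base class:
// every public entry point must call RMM_FUNC_RANGE() itself, since
// nothing is inherited.
class concept_only_resource {
 public:
  void* allocate(std::size_t bytes,
                 [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
  {
    RMM_FUNC_RANGE();  // added explicitly in each entry point
    return std::malloc(bytes);
  }

  void deallocate(void* ptr,
                  [[maybe_unused]] std::size_t bytes,
                  [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
  {
    RMM_FUNC_RANGE();
    std::free(ptr);
  }

  bool operator==(concept_only_resource const&) const { return true; }
};
```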

@@ -138,6 +140,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, stream);
}

@@ -173,6 +176,7 @@
*/
void* allocate(std::size_t bytes, std::size_t alignment)
{
RMM_FUNC_RANGE();
return do_allocate(rmm::align_up(bytes, alignment), cuda_stream_view{});
}

@@ -191,6 +195,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, rmm::align_up(bytes, alignment), cuda_stream_view{});
}

@@ -209,6 +214,7 @@
*/
void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
return do_allocate(rmm::align_up(bytes, alignment), stream);
}

@@ -226,6 +232,7 @@
*/
void* allocate_async(std::size_t bytes, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
return do_allocate(bytes, stream);
}

@@ -248,6 +255,7 @@
std::size_t alignment,
cuda_stream_view stream)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, rmm::align_up(bytes, alignment), stream);
}

@@ -266,6 +274,7 @@
*/
void deallocate_async(void* ptr, std::size_t bytes, cuda_stream_view stream)
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, stream);
}

6 changes: 5 additions & 1 deletion include/rmm/mr/host/host_memory_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +15,8 @@
*/
#pragma once

#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>

#include <cstddef>
@@ -76,6 +78,7 @@ class host_memory_resource {
*/
void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
RMM_FUNC_RANGE();
return do_allocate(bytes, alignment);
}

@@ -94,6 +97,7 @@
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, alignment);
}

13 changes: 13 additions & 0 deletions include/rmm/mr/pinned_host_memory_resource.hpp
@@ -18,6 +18,7 @@
#include <rmm/aligned.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/nvtx/ranges.hpp>

#include <cuda/memory_resource>
#include <cuda/stream_ref>
@@ -63,6 +64,8 @@ class pinned_host_memory_resource {
static void* allocate(std::size_t bytes,
[[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
{
RMM_FUNC_RANGE();

// don't allocate anything if the user requested zero bytes
if (0 == bytes) { return nullptr; }

@@ -84,6 +87,8 @@
std::size_t bytes,
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
{
RMM_FUNC_RANGE();

rmm::detail::aligned_host_deallocate(
ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
}
@@ -104,6 +109,8 @@
*/
static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
{
RMM_FUNC_RANGE();

return allocate(bytes);
}

@@ -126,6 +133,8 @@
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream)
{
RMM_FUNC_RANGE();

return allocate(bytes, alignment);
}

@@ -142,6 +151,8 @@
std::size_t bytes,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
RMM_FUNC_RANGE();

return deallocate(ptr, bytes);
}

@@ -161,6 +172,8 @@
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
RMM_FUNC_RANGE();

return deallocate(ptr, bytes, alignment);
}
// NOLINTEND(bugprone-easily-swappable-parameters)