Skip to content

Commit 301ada7

Browse files
mzient authored and cyyever committed
Remove Scratchpad from KernelManager (NVIDIA#3678)
* Remove ScratchpadAllocator from KernelManager.
* Remove num_threads/thread_idx from KernelManager API.
* Move CallAtExit to a separate header; rename AtExit to AtScopeExit.
* Remove redundant calls to KernelManager::Initialize.

Signed-off-by: Michał Zientkiewicz <mzient@gmail.com>
1 parent ef2c22d commit 301ada7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

56 files changed

+242
-405
lines changed

dali/core/mm/default_resources.cc

+5-19
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
2222
#include "dali/core/mm/async_pool.h"
2323
#include "dali/core/mm/composite_resource.h"
2424
#include "dali/core/mm/cuda_vm_resource.h"
25+
#include "dali/core/call_at_exit.h"
2526

2627
namespace dali {
2728
namespace mm {
@@ -138,21 +139,6 @@ struct CUDARTLoader {
138139
}
139140
};
140141

141-
142-
template <typename Callable>
143-
struct CallAtExit {
144-
explicit CallAtExit(Callable &&c) : callable(std::move(c)) {}
145-
~CallAtExit() {
146-
callable();
147-
}
148-
Callable callable;
149-
};
150-
151-
template <typename Callable>
152-
CallAtExit<Callable> AtExit(Callable &&c) {
153-
return CallAtExit<Callable>(std::forward<Callable>(c));
154-
}
155-
156142
bool UseDeviceMemoryPool() {
157143
static bool value = []() {
158144
const char *env = std::getenv("DALI_USE_DEVICE_MEM_POOL");
@@ -239,7 +225,7 @@ const std::shared_ptr<pinned_async_resource> &ShareDefaultResourceImpl<memory_ki
239225
if (!g_resources.pinned_async) {
240226
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
241227
g_resources.pinned_async = CreateDefaultPinnedResource();
242-
static auto cleanup = AtExit([] {
228+
static auto cleanup = AtScopeExit([] {
243229
g_resources.ReleasePinned();
244230
});
245231
}
@@ -254,7 +240,7 @@ const std::shared_ptr<managed_async_resource> &ShareDefaultResourceImpl<memory_k
254240
if (!g_resources.managed) {
255241
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
256242
g_resources.managed = CreateDefaultManagedResource();
257-
static auto cleanup = AtExit([] {
243+
static auto cleanup = AtScopeExit([] {
258244
g_resources.ReleaseManaged();
259245
});
260246
}
@@ -274,7 +260,7 @@ const std::shared_ptr<device_async_resource> &ShareDefaultDeviceResourceImpl(int
274260
DeviceGuard devg(device_id);
275261
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
276262
g_resources.device[device_id] = CreateDefaultDeviceResource();
277-
static auto cleanup = AtExit([] {
263+
static auto cleanup = AtScopeExit([] {
278264
g_resources.ReleaseDevice();
279265
});
280266
}

dali/kernels/audio/mel_scale/mel_filter_bank_gpu_test.cc

+3-4
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
1+
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -145,16 +145,15 @@ TEST_P(MelScaleGpuTest, MelScaleGpuTest) {
145145
args.normalize = false;
146146

147147
using Kernel = kernels::audio::MelFilterBankGpu<T>;
148-
kmgr.Initialize<Kernel>();
149-
kmgr.Resize<Kernel>(1, 1);
148+
kmgr.Resize<Kernel>(1);
150149
auto in_view = in_.gpu();
151150
auto req = kmgr.Setup<Kernel>(0, ctx, in_view, args);
152151
ASSERT_EQ(out_shape, req.output_shapes[0]);
153152
TestTensorList<float> out;
154153
out.reshape(out_shape);
155154

156155
auto out_view = out.gpu();
157-
kmgr.Run<Kernel>(0, 0, ctx, out_view, in_view);
156+
kmgr.Run<Kernel>(0, ctx, out_view, in_view);
158157
auto out_view_cpu = out.cpu();
159158
CUDA_CALL(cudaStreamSynchronize(0));
160159
for (int b = 0; b < batch_size; ++b) {

dali/kernels/common/join/tensor_join_gpu_impl_test.cu

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
1+
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -68,7 +68,7 @@ struct TensorJoinGPUTest : public ::testing::Test {
6868
using Kernel = TensorJoinGPU<T, new_axis>;
6969
CUDAStream stream = CUDAStream::Create(true);
7070
KernelManager mgr;
71-
mgr.Resize<Kernel>(1, 1);
71+
mgr.Resize<Kernel>(1);
7272
KernelContext ctx;
7373
ctx.gpu.stream = stream;
7474

@@ -79,7 +79,7 @@ struct TensorJoinGPUTest : public ::testing::Test {
7979
KernelRequirements &req = mgr.Setup<Kernel>(0, ctx, make_cspan(in_gpu_tls), axis);
8080
ASSERT_EQ(req.output_shapes.size(), 1);
8181
ASSERT_EQ(req.output_shapes[0], out_shape);
82-
mgr.Run<Kernel>(0, 0, ctx, out.gpu(stream), make_cspan(in_gpu_tls));
82+
mgr.Run<Kernel>(0, ctx, out.gpu(stream), make_cspan(in_gpu_tls));
8383

8484
CUDA_CALL(cudaStreamSynchronize(stream));
8585
CheckResult(stream);

dali/kernels/imgproc/jpeg/jpeg_distortion_gpu_test.cu

+4-5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
1+
// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -108,18 +108,17 @@ class JpegDistortionTestGPU : public ::testing::TestWithParam<std::tuple<bool, b
108108
CUDAEvent start = CUDAEvent::CreateWithFlags(0);
109109
CUDAEvent end = CUDAEvent::CreateWithFlags(0);
110110

111-
kmgr_.Initialize<Kernel>();
112-
kmgr_.Resize<Kernel>(1, 1);
111+
kmgr_.Resize<Kernel>(1);
113112

114113
KernelContext ctx;
115114
ctx.gpu.stream = stream;
116115
auto req = kmgr_.Setup<Kernel>(0, ctx, in_view.shape, horz_subsample, vert_subsample);
117116
if (perf_run) // warm up
118-
kmgr_.Run<Kernel>(0, 0, ctx, out_view, in_view, args...);
117+
kmgr_.Run<Kernel>(0, ctx, out_view, in_view, args...);
119118

120119
CUDA_CALL(cudaEventRecord(start, stream));
121120

122-
kmgr_.Run<Kernel>(0, 0, ctx, out_view, in_view, args...);
121+
kmgr_.Run<Kernel>(0, ctx, out_view, in_view, args...);
123122
CUDA_CALL(cudaGetLastError());
124123

125124
CUDA_CALL(cudaEventRecord(end, stream));

dali/kernels/kernel_manager.cc

-46
This file was deleted.

0 commit comments

Comments
 (0)