Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Scratchpad from KernelManager #3678

Merged
merged 3 commits into from
Feb 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 5 additions & 19 deletions dali/core/mm/default_resources.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -22,6 +22,7 @@
#include "dali/core/mm/async_pool.h"
#include "dali/core/mm/composite_resource.h"
#include "dali/core/mm/cuda_vm_resource.h"
#include "dali/core/call_at_exit.h"

namespace dali {
namespace mm {
Expand Down Expand Up @@ -138,21 +139,6 @@ struct CUDARTLoader {
}
};


/// RAII scope guard that invokes `callable` when the guard is destroyed.
///
/// Fix over the previous version: the implicitly generated copy/move
/// operations would leave two live guards owning the same cleanup action,
/// so the callable ran once per instance (double invocation). Copying and
/// all assignment are now deleted; moving transfers ownership by
/// deactivating the moved-from guard, so the callable runs exactly once.
template <typename Callable>
struct CallAtExit {
  explicit CallAtExit(Callable &&c) : callable(std::move(c)) {}

  // Move transfers responsibility for the cleanup to the new guard;
  // the source is deactivated so its destructor becomes a no-op.
  CallAtExit(CallAtExit &&other) : callable(std::move(other.callable)), active(other.active) {
    other.active = false;
  }

  // Copying would duplicate the cleanup action — forbid it, along with
  // assignment (reassigning a guard has no sensible cleanup semantics).
  CallAtExit(const CallAtExit &) = delete;
  CallAtExit &operator=(const CallAtExit &) = delete;
  CallAtExit &operator=(CallAtExit &&) = delete;

  ~CallAtExit() {
    if (active)
      callable();
  }

  Callable callable;
  bool active = true;
};

/// Creates a scope guard that runs `c` at the end of the enclosing scope.
///
/// NOTE(review): with an lvalue argument, `Callable` deduces to `T&` and the
/// guard stores a reference — the caller must keep the functor alive for the
/// guard's lifetime. All call sites in this file pass rvalue lambdas, which
/// are stored by value.
template <typename Callable>
CallAtExit<Callable> AtExit(Callable &&c) {
  return CallAtExit<Callable>(std::forward<Callable>(c));
}
Comment on lines -143 to -154
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been moved to a separate header.


bool UseDeviceMemoryPool() {
static bool value = []() {
const char *env = std::getenv("DALI_USE_DEVICE_MEM_POOL");
Expand Down Expand Up @@ -239,7 +225,7 @@ const std::shared_ptr<pinned_async_resource> &ShareDefaultResourceImpl<memory_ki
if (!g_resources.pinned_async) {
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
g_resources.pinned_async = CreateDefaultPinnedResource();
static auto cleanup = AtExit([] {
static auto cleanup = AtScopeExit([] {
g_resources.ReleasePinned();
});
}
Expand All @@ -254,7 +240,7 @@ const std::shared_ptr<managed_async_resource> &ShareDefaultResourceImpl<memory_k
if (!g_resources.managed) {
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
g_resources.managed = CreateDefaultManagedResource();
static auto cleanup = AtExit([] {
static auto cleanup = AtScopeExit([] {
g_resources.ReleaseManaged();
});
}
Expand All @@ -274,7 +260,7 @@ const std::shared_ptr<device_async_resource> &ShareDefaultDeviceResourceImpl(int
DeviceGuard devg(device_id);
static CUDARTLoader init_cuda; // force initialization of CUDA before creating the resource
g_resources.device[device_id] = CreateDefaultDeviceResource();
static auto cleanup = AtExit([] {
static auto cleanup = AtScopeExit([] {
g_resources.ReleaseDevice();
});
}
Expand Down
7 changes: 3 additions & 4 deletions dali/kernels/audio/mel_scale/mel_filter_bank_gpu_test.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -145,16 +145,15 @@ TEST_P(MelScaleGpuTest, MelScaleGpuTest) {
args.normalize = false;

using Kernel = kernels::audio::MelFilterBankGpu<T>;
kmgr.Initialize<Kernel>();
kmgr.Resize<Kernel>(1, 1);
kmgr.Resize<Kernel>(1);
auto in_view = in_.gpu();
auto req = kmgr.Setup<Kernel>(0, ctx, in_view, args);
ASSERT_EQ(out_shape, req.output_shapes[0]);
TestTensorList<float> out;
out.reshape(out_shape);

auto out_view = out.gpu();
kmgr.Run<Kernel>(0, 0, ctx, out_view, in_view);
kmgr.Run<Kernel>(0, ctx, out_view, in_view);
auto out_view_cpu = out.cpu();
CUDA_CALL(cudaStreamSynchronize(0));
for (int b = 0; b < batch_size; ++b) {
Expand Down
6 changes: 3 additions & 3 deletions dali/kernels/common/join/tensor_join_gpu_impl_test.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,7 +68,7 @@ struct TensorJoinGPUTest : public ::testing::Test {
using Kernel = TensorJoinGPU<T, new_axis>;
CUDAStream stream = CUDAStream::Create(true);
KernelManager mgr;
mgr.Resize<Kernel>(1, 1);
mgr.Resize<Kernel>(1);
KernelContext ctx;
ctx.gpu.stream = stream;

Expand All @@ -79,7 +79,7 @@ struct TensorJoinGPUTest : public ::testing::Test {
KernelRequirements &req = mgr.Setup<Kernel>(0, ctx, make_cspan(in_gpu_tls), axis);
ASSERT_EQ(req.output_shapes.size(), 1);
ASSERT_EQ(req.output_shapes[0], out_shape);
mgr.Run<Kernel>(0, 0, ctx, out.gpu(stream), make_cspan(in_gpu_tls));
mgr.Run<Kernel>(0, ctx, out.gpu(stream), make_cspan(in_gpu_tls));

CUDA_CALL(cudaStreamSynchronize(stream));
CheckResult(stream);
Expand Down
9 changes: 4 additions & 5 deletions dali/kernels/imgproc/jpeg/jpeg_distortion_gpu_test.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -108,18 +108,17 @@ class JpegDistortionTestGPU : public ::testing::TestWithParam<std::tuple<bool, b
CUDAEvent start = CUDAEvent::CreateWithFlags(0);
CUDAEvent end = CUDAEvent::CreateWithFlags(0);

kmgr_.Initialize<Kernel>();
kmgr_.Resize<Kernel>(1, 1);
kmgr_.Resize<Kernel>(1);

KernelContext ctx;
ctx.gpu.stream = stream;
auto req = kmgr_.Setup<Kernel>(0, ctx, in_view.shape, horz_subsample, vert_subsample);
if (perf_run) // warm up
kmgr_.Run<Kernel>(0, 0, ctx, out_view, in_view, args...);
kmgr_.Run<Kernel>(0, ctx, out_view, in_view, args...);

CUDA_CALL(cudaEventRecord(start, stream));

kmgr_.Run<Kernel>(0, 0, ctx, out_view, in_view, args...);
kmgr_.Run<Kernel>(0, ctx, out_view, in_view, args...);
CUDA_CALL(cudaGetLastError());

CUDA_CALL(cudaEventRecord(end, stream));
Expand Down
46 changes: 0 additions & 46 deletions dali/kernels/kernel_manager.cc

This file was deleted.

Loading