From c1c5ca76049b39128a44f97fadb19dd5de8a268c Mon Sep 17 00:00:00 2001
From: Michal Zientkiewicz <michalz@nvidia.com>
Date: Mon, 27 Feb 2023 16:56:39 +0100
Subject: [PATCH] Rename AddWork to AddTask.

Signed-off-by: Michal Zientkiewicz <michalz@nvidia.com>
---
 dali/benchmark/thread_pool_bench.cc           | 14 ++++-----
 dali/core/exec/thread_pool_base.cc            |  2 +-
 dali/core/exec/thread_pool_base_test.cc       |  5 ++--
 .../imgcodec/decoders/decoder_parallel_impl.h |  6 ++--
 .../nvjpeg_lossless/nvjpeg_lossless.cc        |  2 +-
 dali/imgcodec/future_decode_result_test.cc    |  6 ++--
 dali/imgcodec/tools/imagemagick_test.cc       |  4 +--
 dali/kernels/common/scatter_gather.h          |  4 +--
 .../imgproc/structure/connected_components.h  | 10 +++----
 dali/kernels/imgproc/structure/label_bbox.h   |  4 +--
 dali/kernels/slice/slice_cpu.h                |  8 ++---
 .../slice_flip_normalize_permute_pad_cpu.h    |  6 ++--
 .../audio/mel_scale/mel_filter_bank.cc        |  4 +--
 dali/operators/audio/mfcc/mfcc.cc             |  4 +--
 dali/operators/audio/nonsilence_op.cc         |  4 +--
 dali/operators/audio/preemphasis_filter_op.cc |  4 +--
 dali/operators/audio/resample.cc              |  4 +--
 dali/operators/bbox/bb_flip.cc                |  4 +--
 .../decoder/audio/audio_decoder_op.cc         |  4 +--
 .../nvjpeg/nvjpeg_decoder_decoupled_api.h     | 12 ++++----
 .../decoder/video/video_decoder_cpu.cc        |  6 ++--
 .../decoder/video/video_decoder_mixed.cc      |  6 ++--
 dali/operators/generic/cast.cc                |  4 +--
 dali/operators/generic/erase/erase.cc         |  4 +--
 dali/operators/generic/join.cc                |  2 +-
 dali/operators/generic/lookup_table.cc        |  4 +--
 dali/operators/generic/one_hot.cc             |  4 +--
 dali/operators/generic/pad.cc                 |  4 +--
 dali/operators/generic/permute_batch.cc       |  4 +--
 dali/operators/generic/reduce/reduce.h        |  2 +-
 .../generic/reduce/reduce_with_mean_input.h   |  2 +-
 dali/operators/generic/transpose/transpose.cc |  4 +--
 dali/operators/geometry/coord_flip.cc         |  4 +--
 dali/operators/geometry/coord_transform.cc    |  4 +--
 .../image/color/brightness_contrast.cc        |  4 +--
 .../image/color/color_space_conversion.cc     |  4 +--
 dali/operators/image/color/color_twist.cc     |  4 +--
 .../image/convolution/gaussian_blur.cc        |  4 +--
 dali/operators/image/convolution/laplacian.cc |  4 +--
 dali/operators/image/crop/bbox_crop.cc        |  4 +--
 .../jpeg_compression_distortion_op_cpu.cc     |  4 +--
 dali/operators/image/mask/grid_mask.cc        |  4 +--
 dali/operators/image/paste/multipaste.cc      |  6 ++--
 .../image/peek_shape/peek_image_shape.h       |  4 +--
 .../remap/displacement_filter_impl_cpu.h      |  4 +--
 dali/operators/image/remap/warp.h             |  4 +--
 .../image/resize/resize_op_impl_cpu.h         |  4 +--
 dali/operators/imgcodec/decoder.h             |  4 +--
 dali/operators/imgcodec/peek_image_shape.cc   |  4 +--
 dali/operators/math/expressions/arithmetic.cc |  4 +--
 dali/operators/math/normalize/normalize.cc    |  8 ++---
 dali/operators/numba_function/numba_func.cc   |  4 +--
 .../python_function/dltensor_function.cc      |  4 +--
 dali/operators/random/rng_base_cpu.h          |  6 ++--
 dali/operators/reader/nemo_asr_reader_op.cc   |  4 +--
 dali/operators/reader/numpy_reader_gpu_op.cc  |  6 ++--
 dali/operators/reader/numpy_reader_op.cc      |  8 ++---
 dali/operators/reader/webdataset_reader_op.cc |  4 +--
 .../segmentation/random_mask_pixel.cc         |  4 +--
 .../segmentation/random_object_bbox.cc        |  6 ++--
 dali/operators/sequence/sequence_rearrange.cc |  4 +--
 .../signal/decibel/to_decibels_op_cpu.cc      |  4 +--
 dali/operators/signal/fft/power_spectrum.cc   |  4 +--
 dali/operators/signal/fft/spectrogram.cc      |  4 +--
 .../operator/builtin/conditional/merge.cc     |  4 +--
 .../operator/builtin/input_operator.cc        |  6 ++--
 .../operator/builtin/make_contiguous.cc       |  4 +--
 dali/pipeline/operator/operator.h             |  4 +--
 dali/pipeline/util/for_each_thread.h          |  2 +-
 dali/pipeline/util/thread_pool.cc             |  4 +--
 dali/pipeline/util/thread_pool.h              |  6 ++--
 dali/pipeline/util/thread_pool_test.cc        | 26 ++++++++--------
 dali/test/plugins/dummy/dummy.cc              |  4 +--
 .../create_a_custom_operator.ipynb            |  2 +-
 .../custom_operator/customdummy/dummy.cc      |  2 +-
 include/dali/core/exec/engine.h               |  4 +--
 include/dali/core/exec/thread_pool_base.h     | 30 +++++++++++--------
 77 files changed, 203 insertions(+), 196 deletions(-)

diff --git a/dali/benchmark/thread_pool_bench.cc b/dali/benchmark/thread_pool_bench.cc
index daa3941f75f..c68c2a4fd0c 100644
--- a/dali/benchmark/thread_pool_bench.cc
+++ b/dali/benchmark/thread_pool_bench.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ static void ThreadPoolArgs(benchmark::internal::Benchmark *b) {
   b->Args({batch_size, work_size_min, work_size_max, nthreads});
 }
 
-BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
+BENCHMARK_DEFINE_F(ThreadPoolBench, AddTask)(benchmark::State& st) {
   int batch_size = st.range(0);
   int work_size_min = st.range(1);
   int work_size_max = st.range(2);
@@ -40,7 +40,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
   while (st.KeepRunning()) {
     for (int i = 0; i < batch_size; i++) {
         auto size = this->RandInt(work_size_min, work_size_max);
-        thread_pool.AddWork(
+        thread_pool.AddTask(
           [&data, size, &total_count](int thread_id){
             std::vector<uint8_t> other_data;
             for (int i = 0; i < size; i++) {
@@ -59,13 +59,13 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) {
   std::cout << total_count << std::endl;
 }
 
-BENCHMARK_REGISTER_F(ThreadPoolBench, AddWork)->Iterations(1000)
+BENCHMARK_REGISTER_F(ThreadPoolBench, AddTask)->Iterations(1000)
 ->Unit(benchmark::kMicrosecond)
 ->UseRealTime()
 ->Apply(ThreadPoolArgs);
 
 
-BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
+BENCHMARK_DEFINE_F(ThreadPoolBench, AddTaskDeferred)(benchmark::State& st) {
   int batch_size = st.range(0);
   int work_size_min = st.range(1);
   int work_size_max = st.range(2);
@@ -78,7 +78,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
   while (st.KeepRunning()) {
     for (int i = 0; i < batch_size; i++) {
         auto size = this->RandInt(work_size_min, work_size_max);
-        thread_pool.AddWork(
+        thread_pool.AddTask(
           [&data, size, &total_count](int thread_id){
             std::vector<uint8_t> other_data;
             for (int i = 0; i < size; i++) {
@@ -98,7 +98,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) {
 }
 
 
-BENCHMARK_REGISTER_F(ThreadPoolBench, AddWorkDeferred)->Iterations(1000)
+BENCHMARK_REGISTER_F(ThreadPoolBench, AddTaskDeferred)->Iterations(1000)
 ->Unit(benchmark::kMicrosecond)
 ->UseRealTime()
 ->Apply(ThreadPoolArgs);
diff --git a/dali/core/exec/thread_pool_base.cc b/dali/core/exec/thread_pool_base.cc
index b47b204d7f4..affe1d3a5ac 100644
--- a/dali/core/exec/thread_pool_base.cc
+++ b/dali/core/exec/thread_pool_base.cc
@@ -125,7 +125,7 @@ void ThreadPoolBase::PopAndRunTask(std::unique_lock<std::mutex> &lock) {
   TaskFunc t = std::move(tasks_.front());
   tasks_.pop();
   lock.unlock();
-  t();
+  t(this_thread_idx());
   lock.lock();
 }
 
diff --git a/dali/core/exec/thread_pool_base_test.cc b/dali/core/exec/thread_pool_base_test.cc
index 91c22fa3070..d001f7a887f 100644
--- a/dali/core/exec/thread_pool_base_test.cc
+++ b/dali/core/exec/thread_pool_base_test.cc
@@ -20,9 +20,10 @@ namespace dali {
 
 struct SerialExecutor {
   template <typename Runnable>
-  std::enable_if_t<std::is_convertible_v<Runnable, std::function<void()>>>
+  std::enable_if_t<std::is_convertible_v<Runnable, std::function<void(int)>>>
   AddTask(Runnable &&runnable) {
-    runnable();
+    const int idx = 0;
+    runnable(idx);
   }
 };
 
diff --git a/dali/imgcodec/decoders/decoder_parallel_impl.h b/dali/imgcodec/decoders/decoder_parallel_impl.h
index 11cf6ae31a6..0de5b49e42b 100644
--- a/dali/imgcodec/decoders/decoder_parallel_impl.h
+++ b/dali/imgcodec/decoders/decoder_parallel_impl.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl {
     ROI no_roi;
     for (int i = 0; i < in.size(); i++) {
       auto roi = rois.empty() ? no_roi : rois[i];
-      ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable {
+      ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable {
           try {
             promise.set(i, DecodeImplTask(tid, out, in, opts, roi));
           } catch (...) {
@@ -128,7 +128,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl {
     ROI no_roi;
     for (int i = 0; i < in.size(); i++) {
       auto roi = rois.empty() ? no_roi : rois[i];
-      ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable {
+      ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable {
           try {
             promise.set(i, DecodeImplTask(tid, ctx.stream, out, in, opts, roi));
           } catch (...) {
diff --git a/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc b/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc
index b2ecc666cd2..f89a2429a95 100644
--- a/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc
+++ b/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc
@@ -99,7 +99,7 @@ void NvJpegLosslessDecoderInstance::Parse(DecodeResultsPromise &promise,
   DecodeResultsPromise parse_promise(nsamples);
   for (int i = 0; i < nsamples; i++) {
     int tid = 0;
-    ctx.tp->AddWork(
+    ctx.tp->AddTask(
         [&, i](int tid) {
           auto &jpeg_stream = per_thread_resources_[tid].jpeg_stream;
           auto *sample = in[i];
diff --git a/dali/imgcodec/future_decode_result_test.cc b/dali/imgcodec/future_decode_result_test.cc
index 6635e2b1b1d..3fdada471ee 100644
--- a/dali/imgcodec/future_decode_result_test.cc
+++ b/dali/imgcodec/future_decode_result_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ TEST(FutureDecodeResultsTest, WaitNew) {
 
   DecodeResultsPromise pro(3);
   auto fut = pro.get_future();
-  tp.AddWork([pro](int tidx) mutable {
+  tp.AddTask([pro](int tidx) mutable {
     std::this_thread::sleep_for(std::chrono::milliseconds(10));
     pro.set(1, DecodeResult::Success());
     pro.set(0, DecodeResult::Success());
@@ -59,7 +59,7 @@ TEST(FutureDecodeResultsTest, Benchmark) {
   auto start = std::chrono::high_resolution_clock::now();
   for (int iter = 0; iter < num_iter; iter++) {
     DecodeResultsPromise res(100);
-    tp.AddWork([&](int tidx) {
+    tp.AddTask([&](int tidx) {
       for (int i = 0; i < res.num_samples(); i++)
         res.set(i, DecodeResult::Success());
     }, 0, true);
diff --git a/dali/imgcodec/tools/imagemagick_test.cc b/dali/imgcodec/tools/imagemagick_test.cc
index 2ad3a3fa0e2..f84c92028df 100644
--- a/dali/imgcodec/tools/imagemagick_test.cc
+++ b/dali/imgcodec/tools/imagemagick_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -205,7 +205,7 @@ void run(Env &env) {
 
   while (directory_it != fs::end(directory_it)) {
     auto batch = get_batch(env, directory_it);
-    pool.AddWork([=, &env](int tid){
+    pool.AddTask([=, &env](int tid){
       process(env, batch);
     });
   }
diff --git a/dali/kernels/common/scatter_gather.h b/dali/kernels/common/scatter_gather.h
index ea3f3f08134..72adbb6ac70 100644
--- a/dali/kernels/common/scatter_gather.h
+++ b/dali/kernels/common/scatter_gather.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -170,7 +170,7 @@ class DLL_PUBLIC ScatterGatherCPU : public ScatterGatherBase {
     } else {
       MakeBlocks(exec_engine.NumThreads() * kTasksMultiplier);
       for (auto &r : blocks_) {
-        exec_engine.AddWork([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size);
+        exec_engine.AddTask([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size);
       }
       exec_engine.RunAll();
     }
diff --git a/dali/kernels/imgproc/structure/connected_components.h b/dali/kernels/imgproc/structure/connected_components.h
index 739a995dddc..9ee6f9cf630 100644
--- a/dali/kernels/imgproc/structure/connected_components.h
+++ b/dali/kernels/imgproc/structure/connected_components.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -208,7 +208,7 @@ void LabelSlice(OutLabel *label_base,
   for (int64_t i = 0; i < n; i++) {
     auto out_slice = out.slice(i);
     auto in_slice = in.slice(i);
-    engine.AddWork([=, &seq_engn](int){
+    engine.AddTask([=, &seq_engn](int){
       LabelSlice(label_base, out_slice, in_slice, background, seq_engn);
     });
   }
@@ -220,7 +220,7 @@ void LabelSlice(OutLabel *label_base,
       auto in_slice = in.slice(i);
       auto prev_out = out.slice(i-1);
       auto prev_in = in.slice(i-1);
-      engine.AddWork([=](int){
+      engine.AddTask([=](int){
         MergeSlices(label_base, prev_out, out_slice, prev_in, in_slice);
       });
     }
@@ -313,7 +313,7 @@ int64_t CompactLabels(OutLabel *labels,
       int64_t chunk_start = volume * chunk / num_chunks;
       int64_t chunk_end = volume * (chunk + 1) / num_chunks;
 
-      engine.AddWork([=, &tmp_sets, &lock](int thread) {
+      engine.AddTask([=, &tmp_sets, &lock](int thread) {
         OutLabel prev = old_bg_label;
         OutLabel remapped = old_bg_label;
         for (int64_t i = chunk_start; i < chunk_end; i++) {
@@ -354,7 +354,7 @@ int64_t CompactLabels(OutLabel *labels,
   for (int chunk = 0; chunk < num_chunks; chunk++) {
     int64_t chunk_start = volume * chunk / num_chunks;
     int64_t chunk_end = volume * (chunk + 1) / num_chunks;
-    engine.AddWork([=, &label_map](int) {
+    engine.AddTask([=, &label_map](int) {
       RemapChunk(make_span(labels + chunk_start, chunk_end - chunk_start), label_map);
     });
   }
diff --git a/dali/kernels/imgproc/structure/label_bbox.h b/dali/kernels/imgproc/structure/label_bbox.h
index 0776b256ab4..0db405bba74 100644
--- a/dali/kernels/imgproc/structure/label_bbox.h
+++ b/dali/kernels/imgproc/structure/label_bbox.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -245,7 +245,7 @@ void GetLabelBoundingBoxes(span<Box<ndim, Coord>> boxes,
       part_boxes = make_span(&tmp_boxes[(i - 1)*boxes.size()], boxes.size());
     part.size[max_d] = end - start;
     part.data = in.data + start * stride;
-    engine.AddWork([=](int) {
+    engine.AddTask([=](int) {
       i64vec<ndim> origin = {};
       origin[dim_mapping[max_d]] = start;
       GetLabelBoundingBoxes(part_boxes, part, dim_mapping, background, origin);
diff --git a/dali/kernels/slice/slice_cpu.h b/dali/kernels/slice/slice_cpu.h
index 8fbe4c0663a..63f61f9da45 100644
--- a/dali/kernels/slice/slice_cpu.h
+++ b/dali/kernels/slice/slice_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -313,7 +313,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
       int64_t b_start = prev_b_end;
       int64_t b_end = prev_b_end = total_sz * (b + 1) / nblocks;
       int64_t b_nbytes = (b_end - b_start) * sizeof(OutputType);
-      exec_engine.AddWork([=](int tid) {
+      exec_engine.AddTask([=](int tid) {
         std::memcpy(out_data + b_start, in_data + b_start, b_nbytes);
       }, b_nbytes, false);  // do not start work immediately
     }
@@ -325,7 +325,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
   int nblocks = split_shape(split_factor, out_shape, req_nblocks, min_blk_sz, skip_dim_mask);
 
   if (nblocks == 1) {
-    exec_engine.AddWork([=](int) {
+    exec_engine.AddTask([=](int) {
       SliceKernel(out_data, in_data, out_strides, in_strides, out_shape, in_shape,
                   args.anchor, GetPtr<OutputType>(args.fill_values), args.channel_dim);
     }, kSliceCost * volume(out_shape), false);  // do not start work immediately
@@ -345,7 +345,7 @@ void SliceKernel(ExecutionEngine &exec_engine,
         blk_shape[d] = blk_end[d] - blk_start[d];
         blk_anchor[d] = args.anchor[d] + blk_start[d];
       }
-      exec_engine.AddWork([=](int) {
+      exec_engine.AddTask([=](int) {
         SliceKernel(output_ptr, in_data, out_strides, in_strides, blk_shape, in_shape,
                     blk_anchor, GetPtr<OutputType>(args.fill_values), args.channel_dim);
       }, kSliceCost * volume(blk_shape), false);  // do not start work immediately
diff --git a/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h b/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h
index 8580a62c0c0..b928bd1670c 100644
--- a/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h
+++ b/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -260,7 +260,7 @@ void SliceFlipNormalizePermutePadKernel(
                             req_nblocks > 0 ? req_nblocks : exec_engine.NumThreads() * 8,
                             min_blk_sz, skip_dim_mask);
   if (nblocks == 1) {
-    exec_engine.AddWork([=](int) {
+    exec_engine.AddTask([=](int) {
       SliceFlipNormalizePermutePadKernel(output, input, args.in_strides, args.out_strides,
                                          args.anchor, args.in_shape, args.out_shape,
                                          GetPtr<OutputType>(fill_values),
@@ -288,7 +288,7 @@ void SliceFlipNormalizePermutePadKernel(
         blk_anchor[d] = args.anchor[d] + blk_start[d];
       }
 
-      exec_engine.AddWork([=](int) {
+      exec_engine.AddTask([=](int) {
         SliceFlipNormalizePermutePadKernel(output_ptr, input_ptr, args.in_strides, args.out_strides,
                                            blk_anchor, args.in_shape, blk_shape,
                                            GetPtr<OutputType>(fill_values),
diff --git a/dali/operators/audio/mel_scale/mel_filter_bank.cc b/dali/operators/audio/mel_scale/mel_filter_bank.cc
index 7ddfca4bf84..24497658144 100644
--- a/dali/operators/audio/mel_scale/mel_filter_bank.cc
+++ b/dali/operators/audio/mel_scale/mel_filter_bank.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ void MelFilterBank<CPUBackend>::RunImpl(Workspace &ws) {
   TYPE_SWITCH(input.type(), type2id, T, MEL_FBANK_SUPPORTED_TYPES, (
     using MelFilterBankKernel = kernels::audio::MelFilterBankCpu<T>;
     for (int i = 0; i < input.shape().num_samples(); i++) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
         [this, &input, &output, i](int thread_id) {
           auto in_view = view<const T>(input[i]);
           auto out_view = view<T>(output[i]);
diff --git a/dali/operators/audio/mfcc/mfcc.cc b/dali/operators/audio/mfcc/mfcc.cc
index 705fd362b7c..67e0cd7c528 100644
--- a/dali/operators/audio/mfcc/mfcc.cc
+++ b/dali/operators/audio/mfcc/mfcc.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -160,7 +160,7 @@ void MFCC<CPUBackend>::RunImpl(Workspace &ws) {
     VALUE_SWITCH(in_shape.sample_dim(), Dims, MFCC_SUPPORTED_NDIMS, (
       using DctKernel = kernels::signal::dct::Dct1DCpu<T, T, Dims>;
       for (int i = 0; i < input.shape().num_samples(); i++) {
-        thread_pool.AddWork(
+        thread_pool.AddTask(
           [this, &input, &output, i](int thread_id) {
             kernels::KernelContext ctx;
             auto in_view = view<const T, Dims>(input[i]);
diff --git a/dali/operators/audio/nonsilence_op.cc b/dali/operators/audio/nonsilence_op.cc
index d7a922115a5..2c191d46f11 100644
--- a/dali/operators/audio/nonsilence_op.cc
+++ b/dali/operators/audio/nonsilence_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -102,7 +102,7 @@ class NonsilenceOperatorCpu : public NonsilenceOperator<CPUBackend> {
     auto &tp = ws.GetThreadPool();
     auto in_shape = input.shape();
     for (int sample_id = 0; sample_id < curr_batch_size; sample_id++) {
-      tp.AddWork(
+      tp.AddTask(
               [&, sample_id](int thread_id) {
                   detail::Args<InputType> args;
                   args.input = view<const InputType, 1>(input[sample_id]);
diff --git a/dali/operators/audio/preemphasis_filter_op.cc b/dali/operators/audio/preemphasis_filter_op.cc
index 335414a30b1..b6db6dfba0f 100644
--- a/dali/operators/audio/preemphasis_filter_op.cc
+++ b/dali/operators/audio/preemphasis_filter_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ void PreemphasisFilterCPU::RunImplTyped(Workspace &ws) {
   auto nsamples = shape.num_samples();
 
   for (int sample_id = 0; sample_id < nsamples; sample_id++) {
-    tp.AddWork(
+    tp.AddTask(
       [this, &output, &input, sample_id](int thread_id) {
         const auto *in_ptr = input.tensor<InputType>(sample_id);
         auto *out_ptr = output.mutable_tensor<OutputType>(sample_id);
diff --git a/dali/operators/audio/resample.cc b/dali/operators/audio/resample.cc
index f52220bbfd6..8766c5e9fca 100644
--- a/dali/operators/audio/resample.cc
+++ b/dali/operators/audio/resample.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -136,7 +136,7 @@ class ResampleCPU : public ResampleBase<CPUBackend> {
     auto &tp = ws.GetThreadPool();
     in_fp32.resize(tp.NumThreads());
     for (int s = 0; s < N; s++) {
-      tp.AddWork([&, this, s](int thread_idx) {
+      tp.AddTask([&, this, s](int thread_idx) {
         InTensorCPU<float> in_view;
         TYPE_SWITCH(in.type(), type2id, T, (AUDIO_RESAMPLE_TYPES),
           (in_view = ConvertInput(in_fp32[thread_idx], view<const T>(in[s]));),
diff --git a/dali/operators/bbox/bb_flip.cc b/dali/operators/bbox/bb_flip.cc
index 2f0f1b41483..7c31b8a445d 100644
--- a/dali/operators/bbox/bb_flip.cc
+++ b/dali/operators/bbox/bb_flip.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@ void BbFlipCPU::RunImpl(Workspace &ws) {
   TensorLayout layout = ltrb_ ? "xyXY" : "xyWH";
 
   for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
-    tp.AddWork(
+    tp.AddTask(
       [&, sample_idx](int thread_id) {
         bool vertical = vert_[sample_idx].data[0];
         bool horizontal = horz_[sample_idx].data[0];
diff --git a/dali/operators/decoder/audio/audio_decoder_op.cc b/dali/operators/decoder/audio/audio_decoder_op.cc
index 0bfb649bab3..8ace2929817 100644
--- a/dali/operators/decoder/audio/audio_decoder_op.cc
+++ b/dali/operators/decoder/audio/audio_decoder_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -148,7 +148,7 @@ void AudioDecoderCpu::DecodeBatch(Workspace &ws) {
   scratch_resampler_.resize(tp.NumThreads());
 
   for (int i = 0; i < batch_size; i++) {
-    tp.AddWork([&, i](int thread_id) {
+    tp.AddTask([&, i](int thread_id) {
       try {
         DecodeSample<OutputType>(decoded_output[i], thread_id, i);
         sample_rate_output[i].data[0] = use_resampling_
diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h
index a48d1ef2619..6cab9db4bfe 100644
--- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h
+++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -242,7 +242,7 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
 
 #if NVJPEG2K_ENABLED
     auto nvjpeg2k_thread_id = nvjpeg2k_thread_.GetThreadIds()[0];
-    nvjpeg2k_thread_.AddWork([this, device_memory_padding_jpeg2k, host_memory_padding_jpeg2k,
+    nvjpeg2k_thread_.AddTask([this, device_memory_padding_jpeg2k, host_memory_padding_jpeg2k,
                               nvjpeg2k_thread_id](int) {
       nvjpeg2k_handle_ = NvJPEG2KHandle(&nvjpeg2k_dev_alloc_, &nvjpeg2k_pin_alloc_);
 
@@ -572,7 +572,7 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
       auto *input_data = input.tensor<uint8_t>(i);
       const auto in_size = input.tensor_shape(i).num_elements();
       const auto &source_info = input.GetMeta(i).GetSourceInfo();
-      thread_pool_.AddWork([this, i, input_data, in_size, source_info](int tid) {
+      thread_pool_.AddTask([this, i, input_data, in_size, source_info](int tid) {
         SampleData &data = sample_data_[i];
         data.clear();
         data.sample_idx = i;
@@ -725,7 +725,7 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
       auto *output_data = output.mutable_tensor<uint8_t>(i);
       const auto *in_data = input.tensor<uint8_t>(i);
       const auto in_size = input.tensor_shape(i).num_elements();
-      thread_pool_.AddWork(
+      thread_pool_.AddTask(
         [this, sample, in_data, in_size, output_data](int tid) {
           SampleWorker(sample->sample_idx, sample->file_name, in_size, tid,
             in_data, output_data, streams_[tid]);
@@ -807,7 +807,7 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
     if (!nvjpeg2k_handle_) {
       return;
     }
-    nvjpeg2k_thread_.AddWork([this, &ws](int) {
+    nvjpeg2k_thread_.AddTask([this, &ws](int) {
       auto &output = ws.Output<GPUBackend>(0);
       const auto &input = ws.Input<CPUBackend>(0);
       const auto &input_shape = input.shape();
@@ -833,7 +833,7 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
       auto in_size = input.tensor_shape(i).num_elements();
       auto *output_data = output.mutable_tensor<uint8_t>(i);
       ImageCache::ImageShape shape = output_shape_[i].to_static<3>();
-      thread_pool_.AddWork(
+      thread_pool_.AddTask(
         [this, sample, input_data, in_size, output_data, shape](int tid) {
           HostFallback<StorageGPU>(input_data, in_size, output_image_type_, output_data,
                                    streams_[tid], sample->file_name, sample->roi, use_fast_idct_);
diff --git a/dali/operators/decoder/video/video_decoder_cpu.cc b/dali/operators/decoder/video/video_decoder_cpu.cc
index 9a745e002b3..46e7547fffc 100644
--- a/dali/operators/decoder/video/video_decoder_cpu.cc
+++ b/dali/operators/decoder/video/video_decoder_cpu.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ bool VideoDecoderCpu::SetupImpl(std::vector<OutputDesc> &output_desc,
     auto sample = input[i];
     auto data = reinterpret_cast<const char *>(sample.data<uint8_t>());
     size_t size = sample.shape().num_elements();
-    thread_pool.AddWork([this, i, data, size](int tid) {
+    thread_pool.AddTask([this, i, data, size](int tid) {
       frames_decoders_[i] = std::make_unique<FramesDecoder>(data, size, false);
     });
   }
@@ -44,7 +44,7 @@ void VideoDecoderCpu::RunImpl(Workspace &ws) {
   int batch_size = input.num_samples();
   auto &thread_pool = ws.GetThreadPool();
   for (int s = 0; s < batch_size; ++s) {
-    thread_pool.AddWork([this, s, &output](int tid) {
+    thread_pool.AddTask([this, s, &output](int tid) {
       DecodeSample(output[s], s);
     }, volume(input[s].shape()));
   }
diff --git a/dali/operators/decoder/video/video_decoder_mixed.cc b/dali/operators/decoder/video/video_decoder_mixed.cc
index 62f1ec1da0e..26c3b6c7fa8 100644
--- a/dali/operators/decoder/video/video_decoder_mixed.cc
+++ b/dali/operators/decoder/video/video_decoder_mixed.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ bool VideoDecoderMixed::SetupImpl(
   auto stream = ws.stream();
   frames_decoders_.resize(batch_size);
   for (int i = 0; i < batch_size; ++i) {
-    thread_pool_.AddWork([this, i, &input, stream](int) {
+    thread_pool_.AddTask([this, i, &input, stream](int) {
       auto sample = input[i];
       auto data = reinterpret_cast<const char *>(sample.data<uint8_t>());
       size_t size = sample.shape().num_elements();
@@ -43,7 +43,7 @@ void VideoDecoderMixed::Run(Workspace &ws) {
   const auto &input = ws.Input<CPUBackend>(0);
   int batch_size = input.num_samples();
   for (int s = 0; s < batch_size; ++s) {
-    thread_pool_.AddWork([this, s, &output](int) {
+    thread_pool_.AddTask([this, s, &output](int) {
       DecodeSample(output[s], s);
       // when the decoding is done release the decoder,
       // so it can be reused by the next sample in the batch
diff --git a/dali/operators/generic/cast.cc b/dali/operators/generic/cast.cc
index 68aafe4670f..f426245e013 100644
--- a/dali/operators/generic/cast.cc
+++ b/dali/operators/generic/cast.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -53,7 +53,7 @@ void CastCPU::RunImpl(Workspace &ws) {
         auto *out = output.mutable_tensor<OType>(sample_id);
         const auto *in = input.tensor<IType>(sample_id);
         auto size = input_shape.tensor_size(sample_id);
-        tp.AddWork([out, in, size](int thread_id) { CpuHelper<OType, IType>(out, in, size); },
+        tp.AddTask([out, in, size](int thread_id) { CpuHelper<OType, IType>(out, in, size); },
                    size);
       }
 
diff --git a/dali/operators/generic/erase/erase.cc b/dali/operators/generic/erase/erase.cc
index 9d2af190616..e68922e716e 100644
--- a/dali/operators/generic/erase/erase.cc
+++ b/dali/operators/generic/erase/erase.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -209,7 +209,7 @@ void EraseImplCpu<T, Dims>::RunImpl(Workspace &ws) {
   auto& thread_pool = ws.GetThreadPool();
   auto in_shape = input.shape();
   for (int i = 0; i < nsamples; i++) {
-    thread_pool.AddWork(
+    thread_pool.AddTask(
       [this, &input, &output, i](int thread_id) {
         kernels::KernelContext ctx;
         auto in_view = view<const T, Dims>(input[i]);
diff --git a/dali/operators/generic/join.cc b/dali/operators/generic/join.cc
index 1fb241e8809..e042f63fdc9 100644
--- a/dali/operators/generic/join.cc
+++ b/dali/operators/generic/join.cc
@@ -233,7 +233,7 @@ void TensorJoin<Backend, new_axis>::RunTyped(
   in_shapes.resize(num_threads * njoin);
 
   for (int i = 0; i < N; i++) {
-    tp.AddWork([&, i](int tid) {
+    tp.AddTask([&, i](int tid) {
       kernels::KernelContext ctx;
       auto sample_in_tensors = make_span(&in_tensors[tid * njoin], njoin);
       auto sample_in_shapes = make_span(&in_shapes[tid * njoin], njoin);
diff --git a/dali/operators/generic/lookup_table.cc b/dali/operators/generic/lookup_table.cc
index 20c4272f7cd..fa641d81552 100644
--- a/dali/operators/generic/lookup_table.cc
+++ b/dali/operators/generic/lookup_table.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ void LookupValuesImpl(ThreadPool &tp, TensorList<CPUBackend> &output,
     auto data_size = shape.tensor_size(sample_idx);
     auto *out_data = output.mutable_tensor<Output>(sample_idx);
     const auto *in_data = input.tensor<Input>(sample_idx);
-    tp.AddWork(
+    tp.AddTask(
         [=](int thread_id) {
           for (int64_t i = 0; i < data_size; i++) {
             DoLookup<CPUBackend>(out_data[i], in_data[i], lookup_table, default_value);
diff --git a/dali/operators/generic/one_hot.cc b/dali/operators/generic/one_hot.cc
index bf1ce3bd0d4..056613ad746 100644
--- a/dali/operators/generic/one_hot.cc
+++ b/dali/operators/generic/one_hot.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -88,7 +88,7 @@ void OneHotCPU::RunImpl(Workspace &ws) {
     auto in_tensor = view<const InputType, DynamicDimensions>(input);
     auto out_tensor = view<OutputType, DynamicDimensions>(output);
     for (int sample_id = 0; sample_id < num_samples; ++sample_id) {
-      tp.AddWork(
+      tp.AddTask(
               [&, sample_id](int thread_id) {
                   auto in = in_tensor[sample_id];
                   auto out = out_tensor[sample_id];
diff --git a/dali/operators/generic/pad.cc b/dali/operators/generic/pad.cc
index 37c659ef505..06444a9a268 100644
--- a/dali/operators/generic/pad.cc
+++ b/dali/operators/generic/pad.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -235,7 +235,7 @@ void Pad<CPUBackend>::RunImpl(Workspace &ws) {
       using Args = kernels::SliceArgs<T, Dims>;
 
       for (int i = 0; i < nsamples; i++) {
-        thread_pool.AddWork(
+        thread_pool.AddTask(
           [this, &input, &output, i](int thread_id) {
             kernels::KernelContext ctx;
             auto in_view = view<const T, Dims>(input[i]);
diff --git a/dali/operators/generic/permute_batch.cc b/dali/operators/generic/permute_batch.cc
index 23943394b19..ea271e14678 100644
--- a/dali/operators/generic/permute_batch.cc
+++ b/dali/operators/generic/permute_batch.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ void PermuteBatch<CPUBackend>::RunImpl(Workspace &ws) {
   for (int i = 0; i < N; i++) {
     auto size = output_shape.tensor_size(i);
     int src = indices_[i];
-    tp.AddWork([&, i, src](int tid) {
+    tp.AddTask([&, i, src](int tid) {
       output.SetMeta(i, input.GetMeta(i));
       // TODO(klecki): SetSample
       output.CopySample(i, input, src);
diff --git a/dali/operators/generic/reduce/reduce.h b/dali/operators/generic/reduce/reduce.h
index a229117e56f..8cbe460d21c 100644
--- a/dali/operators/generic/reduce/reduce.h
+++ b/dali/operators/generic/reduce/reduce.h
@@ -90,7 +90,7 @@ class Reduce : public Operator<Backend>, AxesHelper {
 
     for (int sample = 0; sample < in_view.num_samples(); sample++) {
       int64_t priority = volume(in_view.shape.tensor_shape_span(sample));
-      thread_pool.AddWork(
+      thread_pool.AddTask(
         [&, sample](int thread_id) {
           auto in_sample_view = in_view[sample];
           auto out_sample_view = out_view[sample];
diff --git a/dali/operators/generic/reduce/reduce_with_mean_input.h b/dali/operators/generic/reduce/reduce_with_mean_input.h
index bfecfab3285..0823d4fcfc9 100644
--- a/dali/operators/generic/reduce/reduce_with_mean_input.h
+++ b/dali/operators/generic/reduce/reduce_with_mean_input.h
@@ -103,7 +103,7 @@ class ReduceWithMeanInput : public Operator<Backend>, AxesHelper {
 
     for (int sample = 0; sample < in_view.num_samples(); sample++) {
       int64_t priority = volume(in_view.shape.tensor_shape_span(sample));
-      thread_pool.AddWork(
+      thread_pool.AddTask(
         [&, sample](int thread_id) {
           auto in_sample_view = in_view[sample];
           auto mean_sample_view = mean_view[sample];
diff --git a/dali/operators/generic/transpose/transpose.cc b/dali/operators/generic/transpose/transpose.cc
index 1ef632c406f..6df3a15f0b2 100644
--- a/dali/operators/generic/transpose/transpose.cc
+++ b/dali/operators/generic/transpose/transpose.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class TransposeCPU : public Transpose<CPUBackend> {
 
     TYPE_SWITCH(input_type, type2id, T, TRANSPOSE_ALLOWED_TYPES, (
       for (int i = 0; i < nsamples; i++) {
-        thread_pool.AddWork(
+        thread_pool.AddTask(
           [this, &input, &output, i](int thread_id) {
             TensorShape<> src_ts = input.shape()[i];
 
diff --git a/dali/operators/geometry/coord_flip.cc b/dali/operators/geometry/coord_flip.cc
index 09244c60960..7143c403b07 100644
--- a/dali/operators/geometry/coord_flip.cc
+++ b/dali/operators/geometry/coord_flip.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -78,7 +78,7 @@ void CoordFlipCPU::RunImpl(Workspace &ws) {
     mirrored_origin[z_dim_] = 2.0f * spec_.GetArgument<float>("center_z", &ws, sample_id);
 
     auto in_size = volume(input.tensor_shape(sample_id));
-    thread_pool.AddWork(
+    thread_pool.AddTask(
         [this, &input, in_size, &output, sample_id, flip_dim, mirrored_origin](int thread_id) {
           const auto *in = input.tensor<float>(sample_id);
           auto *out = output.mutable_tensor<float>(sample_id);
diff --git a/dali/operators/geometry/coord_transform.cc b/dali/operators/geometry/coord_transform.cc
index dc43012d73e..f3972fe0acd 100644
--- a/dali/operators/geometry/coord_transform.cc
+++ b/dali/operators/geometry/coord_transform.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ void CoordTransform<CPUBackend>::RunTyped(Workspace &ws) {
   auto T = GetTranslations<out_dim>();
 
   for (int idx = 0; idx < in_view.num_samples(); idx++) {
-    tp.AddWork([&, idx](int tid) {
+    tp.AddTask([&, idx](int tid) {
         kernels::KernelContext ctx;
         auto in_tensor = in_view[idx];
         auto out_tensor = out_view[idx];
diff --git a/dali/operators/image/color/brightness_contrast.cc b/dali/operators/image/color/brightness_contrast.cc
index 5e2f562b31c..90ecacceae6 100644
--- a/dali/operators/image/color/brightness_contrast.cc
+++ b/dali/operators/image/color/brightness_contrast.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -118,7 +118,7 @@ void BrightnessContrastCpu::RunImplHelper(Workspace &ws) {
         sequence_utils::unfolded_views_range<ndim - 3>(out_view[sample_id], in_view[sample_id]);
     const auto &in_range = planes_range.template get<1>();
     for (auto &&views : planes_range) {
-      tp.AddWork([&, views, add, mul](int thread_id) {
+      tp.AddTask([&, views, add, mul](int thread_id) {
           kernels::KernelContext ctx;
           auto &[tvout, tvin] = views;
           kernel_manager_.Run<Kernel>(0, ctx, tvout, tvin, add, mul);
diff --git a/dali/operators/image/color/color_space_conversion.cc b/dali/operators/image/color/color_space_conversion.cc
index 631a268c64e..0c0eaac7294 100644
--- a/dali/operators/image/color/color_space_conversion.cc
+++ b/dali/operators/image/color/color_space_conversion.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -41,7 +41,7 @@ void ColorSpaceConversion<CPUBackend>::RunImpl(Workspace &ws) {
   int ndim = in_sh.sample_dim();
   auto& thread_pool = ws.GetThreadPool();
   for (int i = 0; i < nsamples; i++) {
-    thread_pool.AddWork(
+    thread_pool.AddTask(
       [&, i](int thread_id) {
         auto in_sample_sh = in_sh.tensor_shape_span(i);
         // flatten any leading dimensions together with the height
diff --git a/dali/operators/image/color/color_twist.cc b/dali/operators/image/color/color_twist.cc
index f2c8ad20608..c8a69133357 100644
--- a/dali/operators/image/color/color_twist.cc
+++ b/dali/operators/image/color/color_twist.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -155,7 +155,7 @@ void ColorTwistCpu::RunImplHelper(Workspace &ws) {
     auto planes_range = sequence_utils::unfolded_views_range<ndim - 3>(out_view[i], in_view[i]);
     const auto &in_range = planes_range.template get<1>();
     for (auto &&views : planes_range) {
-      tp.AddWork(
+      tp.AddTask(
           [&, i, views](int thread_id) {
             kernels::KernelContext ctx;
             auto &[tvout, tvin] = views;
diff --git a/dali/operators/image/convolution/gaussian_blur.cc b/dali/operators/image/convolution/gaussian_blur.cc
index 0a7faa0ca24..bf603aa62ea 100644
--- a/dali/operators/image/convolution/gaussian_blur.cc
+++ b/dali/operators/image/convolution/gaussian_blur.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -117,7 +117,7 @@ class GaussianBlurOpCpu : public OpImplBase<CPUBackend> {
     int nsamples = input.num_samples();
     for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
       auto elem_volume = volume(input.tensor_shape(sample_idx));
-      thread_pool.AddWork(
+      thread_pool.AddTask(
           [this, &input, &output, sample_idx](int thread_id) {
             auto gaussian_windows = windows_[sample_idx].GetWindows();
             const auto &shape = input.tensor_shape(sample_idx);
diff --git a/dali/operators/image/convolution/laplacian.cc b/dali/operators/image/convolution/laplacian.cc
index 333dea5b2d2..c20a41b04eb 100644
--- a/dali/operators/image/convolution/laplacian.cc
+++ b/dali/operators/image/convolution/laplacian.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -154,7 +154,7 @@ class LaplacianOpCpu : public OpImplBase<CPUBackend> {
     for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
       auto priority = volume(input.tensor_shape(sample_idx)) * args.GetTotalWindowSizes(sample_idx);
 
-      thread_pool.AddWork(
+      thread_pool.AddTask(
           [this, &input, &output, sample_idx](int thread_id) {
             const auto& scales = args.GetScales(sample_idx);
             const auto& shape = input.tensor_shape(sample_idx);
diff --git a/dali/operators/image/crop/bbox_crop.cc b/dali/operators/image/crop/bbox_crop.cc
index 77caf5d4e3e..f6285f0788e 100644
--- a/dali/operators/image/crop/bbox_crop.cc
+++ b/dali/operators/image/crop/bbox_crop.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -519,7 +519,7 @@ class RandomBBoxCropImpl : public OpImplBase<CPUBackend> {
     sample_data_.resize(num_samples);
     for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) {
       auto &data = sample_data_[sample_idx];
-      tp.AddWork([&, sample_idx](int thread_id) {
+      tp.AddTask([&, sample_idx](int thread_id) {
         auto nboxes = in_boxes_view.tensor_shape_span(sample_idx)[0];
         data.in_bboxes.resize(nboxes);
         ReadBoxes(make_span(data.in_bboxes),
diff --git a/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc b/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc
index 6678669f94e..42919782be3 100644
--- a/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc
+++ b/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ void JpegCompressionDistortionCPU::RunImpl(Workspace &ws) {
     int64_t width = shape[w_dim];
     int64_t height = shape[h_dim];
     for (int elem_idx = 0; elem_idx < nframes; elem_idx++) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
           [&, sample_idx, elem_idx, width, height, frame_size,
            quality = quality_arg_[sample_idx].data[0]](int thread_id) {
             auto *in = in_view[sample_idx].data + elem_idx * frame_size;
diff --git a/dali/operators/image/mask/grid_mask.cc b/dali/operators/image/mask/grid_mask.cc
index 37aa88f48e0..0d39d66331f 100644
--- a/dali/operators/image/mask/grid_mask.cc
+++ b/dali/operators/image/mask/grid_mask.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -75,7 +75,7 @@ void GridMaskCpu::RunImpl(Workspace &ws) {
           auto in_view = view<const Type>(input);
           auto out_view = view<Type>(output);
           for (int sid = 0; sid < input.shape().num_samples(); sid++) {
-            tp.AddWork([&, sid](int) {
+            tp.AddTask([&, sid](int) {
               kernels::KernelContext ctx;
               kernel_manager_.Run<Kernel>(sid, ctx, out_view[sid], in_view[sid],
                 tile_[sid], ratio_[sid], angle_[sid], shift_x_[sid], shift_y_[sid]);
diff --git a/dali/operators/image/paste/multipaste.cc b/dali/operators/image/paste/multipaste.cc
index 65199564e1c..21108463872 100644
--- a/dali/operators/image/paste/multipaste.cc
+++ b/dali/operators/image/paste/multipaste.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -90,7 +90,7 @@ void MultiPasteCPU::RunTyped(Workspace &ws) {
         int from_sample = in_idx_[i].data[iter];
         int to_sample = i;
 
-        tp.AddWork(
+        tp.AddTask(
           [&, i, iter, from_sample, to_sample, in_view, out_view](int thread_id) {
             kernels::KernelContext ctx;
             auto tvin = in_view[from_sample];
@@ -108,7 +108,7 @@ void MultiPasteCPU::RunTyped(Workspace &ws) {
           out_shape.tensor_size(to_sample));
       }
     } else {
-      tp.AddWork(
+      tp.AddTask(
         [&, i, paste_count, in_view, out_view](int thread_id) {
           for (int iter = 0; iter < paste_count; iter++) {
             int from_sample = in_idx_[i].data[iter];
diff --git a/dali/operators/image/peek_shape/peek_image_shape.h b/dali/operators/image/peek_shape/peek_image_shape.h
index 7a14178538e..3ad55184431 100644
--- a/dali/operators/image/peek_shape/peek_image_shape.h
+++ b/dali/operators/image/peek_shape/peek_image_shape.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -80,7 +80,7 @@ class PeekImageShape : public Operator<CPUBackend> {
     DALI_ENFORCE(input.sample_dim() == 1, "Input must be 1D encoded JPEG bit stream.");
 
     for (size_t sample_id = 0; sample_id < batch_size; ++sample_id) {
-      thread_pool.AddWork([sample_id, &input, &output, this] (int tid) {
+      thread_pool.AddTask([sample_id, &input, &output, this] (int tid) {
         const auto& image = input[sample_id];
         auto img =
             ImageFactory::CreateImage(image.data<uint8>(), image.shape().num_elements(), {});
diff --git a/dali/operators/image/remap/displacement_filter_impl_cpu.h b/dali/operators/image/remap/displacement_filter_impl_cpu.h
index a3c74a0fe2f..f70dc1346cd 100644
--- a/dali/operators/image/remap/displacement_filter_impl_cpu.h
+++ b/dali/operators/image/remap/displacement_filter_impl_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -168,7 +168,7 @@ class DisplacementFilter<CPUBackend, Displacement, per_channel_transform>
     auto &tp = ws.GetThreadPool();
 
     for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) {
-      tp.AddWork([&, sample_idx](int thread_idx) { RunSample(ws, sample_idx, thread_idx); },
+      tp.AddTask([&, sample_idx](int thread_idx) { RunSample(ws, sample_idx, thread_idx); },
                  shape.tensor_size(sample_idx));
     }
 
diff --git a/dali/operators/image/remap/warp.h b/dali/operators/image/remap/warp.h
index 01ef36100d9..317236f3ac4 100644
--- a/dali/operators/image/remap/warp.h
+++ b/dali/operators/image/remap/warp.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -167,7 +167,7 @@ class WarpOpImpl : public OpImplInterface<Backend> {
     auto interp_types = param_provider_->InterpTypes();
 
     for (int i = 0; i < input_.num_samples(); i++) {
-      pool.AddWork([&, i](int tid) {
+      pool.AddTask([&, i](int tid) {
         DALIInterpType interp_type = interp_types.size() > 1 ? interp_types[i] : interp_types[0];
         auto context = GetContext(ws);
         kmgr_.Run<Kernel>(
diff --git a/dali/operators/image/resize/resize_op_impl_cpu.h b/dali/operators/image/resize/resize_op_impl_cpu.h
index cc6d90b8697..1e7c688b354 100644
--- a/dali/operators/image/resize/resize_op_impl_cpu.h
+++ b/dali/operators/image/resize/resize_op_impl_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -102,7 +102,7 @@ class ResizeOpImplCPU : public ResizeBase<CPUBackend>::Impl {
         // NOTE: This does not account for cost of antialiasing!
         cost += std::pow(std::pow(out_size, spatial_ndim - i) * pow(in_size, i), root);
       }
-      tp.AddWork(work, std::llround(cost));
+      tp.AddTask(work, std::llround(cost));
     }
     tp.RunAll();
   }
diff --git a/dali/operators/imgcodec/decoder.h b/dali/operators/imgcodec/decoder.h
index 9bc6c444be1..7255fa771e7 100644
--- a/dali/operators/imgcodec/decoder.h
+++ b/dali/operators/imgcodec/decoder.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -100,7 +100,7 @@ class DecoderBase : public Operator<Backend> {
     auto *decoder = GetDecoderInstance();
 
     for (int i = 0; i < shapes.size(); i++) {
-      tp.AddWork([i, decoder, &input, &shapes, &ws, &spec, this] (int tid) {
+      tp.AddTask([i, decoder, &input, &shapes, &ws, &spec, this] (int tid) {
         srcs_[i] = SampleAsImageSource(input[i], input.GetMeta(i).GetSourceInfo());
         src_ptrs_[i] = &srcs_[i];
         auto info = decoder->GetInfo(src_ptrs_[i]);
diff --git a/dali/operators/imgcodec/peek_image_shape.cc b/dali/operators/imgcodec/peek_image_shape.cc
index dbf6c7291c5..b5373014f8c 100644
--- a/dali/operators/imgcodec/peek_image_shape.cc
+++ b/dali/operators/imgcodec/peek_image_shape.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -76,7 +76,7 @@ void ImgcodecPeekImageShape::RunImpl(Workspace &ws) {
                 "The input must be a raw, undecoded file stored as a flat uint8 array.");
 
   for (int i = 0; i < input.num_samples(); i++) {
-    thread_pool.AddWork([i, &input, &output, this] (int tid) {
+    thread_pool.AddTask([i, &input, &output, this] (int tid) {
       auto src = SampleAsImageSource(input[i], input.GetMeta(i).GetSourceInfo());
       auto *format = ImageFormatRegistry::instance().GetImageFormat(&src);
       DALI_ENFORCE(format, make_string("Cannot parse the image: ", src.SourceInfo()));
diff --git a/dali/operators/math/expressions/arithmetic.cc b/dali/operators/math/expressions/arithmetic.cc
index 48124d3954c..dc870096216 100644
--- a/dali/operators/math/expressions/arithmetic.cc
+++ b/dali/operators/math/expressions/arithmetic.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ void ArithmeticGenericOp<CPUBackend>::RunImpl(Workspace &ws) {
 
   int batch_size = ws.GetInputBatchSize(0);
   for (size_t task_idx = 0; task_idx < tile_range_.size(); task_idx++) {
-    pool.AddWork(
+    pool.AddTask(
         [=](int thread_idx) {
           auto range = tile_range_[task_idx];
           // Go over "tiles"
diff --git a/dali/operators/math/normalize/normalize.cc b/dali/operators/math/normalize/normalize.cc
index f496a7e3331..0e1ecac311d 100644
--- a/dali/operators/math/normalize/normalize.cc
+++ b/dali/operators/math/normalize/normalize.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -320,7 +320,7 @@ void Normalize<CPUBackend>::RunTyped(Workspace &ws) {
   if (batch_norm_) {
     if (ShouldCalcMean()) {
       for (int i = 0; i < nsamples; i++) {
-        tp.AddWork([&, i](int thread_idx) {
+        tp.AddTask([&, i](int thread_idx) {
           kernels::MeanCPU<float, InputType> mean;
           mean.Setup(mutable_mean[i], in_view[i], make_span(axes_));
           // Reset per-sample values, but don't postprocess
@@ -335,7 +335,7 @@ void Normalize<CPUBackend>::RunTyped(Workspace &ws) {
     if (ShouldCalcStdDev()) {
       auto sample_mean = mean_view[0];
       for (int i = 0; i < nsamples; i++) {
-        tp.AddWork([&, i](int thread_idx) {
+        tp.AddTask([&, i](int thread_idx) {
           kernels::VarianceCPU<float, InputType> stddev;
           stddev.Setup(mutable_stddev[i], in_view[i], make_span(axes_), sample_mean);
           // Reset per-sample values, but don't postprocess
@@ -353,7 +353,7 @@ void Normalize<CPUBackend>::RunTyped(Workspace &ws) {
 
 
   for (int i = 0; i < nsamples; i++) {
-    tp.AddWork([&, i](int thread_idx) {
+    tp.AddTask([&, i](int thread_idx) {
       auto sample_mean = mean_view.num_samples() == 1 || batch_norm_
                               ? mean_view[0]
                               : mean_view[i];
diff --git a/dali/operators/numba_function/numba_func.cc b/dali/operators/numba_function/numba_func.cc
index bb187e69f39..c9f26037047 100644
--- a/dali/operators/numba_function/numba_func.cc
+++ b/dali/operators/numba_function/numba_func.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -293,7 +293,7 @@ void NumbaFuncImpl<CPUBackend>::RunImpl(Workspace &ws) {
   auto out_shape = out.shape();
   auto &tp = ws.GetThreadPool();
   for (int sample_id = 0; sample_id < N; sample_id++) {
-    tp.AddWork([&, sample_id](int thread_id) {
+    tp.AddTask([&, sample_id](int thread_id) {
       SmallVector<uint64_t, 6> out_ptrs_per_sample;
       SmallVector<uint64_t, 6> out_shapes_per_sample;
       auto& out_shapes_ptrs = setup_fn_ ? output_shape_ptrs_ : input_shape_ptrs_;
diff --git a/dali/operators/python_function/dltensor_function.cc b/dali/operators/python_function/dltensor_function.cc
index f042a2cd0c6..cf7fe7c5ee9 100644
--- a/dali/operators/python_function/dltensor_function.cc
+++ b/dali/operators/python_function/dltensor_function.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -147,7 +147,7 @@ void CopyOutputData(TensorList<CPUBackend> &output, std::vector<DLMTensorPtr> &d
   auto &thread_pool = workspace.GetThreadPool();
   auto out_shape = output.shape();
   for (int i = 0; i < batch_size; ++i) {
-    thread_pool.AddWork([&, i](int) {
+    thread_pool.AddTask([&, i](int) {
       CopyDlTensor<CPUBackend>(output.raw_mutable_tensor(i), dl_tensors[i]);
     }, out_shape.tensor_size(i));
   }
diff --git a/dali/operators/random/rng_base_cpu.h b/dali/operators/random/rng_base_cpu.h
index a66a887e139..9f01cc7ae80 100644
--- a/dali/operators/random/rng_base_cpu.h
+++ b/dali/operators/random/rng_base_cpu.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -162,7 +162,7 @@ void RNGBase<Backend, Impl, IsNoiseGen>::RunImplTyped(Workspace &ws, CPUBackend)
     }
 
     if (total_p_count < kThreshold) {
-      tp.AddWork(
+      tp.AddTask(
         [=](int thread_id) {
           auto dist = use_default_dist ? Dist() : dists[sample_id];
           if (independent_channels) {
@@ -180,7 +180,7 @@ void RNGBase<Backend, Impl, IsNoiseGen>::RunImplTyped(Workspace &ws, CPUBackend)
         std::tie(p_offset, p_count) = get_chunk<T>(total_p_count, c, chunks);
         for (auto &s : seed)
           s = rng_[sample_id]();
-        tp.AddWork(
+        tp.AddTask(
           [=](int thread_id) {
             std::seed_seq seq(seed.begin(), seed.end());
             std::mt19937_64 chunk_rng(seq);
diff --git a/dali/operators/reader/nemo_asr_reader_op.cc b/dali/operators/reader/nemo_asr_reader_op.cc
index db50fcbfe70..b5249154514 100755
--- a/dali/operators/reader/nemo_asr_reader_op.cc
+++ b/dali/operators/reader/nemo_asr_reader_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -174,7 +174,7 @@ void NemoAsrReader::Prefetch() {
 
     const auto &audio_meta = sample.audio_meta();
     int64_t priority = audio_meta.length * audio_meta.channels;
-    thread_pool_.AddWork(
+    thread_pool_.AddTask(
       [audio, &sample](int tid) {
         sample.decode_audio(audio, tid);
       }, priority);
diff --git a/dali/operators/reader/numpy_reader_gpu_op.cc b/dali/operators/reader/numpy_reader_gpu_op.cc
index 3bc9b5cc31f..f5e208472d5 100644
--- a/dali/operators/reader/numpy_reader_gpu_op.cc
+++ b/dali/operators/reader/numpy_reader_gpu_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -62,7 +62,7 @@ void NumpyReaderGPU::Prefetch() {
   for (size_t data_idx = 0; data_idx < curr_batch.size(); ++data_idx) {
     // when padding, the last sample is duplicated so no need to redo the same work
     if (data_idx > 0 && curr_batch[data_idx -1 ] == curr_batch[data_idx]) continue;
-    thread_pool_.AddWork([this, &curr_batch, data_idx](int tid) {
+    thread_pool_.AddTask([this, &curr_batch, data_idx](int tid) {
         curr_batch[data_idx]->Reopen();
         curr_batch[data_idx]->ReadHeader(header_cache_);
       });
@@ -140,7 +140,7 @@ void NumpyReaderGPU::ScheduleChunkedRead(SampleView<GPUBackend> &out_sample,
     ssize_t copy_skip = copy_start - file_offset;
     ssize_t copy_end = file_offset + chunk_read_length;
     ssize_t chunk_copy_length = copy_end - copy_start;
-    thread_pool_.AddWork([=, &load_target](int tid) {
+    thread_pool_.AddTask([=, &load_target](int tid) {
       assert(chunk_read_length <= static_cast<ssize_t>(staging_.chunk_size()));
       auto buffer = staging_.get_staging_buffer();
       load_target.ReadRawChunk(buffer.at(0), chunk_read_length, 0, file_offset);
diff --git a/dali/operators/reader/numpy_reader_op.cc b/dali/operators/reader/numpy_reader_op.cc
index 3d863c40b53..7b976e8d2bd 100644
--- a/dali/operators/reader/numpy_reader_op.cc
+++ b/dali/operators/reader/numpy_reader_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ static void CopyHelper(SampleView<CPUBackend> output, ConstSampleView<CPUBackend
   auto nelements = input.shape().num_elements();
   auto nbytes = nelements * TypeTable::GetTypeInfo(input.type()).size();
   if (nelements <= min_blk_sz) {
-    thread_pool.AddWork([=](int tid) {
+    thread_pool.AddTask([=](int tid) {
       std::memcpy(out_ptr, in_ptr, nbytes);
     }, nelements);
   } else {
@@ -40,7 +40,7 @@ static void CopyHelper(SampleView<CPUBackend> output, ConstSampleView<CPUBackend
       int64_t b_start = prev_b_start;
       int64_t b_end = prev_b_start = nbytes * (b + 1) / req_nblocks;
       int64_t b_size = b_end - b_start;
-      thread_pool.AddWork([=](int tid) {
+      thread_pool.AddTask([=](int tid) {
         std::memcpy(out_ptr + b_start, in_ptr + b_start, b_size);
       }, b_size);
     }
@@ -250,7 +250,7 @@ void NumpyReaderCPU::RunImpl(Workspace &ws) {
                   blocks_per_sample);
     } else if (need_transpose_[i]) {
       // TODO(janton): Parallelize when Transpose supports tiling
-      thread_pool.AddWork([&, i, input_sample](int tid) {
+      thread_pool.AddTask([&, i, input_sample](int tid) {
         numpy::FromFortranOrder(output[i], input_sample);
       }, sample_sz * 8);  // 8 x (heuristic)
     } else {
diff --git a/dali/operators/reader/webdataset_reader_op.cc b/dali/operators/reader/webdataset_reader_op.cc
index 9163a7c73bb..751ce208c23 100644
--- a/dali/operators/reader/webdataset_reader_op.cc
+++ b/dali/operators/reader/webdataset_reader_op.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ void WebdatasetReader::RunImpl(Workspace &ws) {
                     sample[output_idx].nbytes());
       };
       if (threaded) {
-        ws.GetThreadPool().AddWork(std::move(copy_task), -data_idx);
+        ws.GetThreadPool().AddTask(std::move(copy_task), -data_idx);
       } else {
         copy_task(0);
       }
diff --git a/dali/operators/segmentation/random_mask_pixel.cc b/dali/operators/segmentation/random_mask_pixel.cc
index 36b7082d1f9..b1a53e54154 100644
--- a/dali/operators/segmentation/random_mask_pixel.cc
+++ b/dali/operators/segmentation/random_mask_pixel.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -123,7 +123,7 @@ void RandomMaskPixelCPU::RunImplTyped(Workspace &ws) {
   rle_.resize(thread_pool.NumThreads());
 
   for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
-    thread_pool.AddWork(
+    thread_pool.AddTask(
       [&, sample_idx](int thread_id) {
         auto &rng = rngs_[sample_idx];
         auto mask = masks_view[sample_idx];
diff --git a/dali/operators/segmentation/random_object_bbox.cc b/dali/operators/segmentation/random_object_bbox.cc
index 6152e6e94f9..6019bdddffa 100644
--- a/dali/operators/segmentation/random_object_bbox.cc
+++ b/dali/operators/segmentation/random_object_bbox.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -189,7 +189,7 @@ void RandomObjectBBox::SampleContext<BlobLabel>::FindLabels(const InTensorCPU<T>
   for (int i = 0; i < num_chunks; i++) {
     int64_t start = N * i / num_chunks;
     int64_t end = N * (i + 1)  / num_chunks;
-    thread_pool->AddWork([=](int) {
+    thread_pool->AddTask([=](int) {
       auto &lbl = tmp_labels[i];
       lbl.clear();
       detail::FindLabels(lbl, data + start, end - start);
@@ -227,7 +227,7 @@ void FilterByLabel(ThreadPool *tp,
       auto *out_start = out.data + start;
       auto *in_start = in.data + start;
       int64_t n = end - start;
-      tp->AddWork([=](int) {
+      tp->AddTask([=](int) {
         FilterByLabel(out_start, in_start, n, label);
       });
     }
diff --git a/dali/operators/sequence/sequence_rearrange.cc b/dali/operators/sequence/sequence_rearrange.cc
index 7ea28044160..1b71f28d15a 100644
--- a/dali/operators/sequence/sequence_rearrange.cc
+++ b/dali/operators/sequence/sequence_rearrange.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -77,7 +77,7 @@ void SequenceRearrange<CPUBackend>::RunImpl(Workspace &ws) {
   auto curr_batch_size = ws.GetInputBatchSize(0);
 
   for (int sample_idx = 0; sample_idx < curr_batch_size; ++sample_idx) {
-    thread_pool.AddWork([this, &ws, &input, &output, sample_idx](int tid) {
+    thread_pool.AddTask([this, &ws, &input, &output, sample_idx](int tid) {
       const TypeInfo &type = input.type_info();
       const auto *in_sample = reinterpret_cast<const char *>(input.raw_tensor(sample_idx));
       auto *out_sample = reinterpret_cast<char *>(output.raw_mutable_tensor(sample_idx));
diff --git a/dali/operators/signal/decibel/to_decibels_op_cpu.cc b/dali/operators/signal/decibel/to_decibels_op_cpu.cc
index a28e05e4be1..e45a346824f 100644
--- a/dali/operators/signal/decibel/to_decibels_op_cpu.cc
+++ b/dali/operators/signal/decibel/to_decibels_op_cpu.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -81,7 +81,7 @@ void ToDecibels<CPUBackend>::RunImpl(Workspace &ws) {
   TYPE_SWITCH(input.type(), type2id, T, (float), (
     using ToDbKernel = kernels::signal::ToDecibelsCpu<T>;
     for (int i = 0; i < input.shape().num_samples(); i++) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
         [this, &input, &output, i](int thread_id) {
           kernels::KernelContext ctx;
           auto in_view = view<const T>(input[i]);
diff --git a/dali/operators/signal/fft/power_spectrum.cc b/dali/operators/signal/fft/power_spectrum.cc
index fd5c7281b83..87e20a31eab 100644
--- a/dali/operators/signal/fft/power_spectrum.cc
+++ b/dali/operators/signal/fft/power_spectrum.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -95,7 +95,7 @@ void PowerSpectrum<CPUBackend>::RunImpl(Workspace &ws) {
     using FftKernel = kernels::signal::fft::Fft1DCpu<OutputType, InputType, Dims>;
 
     for (int i = 0; i < input.shape().num_samples(); i++) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
         [this, &input, &output, i](int thread_id) {
           kernels::KernelContext ctx;
           auto in_view = view<const InputType, Dims>(input[i]);
diff --git a/dali/operators/signal/fft/spectrogram.cc b/dali/operators/signal/fft/spectrogram.cc
index 157154f6d54..2f08b7d19c3 100644
--- a/dali/operators/signal/fft/spectrogram.cc
+++ b/dali/operators/signal/fft/spectrogram.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -261,7 +261,7 @@ void SpectrogramImplCpu<time_major>::RunImpl(Workspace &ws) {
   output.SetLayout(layout_);
 
   for (int i = 0; i < nsamples; i++) {
-    thread_pool.AddWork(
+    thread_pool.AddTask(
       [this, &input, &output, view_window_fn, i](int thread_id) {
         kernels::KernelContext ctx;
 
diff --git a/dali/pipeline/operator/builtin/conditional/merge.cc b/dali/pipeline/operator/builtin/conditional/merge.cc
index 94539aa9ff0..a2ae8689393 100644
--- a/dali/pipeline/operator/builtin/conditional/merge.cc
+++ b/dali/pipeline/operator/builtin/conditional/merge.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -145,7 +145,7 @@ void Merge<CPUBackend>::CopySampleToOutput(TensorList<CPUBackend> &output, int o
                                            const TensorList<CPUBackend> &input,
                                            int input_sample_idx, Workspace &ws) {
   auto &tp = ws.GetThreadPool();
-  tp.AddWork(
+  tp.AddTask(
       [&output, &input, output_sample_idx, input_sample_idx](int thread_idx) {
         output.ResizeSample(output_sample_idx, input.shape()[input_sample_idx]);
         output.CopySample(output_sample_idx, input, input_sample_idx, output.order());
diff --git a/dali/pipeline/operator/builtin/input_operator.cc b/dali/pipeline/operator/builtin/input_operator.cc
index 1e7c9167d37..c3b65abce60 100644
--- a/dali/pipeline/operator/builtin/input_operator.cc
+++ b/dali/pipeline/operator/builtin/input_operator.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -69,7 +69,7 @@ void InputOperator<CPUBackend>::ForwardCurrentData(TensorList<CPUBackend> &targe
     target.SetLayout(tensor_list_elm.front()->GetLayout());
 
     for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
               [&target, sample_id, &tensor_list_elm](int tid) {
                   target.CopySample(sample_id, *tensor_list_elm.front(), sample_id,
                                     AccessOrder::host());
@@ -168,7 +168,7 @@ void InputOperator<MixedBackend>::ForwardCurrentData(TensorList<CPUBackend> &tar
     target.SetLayout(tensor_list_elm.front()->GetLayout());
 
     for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
               [&target, sample_id, &tensor_list_elm](int tid) {
                   target.CopySample(sample_id, *tensor_list_elm.front(), sample_id,
                                     AccessOrder::host());
diff --git a/dali/pipeline/operator/builtin/make_contiguous.cc b/dali/pipeline/operator/builtin/make_contiguous.cc
index 7613cc61bf6..acfd1602fe1 100644
--- a/dali/pipeline/operator/builtin/make_contiguous.cc
+++ b/dali/pipeline/operator/builtin/make_contiguous.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ void MakeContiguousCPU::RunImpl(Workspace &ws) {
 
     auto &thread_pool = ws.GetThreadPool();
     for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
-      thread_pool.AddWork(
+      thread_pool.AddTask(
           [sample_id, &input, &output](int tid) {
             output.CopySample(sample_id, input, sample_id, AccessOrder::host());
           },
diff --git a/dali/pipeline/operator/operator.h b/dali/pipeline/operator/operator.h
index 4b00034857e..2b4d81dd13c 100644
--- a/dali/pipeline/operator/operator.h
+++ b/dali/pipeline/operator/operator.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -284,7 +284,7 @@ class Operator<CPUBackend> : public OperatorBase {
     }
     auto &thread_pool = ws.GetThreadPool();
     for (int data_idx = 0; data_idx < curr_batch_size; ++data_idx) {
-      thread_pool.AddWork([this, &ws, data_idx](int tid) {
+      thread_pool.AddTask([this, &ws, data_idx](int tid) {
         SampleWorkspace sample;
         MakeSampleView(sample, ws, data_idx, tid);
         this->SetupSharedSampleParams(sample);
diff --git a/dali/pipeline/util/for_each_thread.h b/dali/pipeline/util/for_each_thread.h
index cfe319129ca..711ac581089 100644
--- a/dali/pipeline/util/for_each_thread.h
+++ b/dali/pipeline/util/for_each_thread.h
@@ -44,7 +44,7 @@ void ForEachThread(ThreadPool &tp, Func &&func) {
   int n = tp.NumThreads();
   std::atomic_int pending{n};
   for (int i = 0; i < n; i++) {
-    tp.AddWork([&](int tid) {
+    tp.AddTask([&](int tid) {
       std::exception_ptr err{nullptr};
       try {
         func(tid);
diff --git a/dali/pipeline/util/thread_pool.cc b/dali/pipeline/util/thread_pool.cc
index c6d4243fdbf..a3d0448f300 100644
--- a/dali/pipeline/util/thread_pool.cc
+++ b/dali/pipeline/util/thread_pool.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -59,7 +59,7 @@ ThreadPool::~ThreadPool() {
 #endif
 }
 
-void ThreadPool::AddWork(Work work, int64_t priority, bool start_immediately) {
+void ThreadPool::AddTask(Work work, int64_t priority, bool start_immediately) {
   bool started_before = false;
   {
     std::lock_guard<std::mutex> lock(mutex_);
diff --git a/dali/pipeline/util/thread_pool.h b/dali/pipeline/util/thread_pool.h
index e5e78685c59..903e61716ae 100644
--- a/dali/pipeline/util/thread_pool.h
+++ b/dali/pipeline/util/thread_pool.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -45,11 +45,11 @@ class DLL_PUBLIC ThreadPool {
    * @brief Adds work to the queue with optional priority, and optionally starts processing
    *
    * The jobs are queued but the workers don't pick up the work unless they have
-   * already been started by a previous call to AddWork with start_immediately = true or RunAll.
+   * already been started by a previous call to AddTask with start_immediately = true or RunAll.
    * Once work is started, the threads will continue to pick up whatever work is scheduled
    * until WaitForWork is called.
    */
-  DLL_PUBLIC void AddWork(Work work, int64_t priority = 0, bool start_immediately = false);
+  DLL_PUBLIC void AddTask(Work work, int64_t priority = 0, bool start_immediately = false);
 
   /**
    * @brief Wakes up all the threads to complete all the queued work,
diff --git a/dali/pipeline/util/thread_pool_test.cc b/dali/pipeline/util/thread_pool_test.cc
index eb238e3b08b..79cb07d619f 100644
--- a/dali/pipeline/util/thread_pool_test.cc
+++ b/dali/pipeline/util/thread_pool_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,30 +20,30 @@ namespace dali {
 
 namespace test {
 
-TEST(ThreadPool, AddWork) {
+TEST(ThreadPool, AddTask) {
   ThreadPool tp(16, 0, false, "ThreadPool test");
   std::atomic<int> count{0};
   auto increase = [&count](int thread_id) { count++; };
   for (int i = 0; i < 64; i++) {
-    tp.AddWork(increase);
+    tp.AddTask(increase);
   }
   ASSERT_EQ(count, 0);
   tp.RunAll();
   ASSERT_EQ(count, 64);
 }
 
-TEST(ThreadPool, AddWorkImmediateStart) {
+TEST(ThreadPool, AddTaskImmediateStart) {
   ThreadPool tp(16, 0, false, "ThreadPool test");
   std::atomic<int> count{0};
   auto increase = [&count](int thread_id) { count++; };
   for (int i = 0; i < 64; i++) {
-    tp.AddWork(increase, 0, true);
+    tp.AddTask(increase, 0, true);
   }
   tp.WaitForWork();
   ASSERT_EQ(count, 64);
 }
 
-TEST(ThreadPool, AddWorkWithPriority) {
+TEST(ThreadPool, AddTaskWithPriority) {
   // only one thread to ensure deterministic behavior
   ThreadPool tp(1, 0, false, "ThreadPool test");
   std::atomic<int> count{0};
@@ -57,12 +57,12 @@ TEST(ThreadPool, AddWorkWithPriority) {
     int val = count.load();
     while (!count.compare_exchange_weak(val, val * 2)) {}
   };
-  tp.AddWork(increase_by_1, 2);
-  tp.AddWork(mult_by_2, 7);
-  tp.AddWork(mult_by_2, 9);
-  tp.AddWork(mult_by_2, 8);
-  tp.AddWork(increase_by_1, 100);
-  tp.AddWork(set_to_1, 1000);
+  tp.AddTask(increase_by_1, 2);
+  tp.AddTask(mult_by_2, 7);
+  tp.AddTask(mult_by_2, 9);
+  tp.AddTask(mult_by_2, 8);
+  tp.AddTask(increase_by_1, 100);
+  tp.AddTask(set_to_1, 1000);
 
   tp.RunAll();
   ASSERT_EQ(((1+1) << 3) + 1, count);
@@ -79,7 +79,7 @@ TEST(ThreadPool, CheckName) {
   auto set_name = [&read_thread_pool_name](int thread_id) {
     pthread_getname_np(pthread_self(), read_thread_pool_name, sizeof(read_thread_pool_name));
   };
-  tp.AddWork(set_name, 1);
+  tp.AddTask(set_name, 1);
 
   tp.RunAll();
   // skip terminating \0 character
diff --git a/dali/test/plugins/dummy/dummy.cc b/dali/test/plugins/dummy/dummy.cc
index fe3febd4639..4e8ee4f801e 100644
--- a/dali/test/plugins/dummy/dummy.cc
+++ b/dali/test/plugins/dummy/dummy.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@ void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) {
   auto &tp = ws.GetThreadPool();
   const auto &in_shape = input.shape();
   for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {
-    tp.AddWork(
+    tp.AddTask(
         [&, sample_id](int thread_id) {
           type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id),
                                                             input.raw_tensor(sample_id),
diff --git a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb
index 2e314c91fda..7082470a896 100644
--- a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb
+++ b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb
@@ -128,7 +128,7 @@
       "  auto &tp = ws.GetThreadPool();\r\n",
       "  const auto &in_shape = input.shape();\r\n",
       "  for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {\r\n",
-      "    tp.AddWork(\r\n",
+      "    tp.AddTask(\r\n",
       "        [&, sample_id](int thread_id) {\r\n",
       "          type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id),\r\n",
       "                                                            input.raw_tensor(sample_id),\r\n",
diff --git a/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc b/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc
index a2a929a840b..499f6013167 100644
--- a/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc
+++ b/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc
@@ -11,7 +11,7 @@ void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) {
   auto &tp = ws.GetThreadPool();
   const auto &in_shape = input.shape();
   for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {
-    tp.AddWork(
+    tp.AddTask(
         [&, sample_id](int thread_id) {
           type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id),
                                                             input.raw_tensor(sample_id),
diff --git a/include/dali/core/exec/engine.h b/include/dali/core/exec/engine.h
index a7d90c36d49..9857159e7de 100644
--- a/include/dali/core/exec/engine.h
+++ b/include/dali/core/exec/engine.h
@@ -26,7 +26,7 @@ concept ExecutionEngine {
   /// @param priority    priority hint for the job, the higher, the earlier it should start
   /// @param start_immediately        if true, all jobs can start - it's just a hint
   ///                                 and implementations may start running the jobs earlier
-  void AddWork(CallableWithInt f, int64_t priority, bool start_immediately = false);
+  void AddTask(CallableWithInt f, int64_t priority, bool start_immediately = false);
 
   /// @brief Starts the work and waits for it to complete.
   /// If there was an exception in one of the jobs, rethrows one of them.
@@ -46,7 +46,7 @@ class SequentialExecutionEngine {
    * @brief Immediately execute a callable object `f` with thread index 0.
    */
   template <typename FunctionLike>
-  void AddWork(FunctionLike &&f, int64_t priority = 0) {
+  void AddTask(FunctionLike &&f, int64_t priority = 0) {
     const int idx = 0;  // use of 0 literal would successfully call f expecting a pointer
     f(idx);
   }
diff --git a/include/dali/core/exec/thread_pool_base.h b/include/dali/core/exec/thread_pool_base.h
index 6ac8177206c..45e6c8953c8 100644
--- a/include/dali/core/exec/thread_pool_base.h
+++ b/include/dali/core/exec/thread_pool_base.h
@@ -43,15 +43,17 @@ class DLL_PUBLIC Job {
   using priority_t = int64_t;
 
   template <typename Runnable>
-  std::enable_if_t<std::is_convertible_v<Runnable, std::function<void()>>>
-  AddTask(Runnable &&runnable, priority_t priority = {}) {
+  void AddTask(Runnable &&runnable, priority_t priority = {}) {
     if (started_)
       throw std::logic_error("This job has already been started - cannot add more tasks to it");
     auto it = tasks_.emplace(priority, Task());
     try {
-      it->second.func = [this, task = &it->second, f = std::move(runnable)]() noexcept {
+      it->second.func = [this, task = &it->second, f = std::move(runnable)](int tid) noexcept {
         try {
-          f();
+          if constexpr (std::is_invocable_v<Runnable, int>)
+            f(tid);
+          else
+            f();
         } catch (...) {
           task->error = std::current_exception();
         }
@@ -93,7 +95,7 @@ class DLL_PUBLIC Job {
   bool waited_for_ = false;
 
   struct Task {
-    std::function<void()> func;
+    std::function<void(int)> func;
     std::exception_ptr error;
   };
 
@@ -104,7 +106,7 @@ class DLL_PUBLIC Job {
 
 class DLL_PUBLIC ThreadPoolBase {
  public:
-  using TaskFunc = std::function<void()>;
+  using TaskFunc = std::function<void(int)>;
 
   ThreadPoolBase() = default;
   explicit ThreadPoolBase(int num_threads) {
@@ -119,6 +121,8 @@ class DLL_PUBLIC ThreadPoolBase {
 
   void AddTask(TaskFunc f);
 
+  int NumThreads() const { return threads_.size(); }
+
   /**
    * @brief Returns the thread pool that owns the calling thread (or nullptr)
    */
@@ -135,9 +139,12 @@ class DLL_PUBLIC ThreadPoolBase {
     return this_thread_idx_;
   }
 
- protected:
   void Shutdown();
 
+  bool ShutdownPending() const {
+    return shutdown_pending_;
+  }
+
  private:
   friend class Job;
 
@@ -162,16 +169,15 @@ class DLL_PUBLIC ThreadPoolBase {
 };
 
 
-template <typename ThreadPool>
 class ThreadedExecutionengine {
  public:
-  ThreadedExecutionengine(ThreadPool &tp) : tp_(tp) {}  // NOLINT
+  ThreadedExecutionengine(ThreadPoolBase &tp) : tp_(tp) {}  // NOLINT
 
   /**
    * @brief Immediately execute a callable object `f` with thread index 0.
    */
   template <typename FunctionLike>
-  void AddWork(FunctionLike &&f, int64_t priority = 0) {
+  void AddTask(FunctionLike &&f, int64_t priority = 0) {
     job_.AddTask(std::forward<FunctionLike>(f), priority);
   }
 
@@ -183,12 +189,12 @@ class ThreadedExecutionengine {
     return tp_.NumThreads();
   }
 
-  ThreadPool &GetThreadPool() const noexcept {
+  ThreadPoolBase &GetThreadPool() const noexcept {
     return tp_;
   }
 
  private:
-  ThreadPool &tp_;
+  ThreadPoolBase &tp_;
   Job job_;
 };