From c1c5ca76049b39128a44f97fadb19dd5de8a268c Mon Sep 17 00:00:00 2001 From: Michal Zientkiewicz Date: Mon, 27 Feb 2023 16:56:39 +0100 Subject: [PATCH] Rename AddWork to AddTask. Signed-off-by: Michal Zientkiewicz --- dali/benchmark/thread_pool_bench.cc | 14 ++++----- dali/core/exec/thread_pool_base.cc | 2 +- dali/core/exec/thread_pool_base_test.cc | 5 ++-- .../imgcodec/decoders/decoder_parallel_impl.h | 6 ++-- .../nvjpeg_lossless/nvjpeg_lossless.cc | 2 +- dali/imgcodec/future_decode_result_test.cc | 6 ++-- dali/imgcodec/tools/imagemagick_test.cc | 4 +-- dali/kernels/common/scatter_gather.h | 4 +-- .../imgproc/structure/connected_components.h | 10 +++---- dali/kernels/imgproc/structure/label_bbox.h | 4 +-- dali/kernels/slice/slice_cpu.h | 8 ++--- .../slice_flip_normalize_permute_pad_cpu.h | 6 ++-- .../audio/mel_scale/mel_filter_bank.cc | 4 +-- dali/operators/audio/mfcc/mfcc.cc | 4 +-- dali/operators/audio/nonsilence_op.cc | 4 +-- dali/operators/audio/preemphasis_filter_op.cc | 4 +-- dali/operators/audio/resample.cc | 4 +-- dali/operators/bbox/bb_flip.cc | 4 +-- .../decoder/audio/audio_decoder_op.cc | 4 +-- .../nvjpeg/nvjpeg_decoder_decoupled_api.h | 12 ++++---- .../decoder/video/video_decoder_cpu.cc | 6 ++-- .../decoder/video/video_decoder_mixed.cc | 6 ++-- dali/operators/generic/cast.cc | 4 +-- dali/operators/generic/erase/erase.cc | 4 +-- dali/operators/generic/join.cc | 2 +- dali/operators/generic/lookup_table.cc | 4 +-- dali/operators/generic/one_hot.cc | 4 +-- dali/operators/generic/pad.cc | 4 +-- dali/operators/generic/permute_batch.cc | 4 +-- dali/operators/generic/reduce/reduce.h | 2 +- .../generic/reduce/reduce_with_mean_input.h | 2 +- dali/operators/generic/transpose/transpose.cc | 4 +-- dali/operators/geometry/coord_flip.cc | 4 +-- dali/operators/geometry/coord_transform.cc | 4 +-- .../image/color/brightness_contrast.cc | 4 +-- .../image/color/color_space_conversion.cc | 4 +-- dali/operators/image/color/color_twist.cc | 4 +-- .../image/convolution/gaussian_blur.cc | 4 +-- dali/operators/image/convolution/laplacian.cc | 4 +-- dali/operators/image/crop/bbox_crop.cc | 4 +-- .../jpeg_compression_distortion_op_cpu.cc | 4 +-- dali/operators/image/mask/grid_mask.cc | 4 +-- dali/operators/image/paste/multipaste.cc | 6 ++-- .../image/peek_shape/peek_image_shape.h | 4 +-- .../remap/displacement_filter_impl_cpu.h | 4 +-- dali/operators/image/remap/warp.h | 4 +-- .../image/resize/resize_op_impl_cpu.h | 4 +-- dali/operators/imgcodec/decoder.h | 4 +-- dali/operators/imgcodec/peek_image_shape.cc | 4 +-- dali/operators/math/expressions/arithmetic.cc | 4 +-- dali/operators/math/normalize/normalize.cc | 8 ++--- dali/operators/numba_function/numba_func.cc | 4 +-- .../python_function/dltensor_function.cc | 4 +-- dali/operators/random/rng_base_cpu.h | 6 ++-- dali/operators/reader/nemo_asr_reader_op.cc | 4 +-- dali/operators/reader/numpy_reader_gpu_op.cc | 6 ++-- dali/operators/reader/numpy_reader_op.cc | 8 ++--- dali/operators/reader/webdataset_reader_op.cc | 4 +-- .../segmentation/random_mask_pixel.cc | 4 +-- .../segmentation/random_object_bbox.cc | 6 ++-- dali/operators/sequence/sequence_rearrange.cc | 4 +-- .../signal/decibel/to_decibels_op_cpu.cc | 4 +-- dali/operators/signal/fft/power_spectrum.cc | 4 +-- dali/operators/signal/fft/spectrogram.cc | 4 +-- .../operator/builtin/conditional/merge.cc | 4 +-- .../operator/builtin/input_operator.cc | 6 ++-- .../operator/builtin/make_contiguous.cc | 4 +-- dali/pipeline/operator/operator.h | 4 +-- dali/pipeline/util/for_each_thread.h | 2 +- dali/pipeline/util/thread_pool.cc | 4 +-- dali/pipeline/util/thread_pool.h | 6 ++-- dali/pipeline/util/thread_pool_test.cc | 26 ++++++++-------- dali/test/plugins/dummy/dummy.cc | 4 +-- .../create_a_custom_operator.ipynb | 2 +- .../custom_operator/customdummy/dummy.cc | 2 +- include/dali/core/exec/engine.h | 4 +-- include/dali/core/exec/thread_pool_base.h | 30 +++++++++++-------- 77 files changed, 203 insertions(+), 196 deletions(-) diff --git a/dali/benchmark/thread_pool_bench.cc b/dali/benchmark/thread_pool_bench.cc index daa3941f75f..c68c2a4fd0c 100644 --- a/dali/benchmark/thread_pool_bench.cc +++ b/dali/benchmark/thread_pool_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ static void ThreadPoolArgs(benchmark::internal::Benchmark *b) { b->Args({batch_size, work_size_min, work_size_max, nthreads}); } -BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) { +BENCHMARK_DEFINE_F(ThreadPoolBench, AddTask)(benchmark::State& st) { int batch_size = st.range(0); int work_size_min = st.range(1); int work_size_max = st.range(2); @@ -40,7 +40,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) { while (st.KeepRunning()) { for (int i = 0; i < batch_size; i++) { auto size = this->RandInt(work_size_min, work_size_max); - thread_pool.AddWork( + thread_pool.AddTask( [&data, size, &total_count](int thread_id){ std::vector other_data; for (int i = 0; i < size; i++) { @@ -59,13 +59,13 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWork)(benchmark::State& st) { std::cout << total_count << std::endl; } -BENCHMARK_REGISTER_F(ThreadPoolBench, AddWork)->Iterations(1000) +BENCHMARK_REGISTER_F(ThreadPoolBench, AddTask)->Iterations(1000) ->Unit(benchmark::kMicrosecond) ->UseRealTime() ->Apply(ThreadPoolArgs); -BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) { +BENCHMARK_DEFINE_F(ThreadPoolBench, AddTaskDeferred)(benchmark::State& st) { int batch_size = st.range(0); int work_size_min = st.range(1); int work_size_max = st.range(2); @@ -78,7 +78,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) { while (st.KeepRunning()) { for (int i = 0; i < batch_size; i++) { auto size = this->RandInt(work_size_min, work_size_max); - thread_pool.AddWork( + thread_pool.AddTask( [&data, size, &total_count](int thread_id){ std::vector other_data; for (int i = 0; i < size; i++) { @@ -98,7 +98,7 @@ BENCHMARK_DEFINE_F(ThreadPoolBench, AddWorkDeferred)(benchmark::State& st) { } -BENCHMARK_REGISTER_F(ThreadPoolBench, AddWorkDeferred)->Iterations(1000) +BENCHMARK_REGISTER_F(ThreadPoolBench, AddTaskDeferred)->Iterations(1000) ->Unit(benchmark::kMicrosecond) ->UseRealTime() ->Apply(ThreadPoolArgs); diff --git a/dali/core/exec/thread_pool_base.cc b/dali/core/exec/thread_pool_base.cc index b47b204d7f4..affe1d3a5ac 100644 --- a/dali/core/exec/thread_pool_base.cc +++ b/dali/core/exec/thread_pool_base.cc @@ -125,7 +125,7 @@ void ThreadPoolBase::PopAndRunTask(std::unique_lock &lock) { TaskFunc t = std::move(tasks_.front()); tasks_.pop(); lock.unlock(); - t(); + t(this_thread_idx()); lock.lock(); } diff --git a/dali/core/exec/thread_pool_base_test.cc b/dali/core/exec/thread_pool_base_test.cc index 91c22fa3070..d001f7a887f 100644 --- a/dali/core/exec/thread_pool_base_test.cc +++ b/dali/core/exec/thread_pool_base_test.cc @@ -20,9 +20,10 @@ namespace dali { struct SerialExecutor { template - std::enable_if_t>> + std::enable_if_t>> AddTask(Runnable &&runnable) { - runnable(); + const int idx = 0; + runnable(idx); } }; diff --git a/dali/imgcodec/decoders/decoder_parallel_impl.h b/dali/imgcodec/decoders/decoder_parallel_impl.h index 11cf6ae31a6..0de5b49e42b 100644 --- a/dali/imgcodec/decoders/decoder_parallel_impl.h +++ b/dali/imgcodec/decoders/decoder_parallel_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -104,7 +104,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl { ROI no_roi; for (int i = 0; i < in.size(); i++) { auto roi = rois.empty() ? no_roi : rois[i]; - ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable { + ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable { try { promise.set(i, DecodeImplTask(tid, out, in, opts, roi)); } catch (...) { @@ -128,7 +128,7 @@ class DLL_PUBLIC BatchParallelDecoderImpl : public ImageDecoderImpl { ROI no_roi; for (int i = 0; i < in.size(); i++) { auto roi = rois.empty() ? no_roi : rois[i]; - ctx.tp->AddWork([=, out = out[i], in = in[i]](int tid) mutable { + ctx.tp->AddTask([=, out = out[i], in = in[i]](int tid) mutable { try { promise.set(i, DecodeImplTask(tid, ctx.stream, out, in, opts, roi)); } catch (...) { diff --git a/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc b/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc index b2ecc666cd2..f89a2429a95 100644 --- a/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc +++ b/dali/imgcodec/decoders/nvjpeg_lossless/nvjpeg_lossless.cc @@ -99,7 +99,7 @@ void NvJpegLosslessDecoderInstance::Parse(DecodeResultsPromise &promise, DecodeResultsPromise parse_promise(nsamples); for (int i = 0; i < nsamples; i++) { int tid = 0; - ctx.tp->AddWork( + ctx.tp->AddTask( [&, i](int tid) { auto &jpeg_stream = per_thread_resources_[tid].jpeg_stream; auto *sample = in[i]; diff --git a/dali/imgcodec/future_decode_result_test.cc b/dali/imgcodec/future_decode_result_test.cc index 6635e2b1b1d..3fdada471ee 100644 --- a/dali/imgcodec/future_decode_result_test.cc +++ b/dali/imgcodec/future_decode_result_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ TEST(FutureDecodeResultsTest, WaitNew) { DecodeResultsPromise pro(3); auto fut = pro.get_future(); - tp.AddWork([pro](int tidx) mutable { + tp.AddTask([pro](int tidx) mutable { std::this_thread::sleep_for(std::chrono::milliseconds(10)); pro.set(1, DecodeResult::Success()); pro.set(0, DecodeResult::Success()); @@ -59,7 +59,7 @@ TEST(FutureDecodeResultsTest, Benchmark) { auto start = std::chrono::high_resolution_clock::now(); for (int iter = 0; iter < num_iter; iter++) { DecodeResultsPromise res(100); - tp.AddWork([&](int tidx) { + tp.AddTask([&](int tidx) { for (int i = 0; i < res.num_samples(); i++) res.set(i, DecodeResult::Success()); }, 0, true); diff --git a/dali/imgcodec/tools/imagemagick_test.cc b/dali/imgcodec/tools/imagemagick_test.cc index 2ad3a3fa0e2..f84c92028df 100644 --- a/dali/imgcodec/tools/imagemagick_test.cc +++ b/dali/imgcodec/tools/imagemagick_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -205,7 +205,7 @@ void run(Env &env) { while (directory_it != fs::end(directory_it)) { auto batch = get_batch(env, directory_it); - pool.AddWork([=, &env](int tid){ + pool.AddTask([=, &env](int tid){ process(env, batch); }); } diff --git a/dali/kernels/common/scatter_gather.h b/dali/kernels/common/scatter_gather.h index ea3f3f08134..72adbb6ac70 100644 --- a/dali/kernels/common/scatter_gather.h +++ b/dali/kernels/common/scatter_gather.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -170,7 +170,7 @@ class DLL_PUBLIC ScatterGatherCPU : public ScatterGatherBase { } else { MakeBlocks(exec_engine.NumThreads() * kTasksMultiplier); for (auto &r : blocks_) { - exec_engine.AddWork([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size); + exec_engine.AddTask([=](int thread_id) { std::memcpy(r.dst, r.src, r.size); }, r.size); } exec_engine.RunAll(); } diff --git a/dali/kernels/imgproc/structure/connected_components.h b/dali/kernels/imgproc/structure/connected_components.h index 739a995dddc..9ee6f9cf630 100644 --- a/dali/kernels/imgproc/structure/connected_components.h +++ b/dali/kernels/imgproc/structure/connected_components.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -208,7 +208,7 @@ void LabelSlice(OutLabel *label_base, for (int64_t i = 0; i < n; i++) { auto out_slice = out.slice(i); auto in_slice = in.slice(i); - engine.AddWork([=, &seq_engn](int){ + engine.AddTask([=, &seq_engn](int){ LabelSlice(label_base, out_slice, in_slice, background, seq_engn); }); } @@ -220,7 +220,7 @@ void LabelSlice(OutLabel *label_base, auto in_slice = in.slice(i); auto prev_out = out.slice(i-1); auto prev_in = in.slice(i-1); - engine.AddWork([=](int){ + engine.AddTask([=](int){ MergeSlices(label_base, prev_out, out_slice, prev_in, in_slice); }); } @@ -313,7 +313,7 @@ int64_t CompactLabels(OutLabel *labels, int64_t chunk_start = volume * chunk / num_chunks; int64_t chunk_end = volume * (chunk + 1) / num_chunks; - engine.AddWork([=, &tmp_sets, &lock](int thread) { + engine.AddTask([=, &tmp_sets, &lock](int thread) { OutLabel prev = old_bg_label; OutLabel remapped = old_bg_label; for (int64_t i = chunk_start; i < chunk_end; i++) { @@ -354,7 +354,7 @@ int64_t CompactLabels(OutLabel *labels, for (int chunk = 0; chunk < num_chunks; chunk++) { int64_t chunk_start = volume * chunk / num_chunks; int64_t chunk_end = volume * (chunk + 1) / num_chunks; - engine.AddWork([=, &label_map](int) { + engine.AddTask([=, &label_map](int) { RemapChunk(make_span(labels + chunk_start, chunk_end - chunk_start), label_map); }); } diff --git a/dali/kernels/imgproc/structure/label_bbox.h b/dali/kernels/imgproc/structure/label_bbox.h index 0776b256ab4..0db405bba74 100644 --- a/dali/kernels/imgproc/structure/label_bbox.h +++ b/dali/kernels/imgproc/structure/label_bbox.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -245,7 +245,7 @@ void GetLabelBoundingBoxes(span> boxes, part_boxes = make_span(&tmp_boxes[(i - 1)*boxes.size()], boxes.size()); part.size[max_d] = end - start; part.data = in.data + start * stride; - engine.AddWork([=](int) { + engine.AddTask([=](int) { i64vec origin = {}; origin[dim_mapping[max_d]] = start; GetLabelBoundingBoxes(part_boxes, part, dim_mapping, background, origin); diff --git a/dali/kernels/slice/slice_cpu.h b/dali/kernels/slice/slice_cpu.h index 8fbe4c0663a..63f61f9da45 100644 --- a/dali/kernels/slice/slice_cpu.h +++ b/dali/kernels/slice/slice_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -313,7 +313,7 @@ void SliceKernel(ExecutionEngine &exec_engine, int64_t b_start = prev_b_end; int64_t b_end = prev_b_end = total_sz * (b + 1) / nblocks; int64_t b_nbytes = (b_end - b_start) * sizeof(OutputType); - exec_engine.AddWork([=](int tid) { + exec_engine.AddTask([=](int tid) { std::memcpy(out_data + b_start, in_data + b_start, b_nbytes); }, b_nbytes, false); // do not start work immediately } @@ -325,7 +325,7 @@ void SliceKernel(ExecutionEngine &exec_engine, int nblocks = split_shape(split_factor, out_shape, req_nblocks, min_blk_sz, skip_dim_mask); if (nblocks == 1) { - exec_engine.AddWork([=](int) { + exec_engine.AddTask([=](int) { SliceKernel(out_data, in_data, out_strides, in_strides, out_shape, in_shape, args.anchor, GetPtr(args.fill_values), args.channel_dim); }, kSliceCost * volume(out_shape), false); // do not start work immediately @@ -345,7 +345,7 @@ void SliceKernel(ExecutionEngine &exec_engine, blk_shape[d] = blk_end[d] - blk_start[d]; blk_anchor[d] = args.anchor[d] + blk_start[d]; } - exec_engine.AddWork([=](int) { + exec_engine.AddTask([=](int) { SliceKernel(output_ptr, in_data, out_strides, in_strides, blk_shape, in_shape, blk_anchor, GetPtr(args.fill_values), args.channel_dim); }, kSliceCost * volume(blk_shape), false); // do not start work immediately diff --git a/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h b/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h index 8580a62c0c0..b928bd1670c 100644 --- a/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h +++ b/dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -260,7 +260,7 @@ void SliceFlipNormalizePermutePadKernel( req_nblocks > 0 ? req_nblocks : exec_engine.NumThreads() * 8, min_blk_sz, skip_dim_mask); if (nblocks == 1) { - exec_engine.AddWork([=](int) { + exec_engine.AddTask([=](int) { SliceFlipNormalizePermutePadKernel(output, input, args.in_strides, args.out_strides, args.anchor, args.in_shape, args.out_shape, GetPtr(fill_values), @@ -288,7 +288,7 @@ void SliceFlipNormalizePermutePadKernel( blk_anchor[d] = args.anchor[d] + blk_start[d]; } - exec_engine.AddWork([=](int) { + exec_engine.AddTask([=](int) { SliceFlipNormalizePermutePadKernel(output_ptr, input_ptr, args.in_strides, args.out_strides, blk_anchor, args.in_shape, blk_shape, GetPtr(fill_values), diff --git a/dali/operators/audio/mel_scale/mel_filter_bank.cc b/dali/operators/audio/mel_scale/mel_filter_bank.cc index 7ddfca4bf84..24497658144 100644 --- a/dali/operators/audio/mel_scale/mel_filter_bank.cc +++ b/dali/operators/audio/mel_scale/mel_filter_bank.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -101,7 +101,7 @@ void MelFilterBank::RunImpl(Workspace &ws) { TYPE_SWITCH(input.type(), type2id, T, MEL_FBANK_SUPPORTED_TYPES, ( using MelFilterBankKernel = kernels::audio::MelFilterBankCpu; for (int i = 0; i < input.shape().num_samples(); i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { auto in_view = view(input[i]); auto out_view = view(output[i]); diff --git a/dali/operators/audio/mfcc/mfcc.cc b/dali/operators/audio/mfcc/mfcc.cc index 705fd362b7c..67e0cd7c528 100644 --- a/dali/operators/audio/mfcc/mfcc.cc +++ b/dali/operators/audio/mfcc/mfcc.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -160,7 +160,7 @@ void MFCC::RunImpl(Workspace &ws) { VALUE_SWITCH(in_shape.sample_dim(), Dims, MFCC_SUPPORTED_NDIMS, ( using DctKernel = kernels::signal::dct::Dct1DCpu; for (int i = 0; i < input.shape().num_samples(); i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { kernels::KernelContext ctx; auto in_view = view(input[i]); diff --git a/dali/operators/audio/nonsilence_op.cc b/dali/operators/audio/nonsilence_op.cc index d7a922115a5..2c191d46f11 100644 --- a/dali/operators/audio/nonsilence_op.cc +++ b/dali/operators/audio/nonsilence_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ class NonsilenceOperatorCpu : public NonsilenceOperator { auto &tp = ws.GetThreadPool(); auto in_shape = input.shape(); for (int sample_id = 0; sample_id < curr_batch_size; sample_id++) { - tp.AddWork( + tp.AddTask( [&, sample_id](int thread_id) { detail::Args args; args.input = view(input[sample_id]); diff --git a/dali/operators/audio/preemphasis_filter_op.cc b/dali/operators/audio/preemphasis_filter_op.cc index 335414a30b1..b6db6dfba0f 100644 --- a/dali/operators/audio/preemphasis_filter_op.cc +++ b/dali/operators/audio/preemphasis_filter_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -63,7 +63,7 @@ void PreemphasisFilterCPU::RunImplTyped(Workspace &ws) { auto nsamples = shape.num_samples(); for (int sample_id = 0; sample_id < nsamples; sample_id++) { - tp.AddWork( + tp.AddTask( [this, &output, &input, sample_id](int thread_id) { const auto *in_ptr = input.tensor(sample_id); auto *out_ptr = output.mutable_tensor(sample_id); diff --git a/dali/operators/audio/resample.cc b/dali/operators/audio/resample.cc index f52220bbfd6..8766c5e9fca 100644 --- a/dali/operators/audio/resample.cc +++ b/dali/operators/audio/resample.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -136,7 +136,7 @@ class ResampleCPU : public ResampleBase { auto &tp = ws.GetThreadPool(); in_fp32.resize(tp.NumThreads()); for (int s = 0; s < N; s++) { - tp.AddWork([&, this, s](int thread_idx) { + tp.AddTask([&, this, s](int thread_idx) { InTensorCPU in_view; TYPE_SWITCH(in.type(), type2id, T, (AUDIO_RESAMPLE_TYPES), (in_view = ConvertInput(in_fp32[thread_idx], view(in[s]));), diff --git a/dali/operators/bbox/bb_flip.cc b/dali/operators/bbox/bb_flip.cc index 2f0f1b41483..7c31b8a445d 100644 --- a/dali/operators/bbox/bb_flip.cc +++ b/dali/operators/bbox/bb_flip.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -50,7 +50,7 @@ void BbFlipCPU::RunImpl(Workspace &ws) { TensorLayout layout = ltrb_ ? "xyXY" : "xyWH"; for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { - tp.AddWork( + tp.AddTask( [&, sample_idx](int thread_id) { bool vertical = vert_[sample_idx].data[0]; bool horizontal = horz_[sample_idx].data[0]; diff --git a/dali/operators/decoder/audio/audio_decoder_op.cc b/dali/operators/decoder/audio/audio_decoder_op.cc index 0bfb649bab3..8ace2929817 100644 --- a/dali/operators/decoder/audio/audio_decoder_op.cc +++ b/dali/operators/decoder/audio/audio_decoder_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -148,7 +148,7 @@ void AudioDecoderCpu::DecodeBatch(Workspace &ws) { scratch_resampler_.resize(tp.NumThreads()); for (int i = 0; i < batch_size; i++) { - tp.AddWork([&, i](int thread_id) { + tp.AddTask([&, i](int thread_id) { try { DecodeSample(decoded_output[i], thread_id, i); sample_rate_output[i].data[0] = use_resampling_ diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h index a48d1ef2619..6cab9db4bfe 100644 --- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h +++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -242,7 +242,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { #if NVJPEG2K_ENABLED auto nvjpeg2k_thread_id = nvjpeg2k_thread_.GetThreadIds()[0]; - nvjpeg2k_thread_.AddWork([this, device_memory_padding_jpeg2k, host_memory_padding_jpeg2k, + nvjpeg2k_thread_.AddTask([this, device_memory_padding_jpeg2k, host_memory_padding_jpeg2k, nvjpeg2k_thread_id](int) { nvjpeg2k_handle_ = NvJPEG2KHandle(&nvjpeg2k_dev_alloc_, &nvjpeg2k_pin_alloc_); @@ -572,7 +572,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { auto *input_data = input.tensor(i); const auto in_size = input.tensor_shape(i).num_elements(); const auto &source_info = input.GetMeta(i).GetSourceInfo(); - thread_pool_.AddWork([this, i, input_data, in_size, source_info](int tid) { + thread_pool_.AddTask([this, i, input_data, in_size, source_info](int tid) { SampleData &data = sample_data_[i]; data.clear(); data.sample_idx = i; @@ -725,7 +725,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { auto *output_data = output.mutable_tensor(i); const auto *in_data = input.tensor(i); const auto in_size = input.tensor_shape(i).num_elements(); - thread_pool_.AddWork( + thread_pool_.AddTask( [this, sample, in_data, in_size, output_data](int tid) { SampleWorker(sample->sample_idx, sample->file_name, in_size, tid, in_data, output_data, streams_[tid]); @@ -807,7 +807,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { if (!nvjpeg2k_handle_) { return; } - nvjpeg2k_thread_.AddWork([this, &ws](int) { + nvjpeg2k_thread_.AddTask([this, &ws](int) { auto &output = ws.Output(0); const auto &input = ws.Input(0); const auto &input_shape = input.shape(); @@ -833,7 +833,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { auto in_size = input.tensor_shape(i).num_elements(); auto *output_data = output.mutable_tensor(i); ImageCache::ImageShape shape = output_shape_[i].to_static<3>(); - thread_pool_.AddWork( + thread_pool_.AddTask( [this, sample, input_data, in_size, output_data, shape](int tid) { HostFallback(input_data, in_size, output_image_type_, output_data, streams_[tid], sample->file_name, sample->roi, use_fast_idct_); diff --git a/dali/operators/decoder/video/video_decoder_cpu.cc b/dali/operators/decoder/video/video_decoder_cpu.cc index 9a745e002b3..46e7547fffc 100644 --- a/dali/operators/decoder/video/video_decoder_cpu.cc +++ b/dali/operators/decoder/video/video_decoder_cpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ bool VideoDecoderCpu::SetupImpl(std::vector &output_desc, auto sample = input[i]; auto data = reinterpret_cast(sample.data()); size_t size = sample.shape().num_elements(); - thread_pool.AddWork([this, i, data, size](int tid) { + thread_pool.AddTask([this, i, data, size](int tid) { frames_decoders_[i] = std::make_unique(data, size, false); }); } @@ -44,7 +44,7 @@ void VideoDecoderCpu::RunImpl(Workspace &ws) { int batch_size = input.num_samples(); auto &thread_pool = ws.GetThreadPool(); for (int s = 0; s < batch_size; ++s) { - thread_pool.AddWork([this, s, &output](int tid) { + thread_pool.AddTask([this, s, &output](int tid) { DecodeSample(output[s], s); }, volume(input[s].shape())); } diff --git a/dali/operators/decoder/video/video_decoder_mixed.cc b/dali/operators/decoder/video/video_decoder_mixed.cc index 62f1ec1da0e..26c3b6c7fa8 100644 --- a/dali/operators/decoder/video/video_decoder_mixed.cc +++ b/dali/operators/decoder/video/video_decoder_mixed.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ bool VideoDecoderMixed::SetupImpl( auto stream = ws.stream(); frames_decoders_.resize(batch_size); for (int i = 0; i < batch_size; ++i) { - thread_pool_.AddWork([this, i, &input, stream](int) { + thread_pool_.AddTask([this, i, &input, stream](int) { auto sample = input[i]; auto data = reinterpret_cast(sample.data()); size_t size = sample.shape().num_elements(); @@ -43,7 +43,7 @@ void VideoDecoderMixed::Run(Workspace &ws) { const auto &input = ws.Input(0); int batch_size = input.num_samples(); for (int s = 0; s < batch_size; ++s) { - thread_pool_.AddWork([this, s, &output](int) { + thread_pool_.AddTask([this, s, &output](int) { DecodeSample(output[s], s); // when the decoding is done release the decoder, // so it can be reused by the next sample in the batch diff --git a/dali/operators/generic/cast.cc b/dali/operators/generic/cast.cc index 68aafe4670f..f426245e013 100644 --- a/dali/operators/generic/cast.cc +++ b/dali/operators/generic/cast.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ void CastCPU::RunImpl(Workspace &ws) { auto *out = output.mutable_tensor(sample_id); const auto *in = input.tensor(sample_id); auto size = input_shape.tensor_size(sample_id); - tp.AddWork([out, in, size](int thread_id) { CpuHelper(out, in, size); }, + tp.AddTask([out, in, size](int thread_id) { CpuHelper(out, in, size); }, size); } diff --git a/dali/operators/generic/erase/erase.cc b/dali/operators/generic/erase/erase.cc index 9d2af190616..e68922e716e 100644 --- a/dali/operators/generic/erase/erase.cc +++ b/dali/operators/generic/erase/erase.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -209,7 +209,7 @@ void EraseImplCpu::RunImpl(Workspace &ws) { auto& thread_pool = ws.GetThreadPool(); auto in_shape = input.shape(); for (int i = 0; i < nsamples; i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { kernels::KernelContext ctx; auto in_view = view(input[i]); diff --git a/dali/operators/generic/join.cc b/dali/operators/generic/join.cc index 1fb241e8809..e042f63fdc9 100644 --- a/dali/operators/generic/join.cc +++ b/dali/operators/generic/join.cc @@ -233,7 +233,7 @@ void TensorJoin::RunTyped( in_shapes.resize(num_threads * njoin); for (int i = 0; i < N; i++) { - tp.AddWork([&, i](int tid) { + tp.AddTask([&, i](int tid) { kernels::KernelContext ctx; auto sample_in_tensors = make_span(&in_tensors[tid * njoin], njoin); auto sample_in_shapes = make_span(&in_shapes[tid * njoin], njoin); diff --git a/dali/operators/generic/lookup_table.cc b/dali/operators/generic/lookup_table.cc index 20c4272f7cd..fa641d81552 100644 --- a/dali/operators/generic/lookup_table.cc +++ b/dali/operators/generic/lookup_table.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ void LookupValuesImpl(ThreadPool &tp, TensorList &output, auto data_size = shape.tensor_size(sample_idx); auto *out_data = output.mutable_tensor(sample_idx); const auto *in_data = input.tensor(sample_idx); - tp.AddWork( + tp.AddTask( [=](int thread_id) { for (int64_t i = 0; i < data_size; i++) { DoLookup(out_data[i], in_data[i], lookup_table, default_value); diff --git a/dali/operators/generic/one_hot.cc b/dali/operators/generic/one_hot.cc index bf1ce3bd0d4..056613ad746 100644 --- a/dali/operators/generic/one_hot.cc +++ b/dali/operators/generic/one_hot.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -88,7 +88,7 @@ void OneHotCPU::RunImpl(Workspace &ws) { auto in_tensor = view(input); auto out_tensor = view(output); for (int sample_id = 0; sample_id < num_samples; ++sample_id) { - tp.AddWork( + tp.AddTask( [&, sample_id](int thread_id) { auto in = in_tensor[sample_id]; auto out = out_tensor[sample_id]; diff --git a/dali/operators/generic/pad.cc b/dali/operators/generic/pad.cc index 37c659ef505..06444a9a268 100644 --- a/dali/operators/generic/pad.cc +++ b/dali/operators/generic/pad.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -235,7 +235,7 @@ void Pad::RunImpl(Workspace &ws) { using Args = kernels::SliceArgs; for (int i = 0; i < nsamples; i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { kernels::KernelContext ctx; auto in_view = view(input[i]); diff --git a/dali/operators/generic/permute_batch.cc b/dali/operators/generic/permute_batch.cc index 23943394b19..ea271e14678 100644 --- a/dali/operators/generic/permute_batch.cc +++ b/dali/operators/generic/permute_batch.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ void PermuteBatch::RunImpl(Workspace &ws) { for (int i = 0; i < N; i++) { auto size = output_shape.tensor_size(i); int src = indices_[i]; - tp.AddWork([&, i, src](int tid) { + tp.AddTask([&, i, src](int tid) { output.SetMeta(i, input.GetMeta(i)); // TODO(klecki): SetSample output.CopySample(i, input, src); diff --git a/dali/operators/generic/reduce/reduce.h b/dali/operators/generic/reduce/reduce.h index a229117e56f..8cbe460d21c 100644 --- a/dali/operators/generic/reduce/reduce.h +++ b/dali/operators/generic/reduce/reduce.h @@ -90,7 +90,7 @@ class Reduce : public Operator, AxesHelper { for (int sample = 0; sample < in_view.num_samples(); sample++) { int64_t priority = volume(in_view.shape.tensor_shape_span(sample)); - thread_pool.AddWork( + thread_pool.AddTask( [&, sample](int thread_id) { auto in_sample_view = in_view[sample]; auto out_sample_view = out_view[sample]; diff --git a/dali/operators/generic/reduce/reduce_with_mean_input.h b/dali/operators/generic/reduce/reduce_with_mean_input.h index bfecfab3285..0823d4fcfc9 100644 --- a/dali/operators/generic/reduce/reduce_with_mean_input.h +++ b/dali/operators/generic/reduce/reduce_with_mean_input.h @@ -103,7 +103,7 @@ class ReduceWithMeanInput : public Operator, AxesHelper { for (int sample = 0; sample < in_view.num_samples(); sample++) { int64_t priority = volume(in_view.shape.tensor_shape_span(sample)); - thread_pool.AddWork( + thread_pool.AddTask( [&, sample](int thread_id) { auto in_sample_view = in_view[sample]; auto mean_sample_view = mean_view[sample]; diff --git a/dali/operators/generic/transpose/transpose.cc b/dali/operators/generic/transpose/transpose.cc index 1ef632c406f..6df3a15f0b2 100644 --- a/dali/operators/generic/transpose/transpose.cc +++ b/dali/operators/generic/transpose/transpose.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ class TransposeCPU : public Transpose { TYPE_SWITCH(input_type, type2id, T, TRANSPOSE_ALLOWED_TYPES, ( for (int i = 0; i < nsamples; i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { TensorShape<> src_ts = input.shape()[i]; diff --git a/dali/operators/geometry/coord_flip.cc b/dali/operators/geometry/coord_flip.cc index 09244c60960..7143c403b07 100644 --- a/dali/operators/geometry/coord_flip.cc +++ b/dali/operators/geometry/coord_flip.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ void CoordFlipCPU::RunImpl(Workspace &ws) { mirrored_origin[z_dim_] = 2.0f * spec_.GetArgument("center_z", &ws, sample_id); auto in_size = volume(input.tensor_shape(sample_id)); - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, in_size, &output, sample_id, flip_dim, mirrored_origin](int thread_id) { const auto *in = input.tensor(sample_id); auto *out = output.mutable_tensor(sample_id); diff --git a/dali/operators/geometry/coord_transform.cc b/dali/operators/geometry/coord_transform.cc index dc43012d73e..f3972fe0acd 100644 --- a/dali/operators/geometry/coord_transform.cc +++ b/dali/operators/geometry/coord_transform.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -63,7 +63,7 @@ void CoordTransform::RunTyped(Workspace &ws) { auto T = GetTranslations(); for (int idx = 0; idx < in_view.num_samples(); idx++) { - tp.AddWork([&, idx](int tid) { + tp.AddTask([&, idx](int tid) { kernels::KernelContext ctx; auto in_tensor = in_view[idx]; auto out_tensor = out_view[idx]; diff --git a/dali/operators/image/color/brightness_contrast.cc b/dali/operators/image/color/brightness_contrast.cc index 5e2f562b31c..90ecacceae6 100644 --- a/dali/operators/image/color/brightness_contrast.cc +++ b/dali/operators/image/color/brightness_contrast.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -118,7 +118,7 @@ void BrightnessContrastCpu::RunImplHelper(Workspace &ws) { sequence_utils::unfolded_views_range(out_view[sample_id], in_view[sample_id]); const auto &in_range = planes_range.template get<1>(); for (auto &&views : planes_range) { - tp.AddWork([&, views, add, mul](int thread_id) { + tp.AddTask([&, views, add, mul](int thread_id) { kernels::KernelContext ctx; auto &[tvout, tvin] = views; kernel_manager_.Run(0, ctx, tvout, tvin, add, mul); diff --git a/dali/operators/image/color/color_space_conversion.cc b/dali/operators/image/color/color_space_conversion.cc index 631a268c64e..0c0eaac7294 100644 --- a/dali/operators/image/color/color_space_conversion.cc +++ b/dali/operators/image/color/color_space_conversion.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ void ColorSpaceConversion::RunImpl(Workspace &ws) { int ndim = in_sh.sample_dim(); auto& thread_pool = ws.GetThreadPool(); for (int i = 0; i < nsamples; i++) { - thread_pool.AddWork( + thread_pool.AddTask( [&, i](int thread_id) { auto in_sample_sh = in_sh.tensor_shape_span(i); // flatten any leading dimensions together with the height diff --git a/dali/operators/image/color/color_twist.cc b/dali/operators/image/color/color_twist.cc index f2c8ad20608..c8a69133357 100644 --- a/dali/operators/image/color/color_twist.cc +++ b/dali/operators/image/color/color_twist.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -155,7 +155,7 @@ void ColorTwistCpu::RunImplHelper(Workspace &ws) { auto planes_range = sequence_utils::unfolded_views_range(out_view[i], in_view[i]); const auto &in_range = planes_range.template get<1>(); for (auto &&views : planes_range) { - tp.AddWork( + tp.AddTask( [&, i, views](int thread_id) { kernels::KernelContext ctx; auto &[tvout, tvin] = views; diff --git a/dali/operators/image/convolution/gaussian_blur.cc b/dali/operators/image/convolution/gaussian_blur.cc index 0a7faa0ca24..bf603aa62ea 100644 --- a/dali/operators/image/convolution/gaussian_blur.cc +++ b/dali/operators/image/convolution/gaussian_blur.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -117,7 +117,7 @@ class GaussianBlurOpCpu : public OpImplBase { int nsamples = input.num_samples(); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { auto elem_volume = volume(input.tensor_shape(sample_idx)); - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, sample_idx](int thread_id) { auto gaussian_windows = windows_[sample_idx].GetWindows(); const auto &shape = input.tensor_shape(sample_idx); diff --git a/dali/operators/image/convolution/laplacian.cc b/dali/operators/image/convolution/laplacian.cc index 333dea5b2d2..c20a41b04eb 100644 --- a/dali/operators/image/convolution/laplacian.cc +++ b/dali/operators/image/convolution/laplacian.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -154,7 +154,7 @@ class LaplacianOpCpu : public OpImplBase { for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { auto priority = volume(input.tensor_shape(sample_idx)) * args.GetTotalWindowSizes(sample_idx); - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, sample_idx](int thread_id) { const auto& scales = args.GetScales(sample_idx); const auto& shape = input.tensor_shape(sample_idx); diff --git a/dali/operators/image/crop/bbox_crop.cc b/dali/operators/image/crop/bbox_crop.cc index 77caf5d4e3e..f6285f0788e 100644 --- a/dali/operators/image/crop/bbox_crop.cc +++ b/dali/operators/image/crop/bbox_crop.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -519,7 +519,7 @@ class RandomBBoxCropImpl : public OpImplBase { sample_data_.resize(num_samples); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { auto &data = sample_data_[sample_idx]; - tp.AddWork([&, sample_idx](int thread_id) { + tp.AddTask([&, sample_idx](int thread_id) { auto nboxes = in_boxes_view.tensor_shape_span(sample_idx)[0]; data.in_bboxes.resize(nboxes); ReadBoxes(make_span(data.in_bboxes), diff --git a/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc b/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc index 6678669f94e..42919782be3 100644 --- a/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc +++ b/dali/operators/image/distortion/jpeg_compression_distortion_op_cpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -101,7 +101,7 @@ void JpegCompressionDistortionCPU::RunImpl(Workspace &ws) { int64_t width = shape[w_dim]; int64_t height = shape[h_dim]; for (int elem_idx = 0; elem_idx < nframes; elem_idx++) { - thread_pool.AddWork( + thread_pool.AddTask( [&, sample_idx, elem_idx, width, height, frame_size, quality = quality_arg_[sample_idx].data[0]](int thread_id) { auto *in = in_view[sample_idx].data + elem_idx * frame_size; diff --git a/dali/operators/image/mask/grid_mask.cc b/dali/operators/image/mask/grid_mask.cc index 37aa88f48e0..0d39d66331f 100644 --- a/dali/operators/image/mask/grid_mask.cc +++ b/dali/operators/image/mask/grid_mask.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -75,7 +75,7 @@ void GridMaskCpu::RunImpl(Workspace &ws) { auto in_view = view(input); auto out_view = view(output); for (int sid = 0; sid < input.shape().num_samples(); sid++) { - tp.AddWork([&, sid](int) { + tp.AddTask([&, sid](int) { kernels::KernelContext ctx; kernel_manager_.Run(sid, ctx, out_view[sid], in_view[sid], tile_[sid], ratio_[sid], angle_[sid], shift_x_[sid], shift_y_[sid]); diff --git a/dali/operators/image/paste/multipaste.cc b/dali/operators/image/paste/multipaste.cc index 65199564e1c..21108463872 100644 --- a/dali/operators/image/paste/multipaste.cc +++ b/dali/operators/image/paste/multipaste.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -90,7 +90,7 @@ void MultiPasteCPU::RunTyped(Workspace &ws) { int from_sample = in_idx_[i].data[iter]; int to_sample = i; - tp.AddWork( + tp.AddTask( [&, i, iter, from_sample, to_sample, in_view, out_view](int thread_id) { kernels::KernelContext ctx; auto tvin = in_view[from_sample]; @@ -108,7 +108,7 @@ void MultiPasteCPU::RunTyped(Workspace &ws) { out_shape.tensor_size(to_sample)); } } else { - tp.AddWork( + tp.AddTask( [&, i, paste_count, in_view, out_view](int thread_id) { for (int iter = 0; iter < paste_count; iter++) { int from_sample = in_idx_[i].data[iter]; diff --git a/dali/operators/image/peek_shape/peek_image_shape.h b/dali/operators/image/peek_shape/peek_image_shape.h index 7a14178538e..3ad55184431 100644 --- a/dali/operators/image/peek_shape/peek_image_shape.h +++ b/dali/operators/image/peek_shape/peek_image_shape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -80,7 +80,7 @@ class PeekImageShape : public Operator { DALI_ENFORCE(input.sample_dim() == 1, "Input must be 1D encoded JPEG bit stream."); for (size_t sample_id = 0; sample_id < batch_size; ++sample_id) { - thread_pool.AddWork([sample_id, &input, &output, this] (int tid) { + thread_pool.AddTask([sample_id, &input, &output, this] (int tid) { const auto& image = input[sample_id]; auto img = ImageFactory::CreateImage(image.data(), image.shape().num_elements(), {}); diff --git a/dali/operators/image/remap/displacement_filter_impl_cpu.h b/dali/operators/image/remap/displacement_filter_impl_cpu.h index a3c74a0fe2f..f70dc1346cd 100644 --- a/dali/operators/image/remap/displacement_filter_impl_cpu.h +++ b/dali/operators/image/remap/displacement_filter_impl_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -168,7 +168,7 @@ class DisplacementFilter auto &tp = ws.GetThreadPool(); for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) { - tp.AddWork([&, sample_idx](int thread_idx) { RunSample(ws, sample_idx, thread_idx); }, + tp.AddTask([&, sample_idx](int thread_idx) { RunSample(ws, sample_idx, thread_idx); }, shape.tensor_size(sample_idx)); } diff --git a/dali/operators/image/remap/warp.h b/dali/operators/image/remap/warp.h index 01ef36100d9..317236f3ac4 100644 --- a/dali/operators/image/remap/warp.h +++ b/dali/operators/image/remap/warp.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -167,7 +167,7 @@ class WarpOpImpl : public OpImplInterface { auto interp_types = param_provider_->InterpTypes(); for (int i = 0; i < input_.num_samples(); i++) { - pool.AddWork([&, i](int tid) { + pool.AddTask([&, i](int tid) { DALIInterpType interp_type = interp_types.size() > 1 ? interp_types[i] : interp_types[0]; auto context = GetContext(ws); kmgr_.Run( diff --git a/dali/operators/image/resize/resize_op_impl_cpu.h b/dali/operators/image/resize/resize_op_impl_cpu.h index cc6d90b8697..1e7c688b354 100644 --- a/dali/operators/image/resize/resize_op_impl_cpu.h +++ b/dali/operators/image/resize/resize_op_impl_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ class ResizeOpImplCPU : public ResizeBase::Impl { // NOTE: This does not account for cost of antialiasing! cost += std::pow(std::pow(out_size, spatial_ndim - i) * pow(in_size, i), root); } - tp.AddWork(work, std::llround(cost)); + tp.AddTask(work, std::llround(cost)); } tp.RunAll(); } diff --git a/dali/operators/imgcodec/decoder.h b/dali/operators/imgcodec/decoder.h index 9bc6c444be1..7255fa771e7 100644 --- a/dali/operators/imgcodec/decoder.h +++ b/dali/operators/imgcodec/decoder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -100,7 +100,7 @@ class DecoderBase : public Operator { auto *decoder = GetDecoderInstance(); for (int i = 0; i < shapes.size(); i++) { - tp.AddWork([i, decoder, &input, &shapes, &ws, &spec, this] (int tid) { + tp.AddTask([i, decoder, &input, &shapes, &ws, &spec, this] (int tid) { srcs_[i] = SampleAsImageSource(input[i], input.GetMeta(i).GetSourceInfo()); src_ptrs_[i] = &srcs_[i]; auto info = decoder->GetInfo(src_ptrs_[i]); diff --git a/dali/operators/imgcodec/peek_image_shape.cc b/dali/operators/imgcodec/peek_image_shape.cc index dbf6c7291c5..b5373014f8c 100644 --- a/dali/operators/imgcodec/peek_image_shape.cc +++ b/dali/operators/imgcodec/peek_image_shape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ void ImgcodecPeekImageShape::RunImpl(Workspace &ws) { "The input must be a raw, undecoded file stored as a flat uint8 array."); for (int i = 0; i < input.num_samples(); i++) { - thread_pool.AddWork([i, &input, &output, this] (int tid) { + thread_pool.AddTask([i, &input, &output, this] (int tid) { auto src = SampleAsImageSource(input[i], input.GetMeta(i).GetSourceInfo()); auto *format = ImageFormatRegistry::instance().GetImageFormat(&src); DALI_ENFORCE(format, make_string("Cannot parse the image: ", src.SourceInfo())); diff --git a/dali/operators/math/expressions/arithmetic.cc b/dali/operators/math/expressions/arithmetic.cc index 48124d3954c..dc870096216 100644 --- a/dali/operators/math/expressions/arithmetic.cc +++ b/dali/operators/math/expressions/arithmetic.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ void ArithmeticGenericOp::RunImpl(Workspace &ws) { int batch_size = ws.GetInputBatchSize(0); for (size_t task_idx = 0; task_idx < tile_range_.size(); task_idx++) { - pool.AddWork( + pool.AddTask( [=](int thread_idx) { auto range = tile_range_[task_idx]; // Go over "tiles" diff --git a/dali/operators/math/normalize/normalize.cc b/dali/operators/math/normalize/normalize.cc index f496a7e3331..0e1ecac311d 100644 --- a/dali/operators/math/normalize/normalize.cc +++ b/dali/operators/math/normalize/normalize.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -320,7 +320,7 @@ void Normalize::RunTyped(Workspace &ws) { if (batch_norm_) { if (ShouldCalcMean()) { for (int i = 0; i < nsamples; i++) { - tp.AddWork([&, i](int thread_idx) { + tp.AddTask([&, i](int thread_idx) { kernels::MeanCPU mean; mean.Setup(mutable_mean[i], in_view[i], make_span(axes_)); // Reset per-sample values, but don't postprocess @@ -335,7 +335,7 @@ void Normalize::RunTyped(Workspace &ws) { if (ShouldCalcStdDev()) { auto sample_mean = mean_view[0]; for (int i = 0; i < nsamples; i++) { - tp.AddWork([&, i](int thread_idx) { + tp.AddTask([&, i](int thread_idx) { kernels::VarianceCPU stddev; stddev.Setup(mutable_stddev[i], in_view[i], make_span(axes_), sample_mean); // Reset per-sample values, but don't postprocess @@ -353,7 +353,7 @@ void Normalize::RunTyped(Workspace &ws) { for (int i = 0; i < nsamples; i++) { - tp.AddWork([&, i](int thread_idx) { + tp.AddTask([&, i](int thread_idx) { auto sample_mean = mean_view.num_samples() == 1 || batch_norm_ ? mean_view[0] : mean_view[i]; diff --git a/dali/operators/numba_function/numba_func.cc b/dali/operators/numba_function/numba_func.cc index bb187e69f39..c9f26037047 100644 --- a/dali/operators/numba_function/numba_func.cc +++ b/dali/operators/numba_function/numba_func.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -293,7 +293,7 @@ void NumbaFuncImpl::RunImpl(Workspace &ws) { auto out_shape = out.shape(); auto &tp = ws.GetThreadPool(); for (int sample_id = 0; sample_id < N; sample_id++) { - tp.AddWork([&, sample_id](int thread_id) { + tp.AddTask([&, sample_id](int thread_id) { SmallVector out_ptrs_per_sample; SmallVector out_shapes_per_sample; auto& out_shapes_ptrs = setup_fn_ ? output_shape_ptrs_ : input_shape_ptrs_; diff --git a/dali/operators/python_function/dltensor_function.cc b/dali/operators/python_function/dltensor_function.cc index f042a2cd0c6..cf7fe7c5ee9 100644 --- a/dali/operators/python_function/dltensor_function.cc +++ b/dali/operators/python_function/dltensor_function.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -147,7 +147,7 @@ void CopyOutputData(TensorList &output, std::vector &d auto &thread_pool = workspace.GetThreadPool(); auto out_shape = output.shape(); for (int i = 0; i < batch_size; ++i) { - thread_pool.AddWork([&, i](int) { + thread_pool.AddTask([&, i](int) { CopyDlTensor(output.raw_mutable_tensor(i), dl_tensors[i]); }, out_shape.tensor_size(i)); } diff --git a/dali/operators/random/rng_base_cpu.h b/dali/operators/random/rng_base_cpu.h index a66a887e139..9f01cc7ae80 100644 --- a/dali/operators/random/rng_base_cpu.h +++ b/dali/operators/random/rng_base_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -162,7 +162,7 @@ void RNGBase::RunImplTyped(Workspace &ws, CPUBackend) } if (total_p_count < kThreshold) { - tp.AddWork( + tp.AddTask( [=](int thread_id) { auto dist = use_default_dist ? Dist() : dists[sample_id]; if (independent_channels) { @@ -180,7 +180,7 @@ void RNGBase::RunImplTyped(Workspace &ws, CPUBackend) std::tie(p_offset, p_count) = get_chunk(total_p_count, c, chunks); for (auto &s : seed) s = rng_[sample_id](); - tp.AddWork( + tp.AddTask( [=](int thread_id) { std::seed_seq seq(seed.begin(), seed.end()); std::mt19937_64 chunk_rng(seq); diff --git a/dali/operators/reader/nemo_asr_reader_op.cc b/dali/operators/reader/nemo_asr_reader_op.cc index db50fcbfe70..b5249154514 100755 --- a/dali/operators/reader/nemo_asr_reader_op.cc +++ b/dali/operators/reader/nemo_asr_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -174,7 +174,7 @@ void NemoAsrReader::Prefetch() { const auto &audio_meta = sample.audio_meta(); int64_t priority = audio_meta.length * audio_meta.channels; - thread_pool_.AddWork( + thread_pool_.AddTask( [audio, &sample](int tid) { sample.decode_audio(audio, tid); }, priority); diff --git a/dali/operators/reader/numpy_reader_gpu_op.cc b/dali/operators/reader/numpy_reader_gpu_op.cc index 3bc9b5cc31f..f5e208472d5 100644 --- a/dali/operators/reader/numpy_reader_gpu_op.cc +++ b/dali/operators/reader/numpy_reader_gpu_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -62,7 +62,7 @@ void NumpyReaderGPU::Prefetch() { for (size_t data_idx = 0; data_idx < curr_batch.size(); ++data_idx) { // when padding, the last sample is duplicated so no need to redo the same work if (data_idx > 0 && curr_batch[data_idx -1 ] == curr_batch[data_idx]) continue; - thread_pool_.AddWork([this, &curr_batch, data_idx](int tid) { + thread_pool_.AddTask([this, &curr_batch, data_idx](int tid) { curr_batch[data_idx]->Reopen(); curr_batch[data_idx]->ReadHeader(header_cache_); }); @@ -140,7 +140,7 @@ void NumpyReaderGPU::ScheduleChunkedRead(SampleView &out_sample, ssize_t copy_skip = copy_start - file_offset; ssize_t copy_end = file_offset + chunk_read_length; ssize_t chunk_copy_length = copy_end - copy_start; - thread_pool_.AddWork([=, &load_target](int tid) { + thread_pool_.AddTask([=, &load_target](int tid) { assert(chunk_read_length <= static_cast(staging_.chunk_size())); auto buffer = staging_.get_staging_buffer(); load_target.ReadRawChunk(buffer.at(0), chunk_read_length, 0, file_offset); diff --git a/dali/operators/reader/numpy_reader_op.cc b/dali/operators/reader/numpy_reader_op.cc index 3d863c40b53..7b976e8d2bd 100644 --- a/dali/operators/reader/numpy_reader_op.cc +++ b/dali/operators/reader/numpy_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ static void CopyHelper(SampleView output, ConstSampleView output, ConstSampleView::FindLabels(const InTensorCPU for (int i = 0; i < num_chunks; i++) { int64_t start = N * i / num_chunks; int64_t end = N * (i + 1) / num_chunks; - thread_pool->AddWork([=](int) { + thread_pool->AddTask([=](int) { auto &lbl = tmp_labels[i]; lbl.clear(); detail::FindLabels(lbl, data + start, end - start); @@ -227,7 +227,7 @@ void FilterByLabel(ThreadPool *tp, auto *out_start = out.data + start; auto *in_start = in.data + start; int64_t n = end - start; - tp->AddWork([=](int) { + tp->AddTask([=](int) { FilterByLabel(out_start, in_start, n, label); }); } diff --git a/dali/operators/sequence/sequence_rearrange.cc b/dali/operators/sequence/sequence_rearrange.cc index 7ea28044160..1b71f28d15a 100644 --- a/dali/operators/sequence/sequence_rearrange.cc +++ b/dali/operators/sequence/sequence_rearrange.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ void SequenceRearrange::RunImpl(Workspace &ws) { auto curr_batch_size = ws.GetInputBatchSize(0); for (int sample_idx = 0; sample_idx < curr_batch_size; ++sample_idx) { - thread_pool.AddWork([this, &ws, &input, &output, sample_idx](int tid) { + thread_pool.AddTask([this, &ws, &input, &output, sample_idx](int tid) { const TypeInfo &type = input.type_info(); const auto *in_sample = reinterpret_cast(input.raw_tensor(sample_idx)); auto *out_sample = reinterpret_cast(output.raw_mutable_tensor(sample_idx)); diff --git a/dali/operators/signal/decibel/to_decibels_op_cpu.cc b/dali/operators/signal/decibel/to_decibels_op_cpu.cc index a28e05e4be1..e45a346824f 100644 --- a/dali/operators/signal/decibel/to_decibels_op_cpu.cc +++ b/dali/operators/signal/decibel/to_decibels_op_cpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -81,7 +81,7 @@ void ToDecibels::RunImpl(Workspace &ws) { TYPE_SWITCH(input.type(), type2id, T, (float), ( using ToDbKernel = kernels::signal::ToDecibelsCpu; for (int i = 0; i < input.shape().num_samples(); i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { kernels::KernelContext ctx; auto in_view = view(input[i]); diff --git a/dali/operators/signal/fft/power_spectrum.cc b/dali/operators/signal/fft/power_spectrum.cc index fd5c7281b83..87e20a31eab 100644 --- a/dali/operators/signal/fft/power_spectrum.cc +++ b/dali/operators/signal/fft/power_spectrum.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -95,7 +95,7 @@ void PowerSpectrum::RunImpl(Workspace &ws) { using FftKernel = kernels::signal::fft::Fft1DCpu; for (int i = 0; i < input.shape().num_samples(); i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, i](int thread_id) { kernels::KernelContext ctx; auto in_view = view(input[i]); diff --git a/dali/operators/signal/fft/spectrogram.cc b/dali/operators/signal/fft/spectrogram.cc index 157154f6d54..2f08b7d19c3 100644 --- a/dali/operators/signal/fft/spectrogram.cc +++ b/dali/operators/signal/fft/spectrogram.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -261,7 +261,7 @@ void SpectrogramImplCpu::RunImpl(Workspace &ws) { output.SetLayout(layout_); for (int i = 0; i < nsamples; i++) { - thread_pool.AddWork( + thread_pool.AddTask( [this, &input, &output, view_window_fn, i](int thread_id) { kernels::KernelContext ctx; diff --git a/dali/pipeline/operator/builtin/conditional/merge.cc b/dali/pipeline/operator/builtin/conditional/merge.cc index 94539aa9ff0..a2ae8689393 100644 --- a/dali/pipeline/operator/builtin/conditional/merge.cc +++ b/dali/pipeline/operator/builtin/conditional/merge.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -145,7 +145,7 @@ void Merge::CopySampleToOutput(TensorList &output, int o const TensorList &input, int input_sample_idx, Workspace &ws) { auto &tp = ws.GetThreadPool(); - tp.AddWork( + tp.AddTask( [&output, &input, output_sample_idx, input_sample_idx](int thread_idx) { output.ResizeSample(output_sample_idx, input.shape()[input_sample_idx]); output.CopySample(output_sample_idx, input, input_sample_idx, output.order()); diff --git a/dali/pipeline/operator/builtin/input_operator.cc b/dali/pipeline/operator/builtin/input_operator.cc index 1e7c9167d37..c3b65abce60 100644 --- a/dali/pipeline/operator/builtin/input_operator.cc +++ b/dali/pipeline/operator/builtin/input_operator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ void InputOperator::ForwardCurrentData(TensorList &targe target.SetLayout(tensor_list_elm.front()->GetLayout()); for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) { - thread_pool.AddWork( + thread_pool.AddTask( [&target, sample_id, &tensor_list_elm](int tid) { target.CopySample(sample_id, *tensor_list_elm.front(), sample_id, AccessOrder::host()); @@ -168,7 +168,7 @@ void InputOperator::ForwardCurrentData(TensorList &tar target.SetLayout(tensor_list_elm.front()->GetLayout()); for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) { - thread_pool.AddWork( + thread_pool.AddTask( [&target, sample_id, &tensor_list_elm](int tid) { target.CopySample(sample_id, *tensor_list_elm.front(), sample_id, AccessOrder::host()); diff --git a/dali/pipeline/operator/builtin/make_contiguous.cc b/dali/pipeline/operator/builtin/make_contiguous.cc index 7613cc61bf6..acfd1602fe1 100644 --- a/dali/pipeline/operator/builtin/make_contiguous.cc +++ b/dali/pipeline/operator/builtin/make_contiguous.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ void MakeContiguousCPU::RunImpl(Workspace &ws) { auto &thread_pool = ws.GetThreadPool(); for (int sample_id = 0; sample_id < batch_size; ++sample_id) { - thread_pool.AddWork( + thread_pool.AddTask( [sample_id, &input, &output](int tid) { output.CopySample(sample_id, input, sample_id, AccessOrder::host()); }, diff --git a/dali/pipeline/operator/operator.h b/dali/pipeline/operator/operator.h index 4b00034857e..2b4d81dd13c 100644 --- a/dali/pipeline/operator/operator.h +++ b/dali/pipeline/operator/operator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -284,7 +284,7 @@ class Operator : public OperatorBase { } auto &thread_pool = ws.GetThreadPool(); for (int data_idx = 0; data_idx < curr_batch_size; ++data_idx) { - thread_pool.AddWork([this, &ws, data_idx](int tid) { + thread_pool.AddTask([this, &ws, data_idx](int tid) { SampleWorkspace sample; MakeSampleView(sample, ws, data_idx, tid); this->SetupSharedSampleParams(sample); diff --git a/dali/pipeline/util/for_each_thread.h b/dali/pipeline/util/for_each_thread.h index cfe319129ca..711ac581089 100644 --- a/dali/pipeline/util/for_each_thread.h +++ b/dali/pipeline/util/for_each_thread.h @@ -44,7 +44,7 @@ void ForEachThread(ThreadPool &tp, Func &&func) { int n = tp.NumThreads(); std::atomic_int pending{n}; for (int i = 0; i < n; i++) { - tp.AddWork([&](int tid) { + tp.AddTask([&](int tid) { std::exception_ptr err{nullptr}; try { func(tid); diff --git a/dali/pipeline/util/thread_pool.cc b/dali/pipeline/util/thread_pool.cc index c6d4243fdbf..a3d0448f300 100644 --- a/dali/pipeline/util/thread_pool.cc +++ b/dali/pipeline/util/thread_pool.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ ThreadPool::~ThreadPool() { #endif } -void ThreadPool::AddWork(Work work, int64_t priority, bool start_immediately) { +void ThreadPool::AddTask(Work work, int64_t priority, bool start_immediately) { bool started_before = false; { std::lock_guard lock(mutex_); diff --git a/dali/pipeline/util/thread_pool.h b/dali/pipeline/util/thread_pool.h index e5e78685c59..903e61716ae 100644 --- a/dali/pipeline/util/thread_pool.h +++ b/dali/pipeline/util/thread_pool.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,11 +45,11 @@ class DLL_PUBLIC ThreadPool { * @brief Adds work to the queue with optional priority, and optionally starts processing * * The jobs are queued but the workers don't pick up the work unless they have - * already been started by a previous call to AddWork with start_immediately = true or RunAll. + * already been started by a previous call to AddTask with start_immediately = true or RunAll. * Once work is started, the threads will continue to pick up whatever work is scheduled * until WaitForWork is called. */ - DLL_PUBLIC void AddWork(Work work, int64_t priority = 0, bool start_immediately = false); + DLL_PUBLIC void AddTask(Work work, int64_t priority = 0, bool start_immediately = false); /** * @brief Wakes up all the threads to complete all the queued work, diff --git a/dali/pipeline/util/thread_pool_test.cc b/dali/pipeline/util/thread_pool_test.cc index eb238e3b08b..79cb07d619f 100644 --- a/dali/pipeline/util/thread_pool_test.cc +++ b/dali/pipeline/util/thread_pool_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,30 +20,30 @@ namespace dali { namespace test { -TEST(ThreadPool, AddWork) { +TEST(ThreadPool, AddTask) { ThreadPool tp(16, 0, false, "ThreadPool test"); std::atomic count{0}; auto increase = [&count](int thread_id) { count++; }; for (int i = 0; i < 64; i++) { - tp.AddWork(increase); + tp.AddTask(increase); } ASSERT_EQ(count, 0); tp.RunAll(); ASSERT_EQ(count, 64); } -TEST(ThreadPool, AddWorkImmediateStart) { +TEST(ThreadPool, AddTaskImmediateStart) { ThreadPool tp(16, 0, false, "ThreadPool test"); std::atomic count{0}; auto increase = [&count](int thread_id) { count++; }; for (int i = 0; i < 64; i++) { - tp.AddWork(increase, 0, true); + tp.AddTask(increase, 0, true); } tp.WaitForWork(); ASSERT_EQ(count, 64); } -TEST(ThreadPool, AddWorkWithPriority) { +TEST(ThreadPool, AddTaskWithPriority) { // only one thread to ensure deterministic behavior ThreadPool tp(1, 0, false, "ThreadPool test"); std::atomic count{0}; @@ -57,12 +57,12 @@ TEST(ThreadPool, AddWorkWithPriority) { int val = count.load(); while (!count.compare_exchange_weak(val, val * 2)) {} }; - tp.AddWork(increase_by_1, 2); - tp.AddWork(mult_by_2, 7); - tp.AddWork(mult_by_2, 9); - tp.AddWork(mult_by_2, 8); - tp.AddWork(increase_by_1, 100); - tp.AddWork(set_to_1, 1000); + tp.AddTask(increase_by_1, 2); + tp.AddTask(mult_by_2, 7); + tp.AddTask(mult_by_2, 9); + tp.AddTask(mult_by_2, 8); + tp.AddTask(increase_by_1, 100); + tp.AddTask(set_to_1, 1000); tp.RunAll(); ASSERT_EQ(((1+1) << 3) + 1, count); @@ -79,7 +79,7 @@ TEST(ThreadPool, CheckName) { auto set_name = [&read_thread_pool_name](int thread_id) { pthread_getname_np(pthread_self(), read_thread_pool_name, sizeof(read_thread_pool_name)); }; - tp.AddWork(set_name, 1); + tp.AddTask(set_name, 1); tp.RunAll(); // skip terminating \0 character diff --git a/dali/test/plugins/dummy/dummy.cc b/dali/test/plugins/dummy/dummy.cc index fe3febd4639..4e8ee4f801e 100644 --- a/dali/test/plugins/dummy/dummy.cc +++ b/dali/test/plugins/dummy/dummy.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) { auto &tp = ws.GetThreadPool(); const auto &in_shape = input.shape(); for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) { - tp.AddWork( + tp.AddTask( [&, sample_id](int thread_id) { type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id), input.raw_tensor(sample_id), diff --git a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb index 2e314c91fda..7082470a896 100644 --- a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb +++ b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb @@ -128,7 +128,7 @@ " auto &tp = ws.GetThreadPool();\r\n", " const auto &in_shape = input.shape();\r\n", " for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {\r\n", - " tp.AddWork(\r\n", + " tp.AddTask(\r\n", " [&, sample_id](int thread_id) {\r\n", " type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id),\r\n", " input.raw_tensor(sample_id),\r\n", diff --git a/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc b/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc index a2a929a840b..499f6013167 100644 --- a/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc +++ b/docs/examples/custom_operations/custom_operator/customdummy/dummy.cc @@ -11,7 +11,7 @@ void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) { auto &tp = ws.GetThreadPool(); const auto &in_shape = input.shape(); for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) { - tp.AddWork( + tp.AddTask( [&, sample_id](int thread_id) { type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(output.raw_mutable_tensor(sample_id), input.raw_tensor(sample_id), diff --git a/include/dali/core/exec/engine.h b/include/dali/core/exec/engine.h index a7d90c36d49..9857159e7de 100644 --- a/include/dali/core/exec/engine.h +++ b/include/dali/core/exec/engine.h @@ -26,7 +26,7 @@ concept ExecutionEngine { /// @param priority priority hint for the job, the higher, the earlier it should start /// @param start_immediately if true, all jobs can start - it's just a hint /// and implementations may start running the jobs earlier - void AddWork(CallableWithInt f, int64_t priority, bool start_immediately = false); + void AddTask(CallableWithInt f, int64_t priority, bool start_immediately = false); /// @brief Starts the work and waits for it to complete. /// If there was an exception in one of the jobs, rethrows one of them. @@ -46,7 +46,7 @@ class SequentialExecutionEngine { * @brief Immediately execute a callable object `f` with thread index 0. */ template - void AddWork(FunctionLike &&f, int64_t priority = 0) { + void AddTask(FunctionLike &&f, int64_t priority = 0) { const int idx = 0; // use of 0 literal would successfully call f expecting a pointer f(idx); } diff --git a/include/dali/core/exec/thread_pool_base.h b/include/dali/core/exec/thread_pool_base.h index 6ac8177206c..45e6c8953c8 100644 --- a/include/dali/core/exec/thread_pool_base.h +++ b/include/dali/core/exec/thread_pool_base.h @@ -43,15 +43,17 @@ class DLL_PUBLIC Job { using priority_t = int64_t; template - std::enable_if_t>> - AddTask(Runnable &&runnable, priority_t priority = {}) { + void AddTask(Runnable &&runnable, priority_t priority = {}) { if (started_) throw std::logic_error("This job has already been started - cannot add more tasks to it"); auto it = tasks_.emplace(priority, Task()); try { - it->second.func = [this, task = &it->second, f = std::move(runnable)]() noexcept { + it->second.func = [this, task = &it->second, f = std::move(runnable)](int tid) noexcept { try { - f(); + if constexpr (std::is_invocable_v) + f(tid); + else + f(); } catch (...) { task->error = std::current_exception(); } @@ -93,7 +95,7 @@ class DLL_PUBLIC Job { bool waited_for_ = false; struct Task { - std::function func; + std::function func; std::exception_ptr error; }; @@ -104,7 +106,7 @@ class DLL_PUBLIC Job { class DLL_PUBLIC ThreadPoolBase { public: - using TaskFunc = std::function; + using TaskFunc = std::function; ThreadPoolBase() = default; explicit ThreadPoolBase(int num_threads) { @@ -119,6 +121,8 @@ class DLL_PUBLIC ThreadPoolBase { void AddTask(TaskFunc f); + int NumThreads() const { return threads_.size(); } + /** * @brief Returns the thread pool that owns the calling thread (or nullptr) */ @@ -135,9 +139,12 @@ class DLL_PUBLIC ThreadPoolBase { return this_thread_idx_; } - protected: void Shutdown(); + bool ShutdownPending() const { + return shutdown_pending_; + } + private: friend class Job; @@ -162,16 +169,15 @@ class DLL_PUBLIC ThreadPoolBase { }; -template class ThreadedExecutionengine { public: - ThreadedExecutionengine(ThreadPool &tp) : tp_(tp) {} // NOLINT + ThreadedExecutionengine(ThreadPoolBase &tp) : tp_(tp) {} // NOLINT /** * @brief Immediately execute a callable object `f` with thread index 0. */ template - void AddWork(FunctionLike &&f, int64_t priority = 0) { + void AddTask(FunctionLike &&f, int64_t priority = 0) { job_.AddTask(std::forward(f), priority); } @@ -183,12 +189,12 @@ class ThreadedExecutionengine { return tp_.NumThreads(); } - ThreadPool &GetThreadPool() const noexcept { + ThreadPoolBase &GetThreadPool() const noexcept { return tp_; } private: - ThreadPool &tp_; + ThreadPoolBase &tp_; Job job_; };