Skip to content

Commit ca4167b

Browse files
szkarpinskimzient
andauthored
Experimental image decoder based on imgcodec module (#4223)
New operators: * experimental.decoders.image * experimental.decoders.image_crop * experimental.decoders.image_slice * experimental.decoders.image_random_crop * experimental.peek_image_shape These are drop-in replacements for the existing operators. The difference is that they now handle EXIF and TIFF orientation tags as well as support other output bit depths. Missing features: * Hybrid GPU backend for NVJPEG decoder * Hardware backend for NVJPEG decoder author Szymon Karpiński <skarpinski@nvidia.com> committer Michal Zientkiewicz <michalz@nvidia.com> Signed-off-by: Szymon Karpiński <skarpinski@nvidia.com> Co-authored-by: Michal Zientkiewicz <michalz@nvidia.com>
1 parent d77f862 commit ca4167b

17 files changed

+1429
-81
lines changed

dali/imgcodec/decoders/memory_pool.cc

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <unordered_map>
2222
#include "dali/imgcodec/decoders/memory_pool.h"
2323
#include "dali/core/cuda_error.h"
24+
#include "dali/core/cuda_stream_pool.h"
2425
#include "dali/core/mm/malloc_resource.h"
2526
#include "dali/pipeline/data/buffer.h"
2627

dali/imgcodec/decoders/nvjpeg2k/nvjpeg2k.cc

+25-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "dali/imgcodec/util/convert_gpu.h"
2323
#include "dali/core/static_switch.h"
2424
#include "dali/imgcodec/registry.h"
25+
#include "dali/pipeline/util/for_each_thread.h"
2526

2627
namespace dali {
2728
namespace imgcodec {
@@ -63,8 +64,10 @@ NvJpeg2000DecoderInstance::NvJpeg2000DecoderInstance(
6364
nvjpeg2k_handle_ = NvJpeg2kHandle(&nvjpeg2k_dev_alloc_, &nvjpeg2k_pin_alloc_);
6465
DALI_ENFORCE(nvjpeg2k_handle_, "NvJpeg2kHandle initalization failed");
6566

66-
for (auto &res : per_thread_resources_)
67-
res = {nvjpeg2k_handle_, device_memory_padding, device_id_};
67+
ForEachThread(*tp_, [&](int tid) noexcept {
68+
CUDA_CALL(cudaSetDevice(device_id));
69+
per_thread_resources_[tid] = {nvjpeg2k_handle_, device_memory_padding, device_id_};
70+
});
6871

6972
for (const auto &thread_id : tp_->GetThreadIds()) {
7073
if (device_memory_padding > 0) {
@@ -81,9 +84,18 @@ NvJpeg2000DecoderInstance::NvJpeg2000DecoderInstance(
8184
}
8285

8386
NvJpeg2000DecoderInstance::~NvJpeg2000DecoderInstance() {
87+
tp_->WaitForWork();
8488
for (const auto &res : per_thread_resources_)
8589
CUDA_CALL(cudaStreamSynchronize(res.cuda_stream));
86-
for (const auto &thread_id : tp_->GetThreadIds())
90+
91+
ForEachThread(*tp_, [&](int tid) {
92+
auto &res = per_thread_resources_[tid];
93+
res.tile_dec_res.clear();
94+
res.nvjpeg2k_decode_state.reset();
95+
res.intermediate_buffer.free();
96+
});
97+
98+
for (auto thread_id : tp_->GetThreadIds())
8799
nvjpeg_memory::DeleteAllBuffers(thread_id);
88100
}
89101

@@ -157,8 +169,9 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
157169
ctx.nvjpeg2k_stream, &output_image, ctx.cuda_stream);
158170
return check_status(ret, in);
159171
} else {
160-
// Decode tile by tile: nvjpeg2kDecodeImage seems to be bugged
161172
auto &image_info = ctx.image_info;
173+
174+
// Decode tile by tile: nvjpeg2kDecodeImage doesn't work properly with ROI
162175
auto &roi = ctx.roi;
163176
std::array tile_shape = {image_info.tile_height, image_info.tile_width};
164177

@@ -185,11 +198,10 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
185198

186199
if (begin_x < end_x && begin_y < end_y) {
187200
const TileDecodingResources &per_tile_ctx = ctx.tile_dec_res[state_idx];
188-
state_idx = (state_idx + 1) % ctx.tile_dec_res.size();
189201

190202
CUDA_CALL(cudaEventSynchronize(per_tile_ctx.decode_event));
191203

192-
NvJpeg2kDecodeParams params;
204+
auto &params = per_tile_ctx.params;
193205
CUDA_CALL(nvjpeg2kDecodeParamsSetDecodeArea(params, begin_x, end_x, begin_y, end_y));
194206

195207
auto output_image = PrepareOutputArea(out, pixel_data, pitch_in_bytes, output_offset_x,
@@ -208,6 +220,7 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
208220
return check_status(ret, in);
209221

210222
CUDA_CALL(cudaEventRecord(per_tile_ctx.decode_event, ctx.cuda_stream));
223+
state_idx = (state_idx + 1) % ctx.tile_dec_res.size();
211224
}
212225
}
213226
}
@@ -225,11 +238,11 @@ DecodeResult NvJpeg2000DecoderInstance::DecodeImplTask(int thread_idx,
225238
Context ctx(opts, roi, res);
226239
DecodeResult result = {false, nullptr};
227240

241+
CUDA_CALL(cudaEventSynchronize(ctx.decode_event));
242+
228243
if (!ParseJpeg2000Info(in, ctx))
229244
return result;
230245

231-
CUDA_CALL(cudaEventSynchronize(ctx.decode_event));
232-
233246
const int64_t channels = ctx.shape[0];
234247
DALIImageType format = channels == 1 ? DALI_GRAY : DALI_RGB;
235248
bool is_processing_needed =
@@ -241,6 +254,10 @@ DecodeResult NvJpeg2000DecoderInstance::DecodeImplTask(int thread_idx,
241254
auto decode_out = out;
242255
if (is_processing_needed) {
243256
int64_t type_size = dali::TypeTable::GetTypeInfo(ctx.pixel_type).size();
257+
size_t new_size = volume(ctx.shape) * type_size;
258+
if (new_size > res.intermediate_buffer.capacity()) {
259+
CUDA_CALL(cudaStreamSynchronize(ctx.cuda_stream));
260+
}
244261
res.intermediate_buffer.resize(volume(ctx.shape) * type_size);
245262
decode_out = {res.intermediate_buffer.data(), ctx.shape, ctx.pixel_type};
246263
}

dali/imgcodec/decoders/nvjpeg2k/nvjpeg2k.h

+6-7
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class DLL_PUBLIC NvJpeg2000DecoderInstance : public BatchParallelDecoderImpl {
4646
const ROI &roi) override;
4747

4848
FutureDecodeResults ScheduleDecode(DecodeContext ctx,
49-
span<SampleView<CPUBackend>> out,
49+
span<SampleView<GPUBackend>> out,
5050
cspan<ImageSource *> in,
5151
DecodeParams opts,
5252
cspan<ROI> rois = {}) override {
@@ -79,15 +79,16 @@ class DLL_PUBLIC NvJpeg2000DecoderInstance : public BatchParallelDecoderImpl {
7979
struct TileDecodingResources {
8080
NvJpeg2kDecodeState state;
8181
CUDAEvent decode_event;
82+
NvJpeg2kDecodeParams params;
8283

83-
explicit TileDecodingResources(const NvJpeg2kHandle &nvjpeg2k_handle, int device_id,
84-
cudaStream_t cuda_stream)
84+
explicit TileDecodingResources(const NvJpeg2kHandle &nvjpeg2k_handle, int device_id)
8585
: state(nvjpeg2k_handle), decode_event(CUDAEvent::Create(device_id)) {
86-
CUDA_CALL(cudaEventRecord(decode_event, cuda_stream));
8786
}
8887
};
8988

9089
struct PerThreadResources {
90+
static constexpr int kNumParallelTiles = 2;
91+
9192
PerThreadResources() = default;
9293
PerThreadResources(const NvJpeg2kHandle &nvjpeg2k_handle,
9394
size_t device_memory_padding, int device_id)
@@ -97,12 +98,10 @@ class DLL_PUBLIC NvJpeg2000DecoderInstance : public BatchParallelDecoderImpl {
9798
, decode_event(CUDAEvent::Create(device_id))
9899
, cuda_stream(CUDAStreamPool::instance().Get(device_id)) {
99100
intermediate_buffer.resize(device_memory_padding / 8);
100-
CUDA_CALL(cudaEventRecord(decode_event, cuda_stream));
101101

102-
constexpr int kNumParallelTiles = 10;
103102
tile_dec_res.reserve(kNumParallelTiles);
104103
for (int i = 0; i < kNumParallelTiles; i++) {
105-
tile_dec_res.emplace_back(nvjpeg2k_handle, device_id, cuda_stream);
104+
tile_dec_res.emplace_back(nvjpeg2k_handle, device_id);
106105
}
107106
}
108107

dali/imgcodec/decoders/nvjpeg2k/nvjpeg2k_test.cc

+2-12
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,9 @@ class NvJpeg2000DecoderTest : public NumpyDecoderTestBase<GPUBackend, OutputType
142142
AssertEqualSatNorm(img, ref);
143143
}
144144

145-
void RunTest(const ImageTestingData &data, std::optional<float> eps = std::nullopt,
146-
DALIImageType format = DALI_RGB) {
145+
void RunTest(const ImageTestingData &data, std::optional<float> eps = std::nullopt) {
147146
ImageBuffer image(data.img_path);
148-
auto params = this->GetParams();
149-
params.format = format;
150-
auto decoded = this->Decode(&image.src, params, data.roi);
147+
auto decoded = this->Decode(&image.src, this->GetParams(), data.roi);
151148
auto ref = this->ReadReferenceFrom(data.ref_path);
152149
AssertEqual(decoded, ref, eps);
153150
}
@@ -187,13 +184,6 @@ TYPED_TEST(NvJpeg2000DecoderTest, DecodeSingleRoi) {
187184
this->RunTest(from_regular_file(name, roi));
188185
}
189186

190-
TYPED_TEST(NvJpeg2000DecoderTest, DecodeSingleAnyData) {
191-
for (const auto &name : images) {
192-
SCOPED_TRACE(name);
193-
this->RunTest(from_regular_file(name), {}, DALI_ANY_DATA);
194-
}
195-
}
196-
197187
TYPED_TEST(NvJpeg2000DecoderTest, DecodeBatchSingleThread) {
198188
std::vector<ImageTestingData> data;
199189
for (const auto &name : images)

dali/imgcodec/decoders/opencv_fallback.cc

+12-7
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,11 @@ DecodeResult OpenCVDecoderInstance::DecodeImplTask(int thread_idx,
8989
cvimg = cv::imdecode(cv::_InputArray(raw, in->Size()), flags);
9090
}
9191

92-
// TODO(michalz): correct the orientation of images loaded with IMREAD_UNCHANGED
93-
(void)adjust_orientation;
92+
Orientation orientation = {};
93+
if (adjust_orientation) {
94+
auto info = ImageFormatRegistry::instance().GetImageFormat(in)->Parser()->GetInfo(in);
95+
orientation = info.orientation;
96+
}
9497

9598
res.success = cvimg.ptr(0) != nullptr;
9699
if (res.success) {
@@ -110,14 +113,16 @@ DecodeResult OpenCVDecoderInstance::DecodeImplTask(int thread_idx,
110113

111114
int in_channels = cvimg.channels();
112115
auto out_format = opts.format;
113-
// OpenCV uses BGR by default. Here we avoid outputting BGR when requesting ANY_DATA
114-
if (out_format == DALI_ANY_DATA && in_format == DALI_ANY_DATA && in_channels == 3) {
115-
in_format = DALI_BGR;
116-
out_format = DALI_RGB;
116+
if (out_format == DALI_ANY_DATA && in_format == DALI_ANY_DATA) {
117+
if (in_channels == 3) {
118+
// OpenCV uses BGR by default. Here we avoid outputting BGR when requesting ANY_DATA
119+
in_format = DALI_BGR;
120+
out_format = DALI_RGB;
121+
} // TODO(michalz): support RGBA in DALI
117122
}
118123
TensorLayout layout = cvimg.dims == 3 ? "DHWC" : "HWC";
119124

120-
Convert(out, layout, out_format, in, layout, in_format, roi);
125+
Convert(out, layout, out_format, in, layout, in_format, roi, orientation);
121126
}
122127
} catch (...) {
123128
res.exception = std::current_exception();

dali/imgcodec/parsers/jpeg.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ ImageInfo JpegParser::GetInfo(ImageSource *encoded) const {
7878
stream->Read(exif_block.data(), size - 8);
7979
cv::ExifReader reader;
8080
if (!reader.parseExif(exif_block.data(), exif_block.size()))
81-
DALI_FAIL(make_string("Couldn't parse EXIF data in: ", encoded->SourceInfo()));
81+
continue;
8282
auto entry = reader.getTag(cv::ORIENTATION);
8383
if (entry.tag != cv::INVALID_TAG) {
8484
info.orientation = FromExifOrientation(static_cast<ExifOrientation>(entry.field_u16));

dali/operators/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_subdirectory(debug)
2121
add_subdirectory(decoder)
2222
add_subdirectory(generic)
2323
add_subdirectory(image)
24+
add_subdirectory(imgcodec)
2425
add_subdirectory(math)
2526
add_subdirectory(random)
2627
add_subdirectory(reader)
@@ -84,7 +85,7 @@ adjust_source_file_language_property("${DALI_OPERATOR_SRCS}")
8485
add_library(dali_operators ${LIBTYPE} ${DALI_OPERATOR_SRCS} ${DALI_OPERATOR_OBJ})
8586
set_target_properties(dali_operators PROPERTIES
8687
LIBRARY_OUTPUT_DIRECTORY "${DALI_LIBRARY_OUTPUT_DIR}")
87-
target_link_libraries(dali_operators PUBLIC dali dali_kernels dali_core)
88+
target_link_libraries(dali_operators PUBLIC dali dali_kernels dali_core dali_imgcodec)
8889
target_link_libraries(dali_operators PRIVATE dynlink_cuda ${DALI_LIBS})
8990
if (BUILD_NVML)
9091
target_link_libraries(dali_operators PRIVATE dynlink_nvml)
+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
16+
collect_sources(DALI_OPERATOR_SRCS PARENT_SCOPE)
17+
collect_test_sources(DALI_OPERATOR_TEST_SRCS PARENT_SCOPE)

0 commit comments

Comments
 (0)