Skip to content

Commit 116373d

Browse files
committed
Workaround race condition in NVJPEG2k.
Signed-off-by: Michal Zientkiewicz <michalz@nvidia.com>
1 parent 35f8207 commit 116373d

File tree

3 files changed

+19
-33
lines changed

3 files changed

+19
-33
lines changed

dali/imgcodec/decoders/nvjpeg2k/nvjpeg2k.cc

+1 -15
Original file line number | Diff line number | Diff line change
@@ -158,19 +158,6 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
158158
return check_status(ret, in);
159159
} else {
160160
auto &image_info = ctx.image_info;
161-
int num_tiles = image_info.num_tiles_y * image_info.num_tiles_x;
162-
int tile_res_used = std::min<int>(ctx.tile_dec_res.size(), num_tiles);
163-
SmallVector<nvjpeg2kStream_t, kNumParallelTiles> tile_jp2_stream;
164-
SmallVector<NvJpeg2kStream, kNumParallelTiles> tile_stream_storage;
165-
tile_jp2_stream.resize(tile_res_used);
166-
tile_stream_storage.resize(tile_res_used);
167-
tile_jp2_stream[0] = ctx.nvjpeg2k_stream;
168-
for (int i = 1; i < tile_res_used; i++) {
169-
tile_stream_storage[i] = NvJpeg2kStream::Create();
170-
tile_jp2_stream[i] = tile_stream_storage[i];
171-
CUDA_CALL(nvjpeg2kStreamParse(nvjpeg2k_handle_, in->RawData<uint8_t>(), in->Size(),
172-
0, 0, tile_jp2_stream[i]));
173-
}
174161

175162
// Decode tile by tile: nvjpeg2kDecodeImage seems to be bugged
176163
auto &roi = ctx.roi;
@@ -201,7 +188,6 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
201188
const TileDecodingResources &per_tile_ctx = ctx.tile_dec_res[state_idx];
202189

203190
CUDA_CALL(cudaEventSynchronize(per_tile_ctx.decode_event));
204-
//cudaStreamSynchronize(ctx.cuda_stream);
205191

206192
auto &params = per_tile_ctx.params;
207193
CUDA_CALL(nvjpeg2kDecodeParamsSetDecodeArea(params, begin_x, end_x, begin_y, end_y));
@@ -211,7 +197,7 @@ bool NvJpeg2000DecoderInstance::DecodeJpeg2000(ImageSource *in, void *out, const
211197

212198
auto ret = nvjpeg2kDecodeTile(nvjpeg2k_handle_,
213199
per_tile_ctx.state,
214-
tile_jp2_stream[state_idx],
200+
ctx.nvjpeg2k_stream,
215201
params,
216202
tile_x + tile_y * image_info.num_tiles_x,
217203
0,

dali/imgcodec/decoders/nvjpeg2k/nvjpeg2k.h

+4 -6
Original file line number | Diff line number | Diff line change
@@ -81,14 +81,13 @@ class DLL_PUBLIC NvJpeg2000DecoderInstance : public BatchParallelDecoderImpl {
8181
CUDAEvent decode_event;
8282
NvJpeg2kDecodeParams params;
8383

84-
explicit TileDecodingResources(const NvJpeg2kHandle &nvjpeg2k_handle, int device_id,
85-
cudaStream_t cuda_stream)
84+
explicit TileDecodingResources(const NvJpeg2kHandle &nvjpeg2k_handle, int device_id)
8685
: state(nvjpeg2k_handle), decode_event(CUDAEvent::Create(device_id)) {
87-
CUDA_CALL(cudaEventRecord(decode_event, cuda_stream));
8886
}
8987
};
9088

91-
static constexpr int kNumParallelTiles = 10;
89+
static constexpr int kNumParallelTiles = 1; // TODO(michalz): Use a different memory resource
90+
// to allow parallel processing of tiles - existing one isn't stream-safe
9291
struct PerThreadResources {
9392
PerThreadResources() = default;
9493
PerThreadResources(const NvJpeg2kHandle &nvjpeg2k_handle,
@@ -99,11 +98,10 @@ class DLL_PUBLIC NvJpeg2000DecoderInstance : public BatchParallelDecoderImpl {
9998
, decode_event(CUDAEvent::Create(device_id))
10099
, cuda_stream(CUDAStreamPool::instance().Get(device_id)) {
101100
intermediate_buffer.resize(device_memory_padding / 8);
102-
CUDA_CALL(cudaEventRecord(decode_event, cuda_stream));
103101

104102
tile_dec_res.reserve(kNumParallelTiles);
105103
for (int i = 0; i < kNumParallelTiles; i++) {
106-
tile_dec_res.emplace_back(nvjpeg2k_handle, device_id, cuda_stream);
104+
tile_dec_res.emplace_back(nvjpeg2k_handle, device_id);
107105
}
108106
}
109107

dali/test/python/test_dali_variable_batch_size.py

+14 -12
Original file line number | Diff line number | Diff line change
@@ -994,27 +994,29 @@ def peek_image_shape_pipe(module, max_batch_size, input_data, device):
994994
pipe.set_outputs(shape)
995995
return pipe
996996

997-
image_decoder_extensions = ['.jp2'] * 1000
997+
image_decoder_extensions = ['.jpg', '.bmp', '.png', '.pnm', '.jp2']
998998
image_decoder_pipes = [
999-
#image_decoder_pipe,
999+
image_decoder_pipe,
10001000
image_decoder_crop_pipe,
10011001
image_decoder_slice_pipe,
10021002
]
10031003

10041004
data_path = os.path.join(test_utils.get_dali_extra_path(), 'db', 'single')
10051005
for ext in image_decoder_extensions:
10061006
for pipe_template in image_decoder_pipes:
1007+
pipe = partial(pipe_template, fn.decoders)
1008+
yield test_decoders_check, pipe, data_path, ext, ['cpu', 'mixed']
10071009
pipe = partial(pipe_template, fn.experimental.decoders)
1008-
yield test_decoders_check, pipe, data_path + '/jpeg2k/2', ext, ['mixed']
1009-
#pipe = partial(image_decoder_rcrop_pipe, fn.decoders)
1010-
#yield test_decoders_run, pipe, data_path, ext, ['cpu', 'mixed']
1011-
#pipe = partial(image_decoder_rcrop_pipe, fn.experimental.decoders)
1012-
#yield test_decoders_run, pipe, data_path, ext, ['cpu', 'mixed']
1013-
1014-
#pipe = partial(peek_image_shape_pipe, fn)
1015-
#yield test_decoders_check, pipe, data_path, '.jpg', ['cpu']
1016-
#pipe = partial(peek_image_shape_pipe, fn.experimental)
1017-
#yield test_decoders_check, pipe, data_path, '.jpg', ['cpu']
1010+
yield test_decoders_check, pipe, data_path, ext, ['cpu', 'mixed']
1011+
pipe = partial(image_decoder_rcrop_pipe, fn.decoders)
1012+
yield test_decoders_run, pipe, data_path, ext, ['cpu', 'mixed']
1013+
pipe = partial(image_decoder_rcrop_pipe, fn.experimental.decoders)
1014+
yield test_decoders_run, pipe, data_path, ext, ['cpu', 'mixed']
1015+
1016+
pipe = partial(peek_image_shape_pipe, fn)
1017+
yield test_decoders_check, pipe, data_path, '.jpg', ['cpu']
1018+
pipe = partial(peek_image_shape_pipe, fn.experimental)
1019+
yield test_decoders_check, pipe, data_path, '.jpg', ['cpu']
10181020

10191021

10201022
def test_python_function():

0 commit comments

Comments (0)