@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -554,15 +554,16 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
     samples_jpeg2k_.clear();
 #endif  // NVJPEG2K_ENABLED
 
+    const auto &input = ws.Input<CPUBackend>(0);
     for (int i = 0; i < curr_batch_size; i++) {
-      const auto &in = ws.Input<CPUBackend>(0)[i];
-      const auto in_size = in.size();
-      thread_pool_.AddWork([this, i, &in, in_size](int tid) {
-        auto *input_data = in.data<uint8_t>();
+      auto *input_data = input.tensor<uint8_t>(i);
+      const auto in_size = input.tensor_shape(i).num_elements();
+      const auto &source_info = input.GetMeta(i).GetSourceInfo();
+      thread_pool_.AddWork([this, i, input_data, in_size, source_info](int tid) {
         SampleData &data = sample_data_[i];
         data.clear();
         data.sample_idx = i;
-        data.file_name = in.GetSourceInfo();
+        data.file_name = source_info;
         data.encoded_length = in_size;
 
         auto cached_shape = CacheImageShape(data.file_name);
@@ -704,15 +705,17 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
 
   void ProcessImagesCuda(MixedWorkspace &ws) {
     auto& output = ws.Output<GPUBackend>(0);
+    const auto &input = ws.Input<CPUBackend>(0);
     for (auto *sample : samples_single_) {
       assert(sample);
       auto i = sample->sample_idx;
       auto *output_data = output.mutable_tensor<uint8_t>(i);
-      const auto &in = ws.Input<CPUBackend>(0)[i];
+      const auto *in_data = input.tensor<uint8_t>(i);
+      const auto in_size = input.tensor_shape(i).num_elements();
       thread_pool_.AddWork(
-        [this, sample, &in, output_data](int tid) {
-          SampleWorker(sample->sample_idx, sample->file_name, in.size(), tid,
-                       in.data<uint8_t>(), output_data, streams_[tid]);
+        [this, sample, in_data, in_size, output_data](int tid) {
+          SampleWorker(sample->sample_idx, sample->file_name, in_size, tid,
+                       in_data, output_data, streams_[tid]);
         }, task_priority_seq_--);  // FIFO order, since the samples were already ordered
     }
   }
@@ -808,15 +811,17 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
   }
 
   void ProcessImagesHost(MixedWorkspace &ws) {
+    const auto &input = ws.Input<CPUBackend>(0);
     auto& output = ws.Output<GPUBackend>(0);
     for (auto *sample : samples_host_) {
       auto i = sample->sample_idx;
+      const auto *input_data = input.tensor<uint8_t>(i);
+      auto in_size = input.tensor_shape(i).num_elements();
       auto *output_data = output.mutable_tensor<uint8_t>(i);
-      const auto &in = ws.Input<CPUBackend>(0)[i];
       ImageCache::ImageShape shape = output_shape_[i].to_static<3>();
       thread_pool_.AddWork(
-        [this, sample, &in, output_data, shape](int tid) {
-          HostFallback<StorageGPU>(in.data<uint8_t>(), in.size(), output_image_type_, output_data,
+        [this, sample, input_data, in_size, output_data, shape](int tid) {
+          HostFallback<StorageGPU>(input_data, in_size, output_image_type_, output_data,
                                    streams_[tid], sample->file_name, sample->roi, use_fast_idct_);
           CacheStore(sample->file_name, output_data, shape, streams_[tid]);
         }, task_priority_seq_--);  // FIFO order, since the samples were already ordered
@@ -846,13 +851,14 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
     int j = 0;
     TensorVector<CPUBackend> tv(samples_hw_batched_.size());
 
+    const auto &input = ws.Input<CPUBackend>(0);
+    tv.SetupLike(input);
     for (auto *sample : samples_hw_batched_) {
       int i = sample->sample_idx;
-      const auto &in = ws.Input<CPUBackend>(0)[i];
       const auto &out_shape = output_shape_.tensor_shape(i);
 
-      tv[j].ShareData(const_cast<Tensor<CPUBackend> &>(in));
-      in_lengths_[j] = in.size();
+      tv.UnsafeSetSample(j, input, i);
+      in_lengths_[j] = input.tensor_shape(i).num_elements();
       nvjpeg_destinations_[j].channel[0] = output.mutable_tensor<uint8_t>(i);
       nvjpeg_destinations_[j].pitch[0] = out_shape[1] * out_shape[2];
       nvjpeg_params_[j] = sample->params;
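
Across the call sites above the change follows one pattern: rather than taking a reference to a per-sample Tensor through ws.Input<CPUBackend>(0)[i], the decoder fetches the batch-level input once and reads each sample's data pointer, size, and metadata through it, so the worker lambdas capture plain values instead of a reference into the workspace. Below is a minimal sketch of that pattern, using only the accessors that appear in the diff; the members ws, curr_batch_size, and thread_pool_ are assumed from the decoder, and DecodeSample is a hypothetical stand-in for the per-sample work.

// Sketch of the batch-level access pattern introduced above (not part of the commit).
const auto &input = ws.Input<CPUBackend>(0);  // fetch the batch view once, outside the loop
for (int i = 0; i < curr_batch_size; i++) {
  // Per-sample pointer, element count, and source info read through the batch object.
  const uint8_t *in_data = input.tensor<uint8_t>(i);
  const auto in_size = input.tensor_shape(i).num_elements();
  const auto &source_info = input.GetMeta(i).GetSourceInfo();
  // Capture by value so the lambda does not hold a reference to a per-sample Tensor.
  thread_pool_.AddWork([this, i, in_data, in_size, source_info](int tid) {
    DecodeSample(i, in_data, in_size, source_info, tid);  // hypothetical per-sample worker
  });
}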