-
Notifications
You must be signed in to change notification settings - Fork 630
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add kernel-wrapper around NPP debayer calls #4486
Merged
Merged
Changes from 9 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
667bea6
Add kernel-wrapper around NPP debayer calls
stiepan f2b2150
Add npp debayer calls to stub generator
stiepan 772e279
Expose DALIColorFilter as enum in python
stiepan 8312240
Rename the bayer/debayer algorithm enum, add color filter enum to docs
stiepan f354f3c
Fix linting issues
stiepan 3ee9590
Revert exposing the enum
stiepan 541b29d
Zero-init the npp context
stiepan a4118e8
Add cpp tests
stiepan cc4eee6
Simplify bayering (with mod rather than in literal tiles)
stiepan 6ed58cb
Use plain C arrays for easier initialization
stiepan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
dali/kernels/imgproc/color_manipulation/debayer/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
collect_headers(DALI_INST_HDRS PARENT_SCOPE) | ||
collect_sources(DALI_KERNEL_SRCS PARENT_SCOPE) | ||
collect_test_sources(DALI_KERNEL_TEST_SRCS PARENT_SCOPE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#ifndef DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_ | ||
#define DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_ | ||
|
||
#include <string> | ||
|
||
#include "dali/core/common.h" | ||
#include "dali/core/span.h" | ||
#include "dali/kernels/kernel.h" | ||
|
||
namespace dali { | ||
namespace kernels { | ||
namespace debayer { | ||
|
||
enum class DALIBayerPattern { | ||
DALI_BAYER_BG = 0, | ||
DALI_BAYER_GB = 1, | ||
DALI_BAYER_GR = 2, | ||
DALI_BAYER_RG = 3 | ||
}; | ||
|
||
enum class DALIDebayerAlgorithm { | ||
DALI_DEBAYER_BILINEAR_NPP = 0 | ||
}; | ||
|
||
inline std::string to_string(DALIBayerPattern bayer_pattern) { | ||
switch (bayer_pattern) { | ||
case DALIBayerPattern::DALI_BAYER_BG: | ||
return "BG(GR)"; | ||
case DALIBayerPattern::DALI_BAYER_GB: | ||
return "GB(RG)"; | ||
case DALIBayerPattern::DALI_BAYER_GR: | ||
return "GR(BG)"; | ||
case DALIBayerPattern::DALI_BAYER_RG: | ||
return "RG(GB)"; | ||
default: | ||
return "<unknown>"; | ||
} | ||
} | ||
|
||
inline std::string to_string(DALIDebayerAlgorithm alg) { | ||
switch (alg) { | ||
case DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP: | ||
return "bilinear_npp"; | ||
default: | ||
return "<unknown>"; | ||
} | ||
} | ||
|
||
inline DALIDebayerAlgorithm parse_algorithm_name(std::string alg) { | ||
std::transform(alg.begin(), alg.end(), alg.begin(), [](auto c) { return std::tolower(c); }); | ||
if (alg == "bilinear_npp") { | ||
return DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP; | ||
} | ||
throw std::runtime_error( | ||
make_string("Unsupported debayer algorithm was specified: `", alg, "`.")); | ||
} | ||
|
||
} // namespace debayer | ||
|
||
template <typename InOutT> | ||
struct DebayerKernelGpu { | ||
static constexpr int in_ndim = 2; | ||
static constexpr int out_ndim = 3; | ||
virtual void Run(KernelContext &context, TensorListView<StorageGPU, InOutT, out_ndim> output, | ||
TensorListView<StorageGPU, const InOutT, in_ndim> input, | ||
span<const debayer::DALIBayerPattern> patterns) = 0; | ||
|
||
virtual ~DebayerKernelGpu() = default; | ||
}; | ||
|
||
} // namespace kernels | ||
} // namespace dali | ||
|
||
#endif // DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_ |
100 changes: 100 additions & 0 deletions
100
dali/kernels/imgproc/color_manipulation/debayer/debayer_npp.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#ifndef DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_ | ||
#define DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_ | ||
|
||
#include <tuple> | ||
|
||
#include "dali/core/span.h" | ||
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer.h" | ||
#include "dali/kernels/imgproc/color_manipulation/debayer/npp_debayer_call.h" | ||
#include "dali/npp/npp.h" | ||
#include "include/dali/core/backend_tags.h" | ||
#include "include/dali/core/tensor_view.h" | ||
|
||
namespace dali { | ||
namespace kernels { | ||
namespace debayer { | ||
|
||
/** | ||
* @brief Transforms DALI's OpenCv-style bayer pattern specification to NPP's enum. | ||
* All the supported bayer patterns are 2x2 tiles. | ||
* For example the BGGR corresponds to | ||
* [BG, | ||
* GR] | ||
* tile. Imagine covering a single-channel image with a given tile. Now, the letter at | ||
* any given position specifies which color channel's intensity is described by | ||
* a corresponding value. The OpenCV convention used by the `DALIBayerPattern` names the | ||
* pattern by looking at the image's ((1, 1), (3, 3)) rectangle, while NPP looks at | ||
* positions ((0, 0), (2, 2)). Thus, the quite surprising mapping below, which | ||
* seemingly permutes the patterns. | ||
*/ | ||
inline NppiBayerGridPosition to_npp(DALIBayerPattern bayer_pattern) { | ||
switch (bayer_pattern) { | ||
case DALIBayerPattern::DALI_BAYER_BG: // bg(gr) | ||
return NPPI_BAYER_RGGB; | ||
case DALIBayerPattern::DALI_BAYER_GB: // gb(rg) | ||
return NPPI_BAYER_GRBG; | ||
case DALIBayerPattern::DALI_BAYER_GR: // gr(bg) | ||
return NPPI_BAYER_GBRG; | ||
case DALIBayerPattern::DALI_BAYER_RG: // rg(gb) | ||
return NPPI_BAYER_BGGR; | ||
default: | ||
throw std::runtime_error( | ||
make_string("Unsupported bayer pattern: ", to_string(bayer_pattern), ".")); | ||
} | ||
} | ||
|
||
template <typename InOutT> | ||
struct NppDebayerKernel : public DebayerKernelGpu<InOutT> { | ||
using SupportedInputTypes = std::tuple<uint8_t, uint16_t>; | ||
static_assert(contains_v<InOutT, SupportedInputTypes>, "Unsupported input type."); | ||
using Base = DebayerKernelGpu<InOutT>; | ||
using Base::in_ndim; | ||
using Base::out_ndim; | ||
|
||
explicit NppDebayerKernel(int device_id) : npp_ctx_{CreateNppContext(device_id)} {} | ||
|
||
void Run(KernelContext &context, TensorListView<StorageGPU, InOutT, out_ndim> output, | ||
TensorListView<StorageGPU, const InOutT, in_ndim> input, | ||
span<const DALIBayerPattern> patterns) override { | ||
constexpr int num_out_chanels = 3; | ||
int batch_size = input.num_samples(); | ||
assert(output.num_samples() == batch_size); | ||
assert(patterns.size() == batch_size); | ||
UpdateNppContextStream(npp_ctx_, context.gpu.stream); | ||
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) { | ||
const auto &in_view = input[sample_idx]; | ||
const auto &out_view = output[sample_idx]; | ||
const auto &sample_shape = in_view.shape; | ||
int width = sample_shape[1]; | ||
int height = sample_shape[0]; | ||
CUDA_CALL(npp_debayer_call(in_view.data, width * sizeof(InOutT), {width, height}, | ||
{0, 0, width, height}, out_view.data, | ||
width * num_out_chanels * sizeof(InOutT), | ||
to_npp(patterns[sample_idx]), npp_ctx_)); | ||
} | ||
} | ||
|
||
|
||
protected: | ||
NppStreamContext npp_ctx_{cudaStream_t(-1), 0}; | ||
}; | ||
|
||
} // namespace debayer | ||
} // namespace kernels | ||
} // namespace dali | ||
|
||
#endif // DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_ |
176 changes: 176 additions & 0 deletions
176
dali/kernels/imgproc/color_manipulation/debayer/debayer_test.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include <gtest/gtest.h> | ||
#include <array> | ||
#include <limits> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include "dali/core/tensor_shape_print.h" | ||
#include "dali/kernels/common/utils.h" | ||
#include "dali/kernels/dynamic_scratchpad.h" | ||
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer.h" | ||
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer_npp.h" | ||
#include "dali/kernels/scratch.h" | ||
#include "dali/pipeline/data/tensor_list.h" | ||
#include "dali/pipeline/data/views.h" | ||
#include "dali/test/tensor_test_utils.h" | ||
#include "dali/test/test_tensors.h" | ||
|
||
namespace dali { | ||
namespace kernels { | ||
namespace debayer { | ||
namespace test { | ||
|
||
constexpr cudaStream_t cuda_stream = 0; | ||
|
||
template <typename InOutT_, DALIDebayerAlgorithm alg_> | ||
struct DebayerTestParams { | ||
using InOutT = InOutT_; | ||
static constexpr DALIDebayerAlgorithm alg = alg_; | ||
}; | ||
|
||
template <typename DebayerTestParamsT> | ||
class DebayerGpuTest : public ::testing::Test { | ||
protected: | ||
using InOutT = typename DebayerTestParamsT::InOutT; | ||
using Kernel = NppDebayerKernel<InOutT>; | ||
static constexpr int num_channels = 3; | ||
static_assert(DebayerTestParamsT::alg == DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP); | ||
|
||
void FillWithGradient(TensorListView<StorageCPU, InOutT, 3> rgb_batch) { | ||
int max_val = std::numeric_limits<InOutT>::max(); | ||
static constexpr int num_channels = 3; | ||
const auto &batch_shape = rgb_batch.shape; | ||
for (int sample_idx = 0; sample_idx < rgb_batch.num_samples(); sample_idx++) { | ||
int height = batch_shape[sample_idx][0]; | ||
int width = batch_shape[sample_idx][1]; | ||
auto rgb_sample = rgb_batch[sample_idx]; | ||
for (int h = 0; h < height; h++) { | ||
for (int w = 0; w < width; w++) { | ||
rgb_sample.data[h * num_channels * width + w * num_channels] = max_val * (w + 1) / width; | ||
} | ||
} | ||
for (int h = 0; h < height; h++) { | ||
for (int w = 0; w < width; w++) { | ||
rgb_sample.data[h * num_channels * width + w * num_channels + 1] = | ||
max_val * (h + 1) / height; | ||
} | ||
} | ||
for (int h = 0; h < height; h++) { | ||
for (int w = 0; w < width; w++) { | ||
rgb_sample.data[h * num_channels * width + w * num_channels + 2] = | ||
max_val * (width - w) / width; | ||
} | ||
} | ||
} | ||
} | ||
|
||
void BayerSamples(TensorListView<StorageCPU, InOutT, 2> bayer_batch, | ||
TensorListView<StorageCPU, const InOutT, 3> rgb_batch) { | ||
// Note that dali uses opncv's convention of naming the patterns, which | ||
// looks at the 2x2 tile starting at the second row and column of the sensors matrix. | ||
// We iterate over whole image, so the patterns are first transformed as if by looking | ||
// at the tile starting at first column and row. | ||
std::array<std::array<std::array<int, 2>, 2>, 4> pattern2channel{{ | ||
{{{0, 1}, {1, 2}}}, // bggr -> rggb -> 0112 | ||
{{{1, 0}, {2, 1}}}, // gbrg -> grbg -> 1021 | ||
{{{1, 2}, {0, 1}}}, // grbg -> gbrg -> 1201 | ||
{{{2, 1}, {1, 0}}} // rggb -> bggr -> 2110 | ||
}}; | ||
int batch_size = rgb_batch.num_samples(); | ||
const auto &batch_shape = rgb_batch.shape; | ||
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) { | ||
int height = batch_shape[sample_idx][0]; | ||
int width = batch_shape[sample_idx][1]; | ||
auto pattern = patterns_[sample_idx]; | ||
auto rgb_sample = rgb_batch[sample_idx]; | ||
auto bayer_sample = bayer_batch[sample_idx]; | ||
ASSERT_EQ(height % 2, 0); | ||
ASSERT_EQ(width % 2, 0); | ||
for (int h = 0; h < height; h++) { | ||
for (int w = 0; w < width; w++) { | ||
int i = h & 1; | ||
int j = w & 1; | ||
int c = pattern2channel[static_cast<int>(pattern)][i][j]; | ||
bayer_sample.data[h * width + w] = | ||
rgb_sample.data[h * width * num_channels + w * num_channels + c]; | ||
} | ||
} | ||
} | ||
} | ||
|
||
void PrepareData(int batch_size, int min_extent, int max_extent) { | ||
std::uniform_int_distribution<> shape_dist{min_extent / 2, max_extent / 2}; | ||
std::uniform_int_distribution<> pattern_dist{0, 3}; | ||
TensorListShape<3> batch_shape(batch_size); | ||
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) { | ||
TensorShape<3> sample_shape{2 * shape_dist(rng), 2 * shape_dist(rng), 3}; | ||
batch_shape.set_tensor_shape(sample_idx, sample_shape); | ||
patterns_.push_back(static_cast<DALIBayerPattern>(pattern_dist(rng))); | ||
} | ||
baseline_.reshape(batch_shape); | ||
out_.reshape(batch_shape); | ||
in_.reshape(batch_shape.first<2>()); | ||
auto baseline_view = baseline_.cpu(); | ||
FillWithGradient(baseline_view); | ||
auto in_view = in_.cpu(); | ||
BayerSamples(in_view, baseline_view); | ||
} | ||
|
||
void Run(int batch_size, int min_extent, int max_extent) { | ||
PrepareData(batch_size, min_extent, max_extent); | ||
Kernel kernel{0}; | ||
KernelContext ctx; | ||
ctx.gpu.stream = cuda_stream; | ||
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream)); | ||
ctx.scratchpad = &dyn_scratchpad; | ||
auto in_view = in_.gpu(cuda_stream); | ||
auto out_view = out_.gpu(cuda_stream); | ||
kernel.Run(ctx, out_view, in_view, make_span(patterns_)); | ||
auto out_view_cpu = out_.cpu(cuda_stream); | ||
int max_val = std::numeric_limits<InOutT>::max(); | ||
int grad_step = (max_val + min_extent - 1) / min_extent; | ||
Check(out_view_cpu, baseline_.cpu(), EqualEps(grad_step)); | ||
} | ||
|
||
std::vector<DALIBayerPattern> patterns_; | ||
TestTensorList<InOutT, 2> in_; | ||
TestTensorList<InOutT, 3> baseline_, out_; | ||
std::mt19937_64 rng{12345}; | ||
}; | ||
|
||
using TestParams = | ||
::testing::Types<DebayerTestParams<uint8_t, DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP>, | ||
DebayerTestParams<uint16_t, DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP>>; | ||
|
||
TYPED_TEST_SUITE(DebayerGpuTest, TestParams); | ||
|
||
TYPED_TEST(DebayerGpuTest, Gradient_1) { | ||
this->Run(1, 256, 400); | ||
} | ||
|
||
TYPED_TEST(DebayerGpuTest, Gradient_32) { | ||
this->Run(32, 256, 400); | ||
} | ||
|
||
TYPED_TEST(DebayerGpuTest, Gradient_200) { | ||
this->Run(200, 256, 300); | ||
} | ||
|
||
} // namespace test | ||
} // namespace debayer | ||
} // namespace kernels | ||
} // namespace dali |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the braces are mismatched. Plain C array would be easier to read.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done