Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add kernel-wrapper around NPP debayer calls #4486

Merged
merged 10 commits into from
Dec 5, 2022
3 changes: 2 additions & 1 deletion dali/kernels/imgproc/color_manipulation/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_subdirectory(debayer)

# Get all the source files and dump test files
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
Expand Down
17 changes: 17 additions & 0 deletions dali/kernels/imgproc/color_manipulation/debayer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_KERNEL_SRCS PARENT_SCOPE)
collect_test_sources(DALI_KERNEL_TEST_SRCS PARENT_SCOPE)
88 changes: 88 additions & 0 deletions dali/kernels/imgproc/color_manipulation/debayer/debayer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_
#define DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_

#include <string>

#include "dali/core/common.h"
#include "dali/core/span.h"
#include "dali/kernels/kernel.h"

namespace dali {
namespace kernels {
namespace debayer {

enum class DALIBayerPattern {
DALI_BAYER_BG = 0,
DALI_BAYER_GB = 1,
DALI_BAYER_GR = 2,
DALI_BAYER_RG = 3
};

enum class DALIDebayerAlgorithm {
DALI_DEBAYER_BILINEAR_NPP = 0
};

inline std::string to_string(DALIBayerPattern bayer_pattern) {
switch (bayer_pattern) {
case DALIBayerPattern::DALI_BAYER_BG:
return "BG(GR)";
case DALIBayerPattern::DALI_BAYER_GB:
return "GB(RG)";
case DALIBayerPattern::DALI_BAYER_GR:
return "GR(BG)";
case DALIBayerPattern::DALI_BAYER_RG:
return "RG(GB)";
default:
return "<unknown>";
}
}

inline std::string to_string(DALIDebayerAlgorithm alg) {
switch (alg) {
case DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP:
return "bilinear_npp";
default:
return "<unknown>";
}
}

inline DALIDebayerAlgorithm parse_algorithm_name(std::string alg) {
std::transform(alg.begin(), alg.end(), alg.begin(), [](auto c) { return std::tolower(c); });
if (alg == "bilinear_npp") {
return DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP;
}
throw std::runtime_error(
make_string("Unsupported debayer algorithm was specified: `", alg, "`."));
}

} // namespace debayer

template <typename InOutT>
struct DebayerKernelGpu {
static constexpr int in_ndim = 2;
static constexpr int out_ndim = 3;
virtual void Run(KernelContext &context, TensorListView<StorageGPU, InOutT, out_ndim> output,
TensorListView<StorageGPU, const InOutT, in_ndim> input,
span<const debayer::DALIBayerPattern> patterns) = 0;

virtual ~DebayerKernelGpu() = default;
};

} // namespace kernels
} // namespace dali

#endif // DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_H_
100 changes: 100 additions & 0 deletions dali/kernels/imgproc/color_manipulation/debayer/debayer_npp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_
#define DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_

#include <tuple>

#include "dali/core/span.h"
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer.h"
#include "dali/kernels/imgproc/color_manipulation/debayer/npp_debayer_call.h"
#include "dali/npp/npp.h"
#include "include/dali/core/backend_tags.h"
#include "include/dali/core/tensor_view.h"

namespace dali {
namespace kernels {
namespace debayer {

/**
* @brief Transforms DALI's OpenCv-style bayer pattern specification to NPP's enum.
* All the supported bayer patterns are 2x2 tiles.
* For example the BGGR corresponds to
* [BG,
* GR]
* tile. Imagine covering a single-channel image with a given tile. Now, the letter at
* any given position specifies which color channel's intensity is described by
* a corresponding value. The OpenCV convention used by the `DALIBayerPattern` names the
* pattern by looking at the image's ((1, 1), (3, 3)) rectangle, while NPP looks at
* positions ((0, 0), (2, 2)). Thus, the quite surprising mapping below, which
* seemingly permutes the patterns.
*/
inline NppiBayerGridPosition to_npp(DALIBayerPattern bayer_pattern) {
switch (bayer_pattern) {
case DALIBayerPattern::DALI_BAYER_BG: // bg(gr)
return NPPI_BAYER_RGGB;
case DALIBayerPattern::DALI_BAYER_GB: // gb(rg)
return NPPI_BAYER_GRBG;
case DALIBayerPattern::DALI_BAYER_GR: // gr(bg)
return NPPI_BAYER_GBRG;
case DALIBayerPattern::DALI_BAYER_RG: // rg(gb)
return NPPI_BAYER_BGGR;
default:
throw std::runtime_error(
make_string("Unsupported bayer pattern: ", to_string(bayer_pattern), "."));
}
}

template <typename InOutT>
struct NppDebayerKernel : public DebayerKernelGpu<InOutT> {
using SupportedInputTypes = std::tuple<uint8_t, uint16_t>;
static_assert(contains_v<InOutT, SupportedInputTypes>, "Unsupported input type.");
using Base = DebayerKernelGpu<InOutT>;
using Base::in_ndim;
using Base::out_ndim;

explicit NppDebayerKernel(int device_id) : npp_ctx_{CreateNppContext(device_id)} {}

void Run(KernelContext &context, TensorListView<StorageGPU, InOutT, out_ndim> output,
TensorListView<StorageGPU, const InOutT, in_ndim> input,
span<const DALIBayerPattern> patterns) override {
constexpr int num_out_chanels = 3;
int batch_size = input.num_samples();
assert(output.num_samples() == batch_size);
assert(patterns.size() == batch_size);
UpdateNppContextStream(npp_ctx_, context.gpu.stream);
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) {
const auto &in_view = input[sample_idx];
const auto &out_view = output[sample_idx];
const auto &sample_shape = in_view.shape;
int width = sample_shape[1];
int height = sample_shape[0];
CUDA_CALL(npp_debayer_call(in_view.data, width * sizeof(InOutT), {width, height},
{0, 0, width, height}, out_view.data,
width * num_out_chanels * sizeof(InOutT),
to_npp(patterns[sample_idx]), npp_ctx_));
}
}


protected:
NppStreamContext npp_ctx_{cudaStream_t(-1), 0};
};

} // namespace debayer
} // namespace kernels
} // namespace dali

#endif // DALI_KERNELS_IMGPROC_COLOR_MANIPULATION_DEBAYER_DEBAYER_NPP_H_
176 changes: 176 additions & 0 deletions dali/kernels/imgproc/color_manipulation/debayer/debayer_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <array>
#include <limits>
#include <random>
#include <vector>

#include "dali/core/tensor_shape_print.h"
#include "dali/kernels/common/utils.h"
#include "dali/kernels/dynamic_scratchpad.h"
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer.h"
#include "dali/kernels/imgproc/color_manipulation/debayer/debayer_npp.h"
#include "dali/kernels/scratch.h"
#include "dali/pipeline/data/tensor_list.h"
#include "dali/pipeline/data/views.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"

namespace dali {
namespace kernels {
namespace debayer {
namespace test {

constexpr cudaStream_t cuda_stream = 0;

template <typename InOutT_, DALIDebayerAlgorithm alg_>
struct DebayerTestParams {
using InOutT = InOutT_;
static constexpr DALIDebayerAlgorithm alg = alg_;
};

template <typename DebayerTestParamsT>
class DebayerGpuTest : public ::testing::Test {
protected:
using InOutT = typename DebayerTestParamsT::InOutT;
using Kernel = NppDebayerKernel<InOutT>;
static constexpr int num_channels = 3;
static_assert(DebayerTestParamsT::alg == DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP);

void FillWithGradient(TensorListView<StorageCPU, InOutT, 3> rgb_batch) {
int max_val = std::numeric_limits<InOutT>::max();
static constexpr int num_channels = 3;
const auto &batch_shape = rgb_batch.shape;
for (int sample_idx = 0; sample_idx < rgb_batch.num_samples(); sample_idx++) {
int height = batch_shape[sample_idx][0];
int width = batch_shape[sample_idx][1];
auto rgb_sample = rgb_batch[sample_idx];
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
rgb_sample.data[h * num_channels * width + w * num_channels] = max_val * (w + 1) / width;
}
}
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
rgb_sample.data[h * num_channels * width + w * num_channels + 1] =
max_val * (h + 1) / height;
}
}
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
rgb_sample.data[h * num_channels * width + w * num_channels + 2] =
max_val * (width - w) / width;
}
}
}
}

void BayerSamples(TensorListView<StorageCPU, InOutT, 2> bayer_batch,
TensorListView<StorageCPU, const InOutT, 3> rgb_batch) {
// Note that dali uses opncv's convention of naming the patterns, which
// looks at the 2x2 tile starting at the second row and column of the sensors matrix.
// We iterate over whole image, so the patterns are first transformed as if by looking
// at the tile starting at first column and row.
std::array<std::array<std::array<int, 2>, 2>, 4> pattern2channel{{
{{{0, 1}, {1, 2}}}, // bggr -> rggb -> 0112
{{{1, 0}, {2, 1}}}, // gbrg -> grbg -> 1021
{{{1, 2}, {0, 1}}}, // grbg -> gbrg -> 1201
{{{2, 1}, {1, 0}}} // rggb -> bggr -> 2110
}};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the braces are mismatched. Plain C array would be easier to read.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::array<std::array<std::array<int, 2>, 2>, 4> pattern2channel{{
{{{0, 1}, {1, 2}}}, // bggr -> rggb -> 0112
{{{1, 0}, {2, 1}}}, // gbrg -> grbg -> 1021
{{{1, 2}, {0, 1}}}, // grbg -> gbrg -> 1201
{{{2, 1}, {1, 0}}} // rggb -> bggr -> 2110
}};
int pattern2channel[4][2][2] = {
{{0, 1}, {1, 2}}, // bggr -> rggb -> 0112
{{1, 0}, {2, 1}}, // gbrg -> grbg -> 1021
{{1, 2}, {0, 1}}, // grbg -> gbrg -> 1201
{{2, 1}, {1, 0}} // rggb -> bggr -> 2110
};

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

int batch_size = rgb_batch.num_samples();
const auto &batch_shape = rgb_batch.shape;
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) {
int height = batch_shape[sample_idx][0];
int width = batch_shape[sample_idx][1];
auto pattern = patterns_[sample_idx];
auto rgb_sample = rgb_batch[sample_idx];
auto bayer_sample = bayer_batch[sample_idx];
ASSERT_EQ(height % 2, 0);
ASSERT_EQ(width % 2, 0);
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
int i = h & 1;
int j = w & 1;
int c = pattern2channel[static_cast<int>(pattern)][i][j];
bayer_sample.data[h * width + w] =
rgb_sample.data[h * width * num_channels + w * num_channels + c];
}
}
}
}

void PrepareData(int batch_size, int min_extent, int max_extent) {
std::uniform_int_distribution<> shape_dist{min_extent / 2, max_extent / 2};
std::uniform_int_distribution<> pattern_dist{0, 3};
TensorListShape<3> batch_shape(batch_size);
for (int sample_idx = 0; sample_idx < batch_size; sample_idx++) {
TensorShape<3> sample_shape{2 * shape_dist(rng), 2 * shape_dist(rng), 3};
batch_shape.set_tensor_shape(sample_idx, sample_shape);
patterns_.push_back(static_cast<DALIBayerPattern>(pattern_dist(rng)));
}
baseline_.reshape(batch_shape);
out_.reshape(batch_shape);
in_.reshape(batch_shape.first<2>());
auto baseline_view = baseline_.cpu();
FillWithGradient(baseline_view);
auto in_view = in_.cpu();
BayerSamples(in_view, baseline_view);
}

void Run(int batch_size, int min_extent, int max_extent) {
PrepareData(batch_size, min_extent, max_extent);
Kernel kernel{0};
KernelContext ctx;
ctx.gpu.stream = cuda_stream;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;
auto in_view = in_.gpu(cuda_stream);
auto out_view = out_.gpu(cuda_stream);
kernel.Run(ctx, out_view, in_view, make_span(patterns_));
auto out_view_cpu = out_.cpu(cuda_stream);
int max_val = std::numeric_limits<InOutT>::max();
int grad_step = (max_val + min_extent - 1) / min_extent;
Check(out_view_cpu, baseline_.cpu(), EqualEps(grad_step));
}

std::vector<DALIBayerPattern> patterns_;
TestTensorList<InOutT, 2> in_;
TestTensorList<InOutT, 3> baseline_, out_;
std::mt19937_64 rng{12345};
};

using TestParams =
::testing::Types<DebayerTestParams<uint8_t, DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP>,
DebayerTestParams<uint16_t, DALIDebayerAlgorithm::DALI_DEBAYER_BILINEAR_NPP>>;

TYPED_TEST_SUITE(DebayerGpuTest, TestParams);

TYPED_TEST(DebayerGpuTest, Gradient_1) {
this->Run(1, 256, 400);
}

TYPED_TEST(DebayerGpuTest, Gradient_32) {
this->Run(32, 256, 400);
}

TYPED_TEST(DebayerGpuTest, Gradient_200) {
this->Run(200, 256, 300);
}

} // namespace test
} // namespace debayer
} // namespace kernels
} // namespace dali
Loading