Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Laplacian GPU operator #3644

Merged
merged 8 commits into from
Feb 7, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions dali/kernels/imgproc/convolution/laplacian_gpu.cuh
Original file line number Diff line number Diff line change
@@ -40,8 +40,8 @@ namespace laplacian {
* @brief Computes convolution to obtain partial derivative in one of the dimensions.
 * Convolution consists of `axes` windows, each to convolve along one dimension of the input data,
* where `deriv_axis`-th window is supposed to compute partial derivative along that axis,
* whereas the remaining windows should perform smoothing. If no smoothing is necessary in a whole
* batch, you can prevent smoothing convolutions form running by passing empty lists for
* whereas the remaining windows should perform smoothing. If no smoothing is necessary in
* the whole batch, you can prevent smoothing convolutions from running by passing empty lists for
* `window_sizes[i]` such that `i != deriv_axis`.
*/
template <typename Out, typename In, typename W, int axes, int deriv_axis, bool has_channels,
@@ -61,6 +61,18 @@ struct PartialDerivGpu {
return false;
}

/**
* @param ctx Kernel context, used for scratch-pad.
* @param in_shape List of input shapes, used by underlying convolution kernels to infer
* intermediate buffer sizes.
* @param window_sizes For given `i`, `window_sizes[i]` contains per-sample window sizes
* to be applied in a convolution along `i-th` axis. The length of
* `window_sizes[deriv_axis]` must be equal to the input batch size.
* Lists for other axes must either all have length equal to the input
* batch size or all be empty. In the latter case, smoothing convolutions
* will be omitted, i.e. only one convolution, along `deriv_axis`
* will be applied.
*/
KernelRequirements Setup(KernelContext& ctx, const TensorListShape<ndim>& in_shape,
const std::array<TensorListShape<1>, axes>& window_sizes) {
has_smoothing_ = HasSmoothing(window_sizes);
4 changes: 2 additions & 2 deletions dali/kernels/imgproc/convolution/laplacian_gpu_test.cu
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@

#include <gtest/gtest.h>
#include <array>
#include <numeric>
#include <cmath>
#include <vector>

#include "dali/kernels/common/utils.h"
@@ -154,7 +154,7 @@ struct LaplacianGpuTest : public ::testing::Test {
windows_[i][j].data[sample_idx] = window.data;
windows_[i][j].shape.set_tensor_shape(sample_idx, window.shape);
}
scales_[i][sample_idx] = exp2(-win_size_sum);
scales_[i][sample_idx] = std::exp2f(-win_size_sum);
}
}
}
125 changes: 125 additions & 0 deletions dali/kernels/imgproc/convolution/laplacian_windows.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved from laplacian_params.h

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not 1:1 copy though :P

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In hindsight - it is the worst kind of moving the code around.

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DALI_KERNELS_IMGPROC_CONVOLUTION_LAPLACIAN_WINDOWS_H_
#define DALI_KERNELS_IMGPROC_CONVOLUTION_LAPLACIAN_WINDOWS_H_

#include <vector>

#include "dali/core/tensor_view.h"

namespace dali {
namespace kernels {

template <typename T>
class LaplacianWindows {
public:
explicit LaplacianWindows(int max_window_size) : smooth_computed_{1}, deriv_computed_{1} {
Resize(max_window_size);
*smoothing_views_[0](0) = 1;
*deriv_views_[0](0) = 1;
}

TensorView<StorageCPU, const T, 1> GetDerivWindow(int window_size) {
assert(1 <= window_size && window_size <= max_window_size_);
assert(window_size % 2 == 1);
auto window_idx = window_size / 2;
PrepareSmoothingWindow(window_size - 2);
PrepareDerivWindow(window_size);
return deriv_views_[window_idx];
}

TensorView<StorageCPU, const T, 1> GetSmoothingWindow(int window_size) {
assert(1 <= window_size && window_size <= max_window_size_);
assert(window_size % 2 == 1);
auto window_idx = window_size / 2;
PrepareSmoothingWindow(window_size);
return smoothing_views_[window_idx];
}

private:
/**
* @brief Smoothing window of size 2n + 1 is [1, 2, 1] conv composed with itself n - 1 times
* so that the window has appropriate size: it boils down to computing binominal coefficients:
* (1 + 1) ^ (2n).
*/
inline void PrepareSmoothingWindow(int window_size) {
for (; smooth_computed_ < window_size; smooth_computed_++) {
auto cur_size = smooth_computed_ + 1;
auto cur_idx = cur_size / 2;
auto &prev_view = smoothing_views_[cur_size % 2 == 0 ? cur_idx - 1 : cur_idx];
auto &view = smoothing_views_[cur_idx];
auto prev_val = *prev_view(0);
*view(0) = prev_val;
for (int j = 1; j < cur_size - 1; j++) {
auto val = *prev_view(j);
*view(j) = prev_val + *prev_view(j);
prev_val = val;
}
*view(cur_size - 1) = prev_val;
}
}

/**
* @brief Derivative window of size 3 is [1, -2, 1] (which is [1, -1] composed with itself).
* Bigger windows are convolutions of smoothing windows with [1, -2, 1].
*/
inline void PrepareDerivWindow(int window_size) {
for (; deriv_computed_ < window_size; deriv_computed_++) {
auto cur_size = deriv_computed_ + 1;
auto cur_idx = cur_size / 2;
auto &prev_view = cur_size % 2 == 0 ? smoothing_views_[cur_idx - 1] : deriv_views_[cur_idx];
auto &view = deriv_views_[cur_idx];
auto prev_val = *prev_view(0);
*view(0) = -prev_val;
for (int j = 1; j < cur_size - 1; j++) {
auto val = *prev_view(j);
*view(j) = prev_val - *prev_view(j);
prev_val = val;
}
*view(cur_size - 1) = prev_val;
}
}

void Resize(int max_window_size) {
assert(1 <= max_window_size && max_window_size % 2 == 1);
max_window_size_ = max_window_size;
int num_windows = (max_window_size + 1) / 2;
int num_elements = num_windows * num_windows;
smoothing_memory_.resize(num_elements);
deriv_memory_.resize(num_elements);
smoothing_views_.resize(num_windows);
deriv_views_.resize(num_windows);
int offset = 0;
int window_size = 1;
for (int i = 0; i < num_windows; i++) {
smoothing_views_[i] = {&smoothing_memory_[offset], {window_size}};
deriv_views_[i] = {&deriv_memory_[offset], {window_size}};
offset += window_size;
window_size += 2;
}
}

int smooth_computed_, deriv_computed_;
int max_window_size_;
std::vector<T> smoothing_memory_;
std::vector<T> deriv_memory_;
std::vector<TensorView<StorageCPU, T, 1>> smoothing_views_;
std::vector<TensorView<StorageCPU, T, 1>> deriv_views_;
};

} // namespace kernels
} // namespace dali

#endif // DALI_KERNELS_IMGPROC_CONVOLUTION_LAPLACIAN_WINDOWS_H_
76 changes: 76 additions & 0 deletions dali/kernels/imgproc/convolution/laplacian_windows_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <cmath>
#include <opencv2/imgproc.hpp>

#include "dali/kernels/common/utils.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"

#include "dali/kernels/imgproc/convolution/laplacian_windows.h"

namespace dali {
namespace kernels {

void CheckDerivWindow(int window_size, LaplacianWindows<float> &windows) {
cv::Mat d, s;
cv::getDerivKernels(d, s, 2, 0, window_size, true, CV_32F);
const auto &window_view = windows.GetDerivWindow(window_size);
float d_scale = std::exp2f(-window_size + 3);
for (int i = 0; i < window_size; i++) {
EXPECT_NEAR(window_view.data[i] * d_scale, d.at<float>(i), 1e-6f)
<< "window_size: " << window_size << ", position: " << i;
}
}

void CheckSmoothingWindow(int window_size, LaplacianWindows<float> &windows) {
cv::Mat d, s;
cv::getDerivKernels(d, s, 2, 0, window_size, true, CV_32F);
const auto &window_view = windows.GetSmoothingWindow(window_size);
float s_scale = std::exp2f(-window_size + 1);
for (int i = 0; i < window_size; i++) {
EXPECT_NEAR(window_view.data[i] * s_scale, s.at<float>(i), 1e-6f)
<< "window_size: " << window_size << ", position: " << i;
}
}

TEST(LaplacianWindowsTest, GetDerivWindows) {
int max_window = 31;
LaplacianWindows<float> windows{max_window};
for (int window_size = 3; window_size <= max_window; window_size += 2) {
CheckDerivWindow(window_size, windows);
}
}

TEST(LaplacianWindowsTest, GetSmoothingWindows) {
int max_window = 31;
LaplacianWindows<float> windows{max_window};
for (int window_size = 3; window_size <= max_window; window_size += 2) {
CheckSmoothingWindow(window_size, windows);
}
}

TEST(LaplacianWindowsTest, CheckPrecomputed) {
int max_window = 31;
LaplacianWindows<float> windows{max_window};
for (int window_size = max_window; window_size >= 3; window_size -= 2) {
CheckDerivWindow(window_size, windows);
CheckSmoothingWindow(window_size, windows);
}
}

} // namespace kernels
} // namespace dali
3 changes: 2 additions & 1 deletion dali/operators/image/convolution/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
# limitations under the License.

add_subdirectory(gaussian_blur_gpu)
add_subdirectory(laplacian_gpu)

# Get all the source files and dump test files
collect_headers(DALI_INST_HDRS PARENT_SCOPE)
31 changes: 20 additions & 11 deletions dali/operators/image/convolution/laplacian.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@

#include "dali/core/static_switch.h"
#include "dali/kernels/imgproc/convolution/laplacian_cpu.h"
#include "dali/kernels/imgproc/convolution/laplacian_windows.h"
#include "dali/kernels/kernel_manager.h"
#include "dali/operators/image/convolution/laplacian.h"
#include "dali/pipeline/data/views.h"
@@ -106,8 +107,12 @@ class LaplacianOpCpu : public OpImplBase<CPUBackend> {
using Kernel = kernels::LaplacianCpu<Out, In, float, axes, has_channels>;
static constexpr int ndim = Kernel::ndim;

explicit LaplacianOpCpu(const OpSpec& spec, const DimDesc& dim_desc)
: spec_{spec}, args{spec}, dim_desc_{dim_desc} {}
/**
* @param spec Pointer to a persistent OpSpec object,
* which is guaranteed to be alive for the entire lifetime of this object
*/
explicit LaplacianOpCpu(const OpSpec* spec, const DimDesc& dim_desc)
: spec_{*spec}, args{*spec}, dim_desc_{dim_desc}, lap_windows_{maxWindowSize} {}

bool SetupImpl(std::vector<OutputDesc>& output_desc, const workspace_t<CPUBackend>& ws) override {
const auto& input = ws.template Input<CPUBackend>(0);
@@ -125,7 +130,9 @@ class LaplacianOpCpu : public OpImplBase<CPUBackend> {
const auto& window_sizes = args.GetWindowSizes(sample_idx);
for (int i = 0; i < axes; i++) {
for (int j = 0; j < axes; j++) {
windows_[sample_idx][i][j] = lap_windows_.GetWindow(window_sizes[i][j], i == j);
auto window_size = window_sizes[i][j];
windows_[sample_idx][i][j] = i == j ? lap_windows_.GetDerivWindow(window_size) :
lap_windows_.GetSmoothingWindow(window_size);
}
}
}
@@ -181,19 +188,20 @@ class LaplacianOpCpu : public OpImplBase<CPUBackend> {

LaplacianArgs<axes> args;
DimDesc dim_desc_;
kernels::LaplacianWindows<float> lap_windows_;

kernels::KernelManager kmgr_;
kernels::KernelContext ctx_;

LaplacianWindows<float> lap_windows_;
// windows_[i][j] is a window used in convolution along j-th axis in the i-th partial derivative
std::vector<std::array<std::array<TensorView<StorageCPU, const float, 1>, axes>, axes>> windows_;
};


} // namespace laplacian

bool Laplacian::SetupImpl(std::vector<OutputDesc>& output_desc, const workspace_t<CPUBackend>& ws) {
template <>
bool Laplacian<CPUBackend>::SetupImpl(std::vector<OutputDesc>& output_desc,
const workspace_t<CPUBackend>& ws) {
const auto& input = ws.template Input<CPUBackend>(0);
auto layout = input.GetLayout();
auto dim_desc = ParseAndValidateDim(input.shape().sample_dim(), layout);
@@ -210,10 +218,10 @@ bool Laplacian::SetupImpl(std::vector<OutputDesc>& output_desc, const workspace_
BOOL_SWITCH(dim_desc.is_channel_last(), HasChannels, (
if (dtype == input.type()) {
using LaplacianSame = laplacian::LaplacianOpCpu<In, In, Axes, HasChannels>;
impl_ = std::make_unique<LaplacianSame>(spec_, dim_desc);
impl_ = std::make_unique<LaplacianSame>(&spec_, dim_desc);
} else {
using LaplacianFloat = laplacian::LaplacianOpCpu<float, In, Axes, HasChannels>;
impl_ = std::make_unique<LaplacianFloat>(spec_, dim_desc);
impl_ = std::make_unique<LaplacianFloat>(&spec_, dim_desc);
}
)); // NOLINT
), DALI_FAIL("Axis count out of supported range.")); // NOLINT
@@ -223,10 +231,11 @@ bool Laplacian::SetupImpl(std::vector<OutputDesc>& output_desc, const workspace_
return impl_->SetupImpl(output_desc, ws);
}

void Laplacian::RunImpl(workspace_t<CPUBackend>& ws) {
template <>
void Laplacian<CPUBackend>::RunImpl(workspace_t<CPUBackend>& ws) {
impl_->RunImpl(ws);
}

DALI_REGISTER_OPERATOR(Laplacian, Laplacian, CPU);
DALI_REGISTER_OPERATOR(Laplacian, Laplacian<CPUBackend>, CPU);

} // namespace dali
Loading