Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add equalize CPU variant #4742

Merged
merged 4 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions dali/operators/image/color/equalize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.


#include <limits>
#include <opencv2/opencv.hpp>

#include "dali/operators/image/color/equalize.h"
#include "dali/pipeline/data/views.h"
#include "dali/util/ocv.h"

namespace dali {

Expand All @@ -25,4 +31,75 @@ The supported inputs are images and videos of uint8_t type.)code")
.InputLayout(0, {"HW", "HWC", "CHW", "FHW", "FHWC", "FCHW"})
.AllowSequences();

namespace equalize {

// CPU backend of the experimental equalize operator: histogram equalization of
// uint8_t images (videos are handled by the sequence machinery in the base
// class), implemented on top of OpenCV's cv::equalizeHist.
class EqualizeCPU : public Equalize<CPUBackend> {
 public:
  explicit EqualizeCPU(const OpSpec &spec) : Equalize<CPUBackend>(spec) {}

 protected:
  // Validates every sample up front, then equalizes the samples in parallel on
  // the workspace thread pool (one task per sample).
  void RunImpl(Workspace &ws) override {
    const auto &input = ws.Input<CPUBackend>(0);
    auto &output = ws.Output<CPUBackend>(0);
    auto in_view = view<const uint8_t>(input);
    auto out_view = view<uint8_t>(output);
    int sample_dim = in_view.shape.sample_dim();
    // by the check in Equalize::SetupImpl
    assert(input.type() == type2id<uint8_t>::value);
    // enforced by the layouts specified in operator schema
    assert(sample_dim == 2 || sample_dim == 3);
    output.SetLayout(input.GetLayout());
    int num_samples = in_view.num_samples();
    // Validate the whole batch before queueing any work, so a bad sample fails
    // fast on the calling thread instead of surfacing from a worker.
    for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) {
      const auto &in_sample = in_view[sample_idx];
      const auto &in_shape = in_sample.shape;
      // 2D samples are single-channel; otherwise the innermost extent is taken
      // as the channel count. NOTE(review): this assumes channels-last samples
      // at this point — presumably channel-first ("CHW") layouts are expanded
      // or normalized by the base SequenceOperator; confirm in Equalize.
      int64_t num_channels = sample_dim == 2 ? 1 : in_shape[2];
      // cv::Mat encodes the channel count in its type flag, capping it at
      // CV_CN_MAX.
      DALI_ENFORCE(
          1 <= num_channels && num_channels <= CV_CN_MAX,
          make_string("The CPU equalize operator supports images with number of channels in [1, ",
                      CV_CN_MAX, "] channels. However, the sample at index ", sample_idx, " has ",
                      num_channels, " channels."));
      // Height/width are narrowed to int when building the cv::Mat in
      // RunSample, so they must fit in int.
      DALI_ENFORCE(in_shape[0] <= std::numeric_limits<int>::max() &&
                       in_shape[1] <= std::numeric_limits<int>::max(),
                   make_string("The image height and width must not exceed the ",
                               std::numeric_limits<int>::max(), ". However, the sample at index ",
                               sample_idx, " has shape ", in_shape, "."));
    }
    auto &tp = ws.GetThreadPool();
    for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) {
      auto out_sample = out_view[sample_idx];
      auto in_sample = in_view[sample_idx];
      // The element count is the work-size hint used for load balancing.
      tp.AddWork([this, out_sample, in_sample](int) { RunSample(out_sample, in_sample); },
                 in_sample.shape.num_elements());
    }
    tp.RunAll();
  }

  // Equalizes a single sample. Single-channel images go straight to
  // cv::equalizeHist; multi-channel images are split, each channel is
  // equalized independently, and the channels are merged back.
  template <int ndim>
  void RunSample(TensorView<StorageCPU, uint8_t, ndim> out_sample,
                 TensorView<StorageCPU, const uint8_t, ndim> in_sample) {
    auto &in_sample_shape = in_sample.shape;
    int sample_dim = in_sample_shape.sample_dim();
    int num_channels = sample_dim == 2 ? 1 : in_sample.shape[2];
    int channel_flag = CV_8UC(num_channels);
    // Narrowing is safe: extents were range-checked in RunImpl.
    int height = in_sample_shape[0], width = in_sample_shape[1];
    // The Mats wrap the existing DALI buffers; no pixel data is copied here.
    const cv::Mat cv_img = CreateMatFromPtr(height, width, channel_flag, in_sample.data);
    cv::Mat out_img = CreateMatFromPtr(height, width, channel_flag, out_sample.data);
    if (num_channels == 1) {
      cv::equalizeHist(cv_img, out_img);
    } else {
      std::vector<cv::Mat> channels(num_channels);
      cv::split(cv_img, channels.data());
      for (int channel_idx = 0; channel_idx < num_channels; channel_idx++) {
        cv::equalizeHist(channels[channel_idx], channels[channel_idx]);
      }
      cv::merge(channels.data(), num_channels, out_img);
    }
  }
};

} // namespace equalize

DALI_REGISTER_OPERATOR(experimental__Equalize, equalize::EqualizeCPU, CPU);

} // namespace dali
6 changes: 2 additions & 4 deletions dali/operators/image/color/equalize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,9 @@ class EqualizeGPU : public Equalize<GPUBackend> {
void RunImpl(Workspace &ws) override {
const auto &input = ws.Input<GPUBackend>(0);
auto &output = ws.Output<GPUBackend>(0);
auto input_type = input.type();
// by the check in Equalize::SetupImpl
assert(input.type() == type2id<uint8_t>::value);
auto layout = input.GetLayout();
DALI_ENFORCE(input_type == type2id<uint8_t>::value,
make_string("Unsupported input type for equalize operator: ", input_type,
". Expected input type: `uint8_t`."));
// enforced by the layouts specified in operator schema
assert(layout.size() == 2 || layout.size() == 3);
output.SetLayout(layout);
Expand Down
7 changes: 6 additions & 1 deletion dali/operators/image/color/equalize.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@ class Equalize : public SequenceOperator<Backend> {
protected:
// Sets up a single output with the same type as the input.
// Enforces here (rather than in each backend's RunImpl) that the input is
// uint8_t — the only type the equalize kernels support — so both CPU and GPU
// variants share one validation point.
bool SetupImpl(std::vector<OutputDesc> &output_desc, const Workspace &ws) override {
  output_desc.resize(1);
  auto input_type = ws.GetInputDataType(0);
  DALI_ENFORCE(input_type == type2id<uint8_t>::value,
               make_string("Unsupported input type for equalize operator: ", input_type,
                           ". Expected input type: `uint8_t`."));

  output_desc[0].type = input_type;
  // output_desc[0].shape is set by ProcessOutputDesc
  return true;
}
Expand Down
66 changes: 58 additions & 8 deletions dali/test/python/operator_2/test_equalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.

import os
import math
import itertools

import cv2
import numpy as np
Expand All @@ -38,32 +40,80 @@ def equalize_cv_baseline(img, layout):


@pipeline_def
def images_pipeline(layout, dev):
    """Builds a pipeline that decodes test images into ``layout`` and equalizes them.

    Args:
        layout: one of "HW" (grayscale), "HWC" or "CHW".
        dev: "cpu" or "gpu" — selects where decoding/equalization run.

    Returns:
        (equalized, original) image batches.
    """
    images, _ = fn.readers.file(name="Reader", file_root=images_dir, prefetch_queue_depth=2,
                                random_shuffle=True, seed=42)
    # The "mixed" decoder produces GPU tensors; "cpu" keeps data on the host.
    decoder = "mixed" if dev == "gpu" else "cpu"
    if layout == "HW":
        images = fn.decoders.image(images, device=decoder, output_type=types.GRAY)
        # Drop the trailing channel extent so the sample layout is plain "HW".
        images = fn.squeeze(images, axes=2)
    else:
        assert layout in ["HWC", "CHW"], f"{layout}"
        images = fn.decoders.image(images, device=decoder, output_type=types.RGB)
        if layout == "CHW":
            images = fn.transpose(images, perm=[2, 0, 1])
    equalized = fn.experimental.equalize(images)
    return equalized, images


@params(*tuple(
    itertools.product(("cpu", "gpu"),
                      (("HWC", 1), ("HWC", 32), ("CHW", 1), ("CHW", 7), ("HW", 253), ("HW", 128)))))
def test_image_pipeline(dev, layout_batch_size):
    # Compares fn.experimental.equalize against the OpenCV baseline for real
    # decoded images, on both backends, across layouts and batch sizes.
    layout, batch_size = layout_batch_size
    num_iters = 2

    pipe = images_pipeline(num_threads=4, device_id=0, batch_size=batch_size, layout=layout,
                           dev=dev)
    pipe.build()

    for _ in range(num_iters):
        equalized, imgs = pipe.run()
        # GPU outputs must be copied to the host before converting to numpy.
        if dev == "gpu":
            imgs = imgs.as_cpu()
            equalized = equalized.as_cpu()
        equalized = [np.array(img) for img in equalized]
        imgs = [np.array(img) for img in imgs]
        assert len(equalized) == len(imgs)
        baseline = [equalize_cv_baseline(img, layout) for img in imgs]
        # Allow off-by-one differences due to rounding in the LUT computation.
        check_batch(equalized, baseline, max_allowed_error=1)


@params(("cpu", ), ("gpu", ))
def test_multichannel(dev):
    # Checks equalize on synthetic random HWC images with 1..13 channels and
    # varied sizes, on both backends, against the OpenCV baseline.
    sizes = [(200, 300), (700, 500), (1024, 200), (200, 1024), (1024, 1024)]
    num_channels = [1, 2, 3, 4, 5, 13]
    # keep len(sizes) and len(num_channels) co-prime to have all combinations
    assert math.gcd(len(sizes), len(num_channels)) == 1
    batch_size = len(sizes) * len(num_channels)
    rng = np.random.default_rng(424242)
    num_iters = 2

    def input_sample(sample_info):
        # Pair each position in the batch with a distinct (size, channels)
        # combination — guaranteed distinct by the co-primality check above.
        idx_in_batch = sample_info.idx_in_batch
        size = sizes[idx_in_batch % len(sizes)]
        num_channel = num_channels[idx_in_batch % len(num_channels)]
        shape = (size[0], size[1], num_channel)
        return np.uint8(rng.uniform(0, 255, shape))

    @pipeline_def(batch_size=batch_size, device_id=0, num_threads=4, seed=42)
    def pipeline():
        input = fn.external_source(input_sample, batch=False)
        if dev == "gpu":
            input = input.gpu()
        return fn.experimental.equalize(input), input

    pipe = pipeline()
    pipe.build()

    for _ in range(num_iters):
        equalized, imgs = pipe.run()
        # GPU outputs must be copied to the host before converting to numpy.
        if dev == "gpu":
            imgs = imgs.as_cpu()
            equalized = equalized.as_cpu()
        equalized = [np.array(img) for img in equalized]
        imgs = [np.array(img) for img in imgs]
        assert len(equalized) == len(imgs)
        baseline = [equalize_cv_baseline(img, "HWC") for img in imgs]
        # Allow off-by-one differences due to rounding in the LUT computation.
        check_batch(equalized, baseline, max_allowed_error=1)