From fb1d481c1c7306977d976bca2fe89f36b0319793 Mon Sep 17 00:00:00 2001 From: drivanov <32910461+drivanov@users.noreply.github.com> Date: Fri, 17 Aug 2018 14:06:40 -0700 Subject: [PATCH] Test refactor (#91) * Division by 0 in CropMirrorNormalizeTest was fixed. Elimination of duplicated code from WriteImage functions. Signed-off-by: Andrei * Fix of problem in bresnet50 benchmark Signed-off-by: Andrei * CropMirrorNormalizePermuteTest refactored Signed-off-by: Andrei * Lint problem fixed Signed-off-by: Andrei * JpegDecodeTest re-implemented Signed-off-by: Andrei * Nondefault bound for average deviation implemented for 2 decode tests Signed-off-by: Andrei * Executor test refactored. The JPEG/PNG files are loaded and decoded only when this is really needed for the current test. Signed-off-by: Andrei * Lint problem fixed Output of the images in benchmark eliminated Signed-off-by: Andrei * Eps for ExecutorTest and some changes, suggested by Cliff Signed-off-by: Andrei * Changes requested by Cliff. - Destructor for ImageSetDescr - Conditional compilation of WriteHWCBatch for resnet50 benchmark Signed-off-by: Andrei * Color augmentations on CPU Signed-off-by: Andrei * GenericMatchingTest class implemented This class was used for - new tests for Displacement Operators. 
- refactoring of the tests for Color Operators Signed-off-by: Andrei * Average deviation of pixel colors for unit tests rescaled (normalized to color range and converted to percentages) Signed-off-by: Andrei * Changes requested by Przemek Signed-off-by: Andrei --- dali/benchmark/dali_bench.h | 19 +- dali/benchmark/resnet50_nvjpeg_bench.cc | 4 +- dali/image/jpeg_test.cc | 120 +----- dali/image/transform.cc | 44 +++ dali/image/transform.h | 9 +- dali/pipeline/data/allocator.h | 4 +- dali/pipeline/executor/executor_test.cc | 97 +---- dali/pipeline/operators/color/color_test.cc | 42 ++ dali/pipeline/operators/color/color_twist.cc | 40 +- .../operators/decoder/host_decoder_test.cc | 11 +- .../operators/decoder/nvjpeg_decoder_test.cc | 2 +- .../displacement/displacement_test.cc | 55 +++ .../fused/crop_mirror_normalize_test.cc | 200 +++------- .../operators/fused/resize_crop_mirror.h | 12 +- .../fused/resize_crop_mirror_test.cc | 4 +- dali/pipeline/operators/operator.h | 2 +- dali/pipeline/operators/operator_factory.h | 19 +- .../operators/operator_factory_test.cc | 4 +- dali/pipeline/operators/resize/new_resize.h | 2 +- .../operators/resize/new_resize_test.cc | 4 +- dali/pipeline/operators/resize/resize_test.cc | 8 +- dali/pipeline/pipeline_test.cc | 372 +++++++----------- dali/test/dali_test.h | 66 ++-- dali/test/dali_test_decoder.h | 61 +-- dali/test/dali_test_matching.h | 75 ++++ dali/test/dali_test_resize.h | 66 +--- dali/test/dali_test_single_op.h | 317 ++++++++++++--- dali/util/image.cc | 65 +-- dali/util/image.h | 143 +++---- 29 files changed, 927 insertions(+), 940 deletions(-) create mode 100644 dali/pipeline/operators/color/color_test.cc create mode 100644 dali/pipeline/operators/displacement/displacement_test.cc create mode 100644 dali/test/dali_test_matching.h diff --git a/dali/benchmark/dali_bench.h b/dali/benchmark/dali_bench.h index fcc6f4e0fbe..30174c2606d 100644 --- a/dali/benchmark/dali_bench.h +++ b/dali/benchmark/dali_bench.h @@ -33,14 +33,10 @@ class 
DALIBenchmark : public benchmark::Fixture { public: DALIBenchmark() { rand_gen_.seed(time(nullptr)); - LoadJPEGS(image_folder, &jpeg_names_, &jpegs_, &jpeg_sizes_); + LoadJPEGS(image_folder, &jpeg_names_, &jpegs_); } - virtual ~DALIBenchmark() { - for (auto &ptr : jpegs_) { - delete[] ptr; - } - } + virtual ~DALIBenchmark() = default; int RandInt(int a, int b) { return std::uniform_int_distribution<>(a, b)(rand_gen_); @@ -52,10 +48,11 @@ class DALIBenchmark : public benchmark::Fixture { } inline void MakeJPEGBatch(TensorList *tl, int n) { - DALI_ENFORCE(jpegs_.size() > 0, "jpegs must be loaded to create batches"); + const auto nImgs = jpegs_.nImages(); + DALI_ENFORCE(nImgs > 0, "jpegs must be loaded to create batches"); vector shape(n); for (int i = 0; i < n; ++i) { - shape[i] = {jpeg_sizes_[i % jpegs_.size()]}; + shape[i] = {jpegs_.sizes_[i % nImgs]}; } tl->template mutable_data(); @@ -63,16 +60,14 @@ class DALIBenchmark : public benchmark::Fixture { for (int i = 0; i < n; ++i) { std::memcpy(tl->template mutable_tensor(i), - jpegs_[i % jpegs_.size()], - jpeg_sizes_[i % jpegs_.size()]); + jpegs_.data_[i % nImgs], jpegs_.sizes_[i % nImgs]); } } protected: std::mt19937 rand_gen_; vector jpeg_names_; - vector jpegs_; - vector jpeg_sizes_; + ImgSetDescr jpegs_; }; } // namespace dali diff --git a/dali/benchmark/resnet50_nvjpeg_bench.cc b/dali/benchmark/resnet50_nvjpeg_bench.cc index 9a4897343b0..42941953541 100644 --- a/dali/benchmark/resnet50_nvjpeg_bench.cc +++ b/dali/benchmark/resnet50_nvjpeg_bench.cc @@ -112,7 +112,9 @@ BENCHMARK_DEFINE_F(RealRN50, nvjpegPipe)(benchmark::State& st) { // NOLINT } } - WriteHWCBatch(*ws.Output(0), 0, 1, "img"); +#if DALI_DEBUG + WriteHWCBatch(*ws.Output(0), "img"); +#endif int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/image/jpeg_test.cc b/dali/image/jpeg_test.cc index 87089cf3359..85cbc5c959a 100644 --- 
a/dali/image/jpeg_test.cc +++ b/dali/image/jpeg_test.cc @@ -12,117 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include - -#include -#include -#include -#include -#include - -#include "dali/common.h" -#include "dali/test/dali_test.h" -#include "dali/image/jpeg.h" +#include "dali/test/dali_test_decoder.h" namespace dali { -namespace { -// Our turbo jpeg decoder cannot handle CMYK images -// or 410 images -const vector tjpg_test_images = { - image_folder + "/420.jpg", - image_folder + "/422.jpg", - image_folder + "/440.jpg", - image_folder + "/444.jpg", - image_folder + "/gray.jpg", - image_folder + "/411.jpg", - image_folder + "/411-non-multiple-4-width.jpg", - image_folder + "/420-odd-height.jpg", - image_folder + "/420-odd-width.jpg", - image_folder + "/420-odd-both.jpg", - image_folder + "/422-odd-width.jpg" -}; -} // namespace - // Fixture for jpeg decode testing. Templated // to make googletest run our tests grayscale & rgb template -class JpegDecodeTest : public DALITest { - public: - void SetUp() { - if (IsColor(img_type_)) { - c_ = 3; - } else if (img_type_ == DALI_GRAY) { - c_ = 1; - } - rand_gen_.seed(time(nullptr)); - LoadJPEGS(tjpg_test_images, &jpegs_, &jpeg_sizes_); - } - - void TearDown() { - DALITest::TearDown(); - } - - void VerifyDecode(const uint8 *img, int h, int w, int img_id) { - // Compare w/ opencv result - cv::Mat ver; - cv::Mat jpeg = cv::Mat(1, jpeg_sizes_[img_id], CV_8UC1, jpegs_[img_id]); - - ASSERT_TRUE(CheckIsJPEG(jpegs_[img_id], jpeg_sizes_[img_id])); - int flag = IsColor(img_type_) ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE; - cv::imdecode(jpeg, flag, &ver); - - cv::Mat ver_img(h, w, IsColor(img_type_) ? 
CV_8UC3 : CV_8UC2); - if (img_type_ == DALI_RGB) { - // Convert from BGR to RGB for verification - cv::cvtColor(ver, ver_img, CV_BGR2RGB); - } else { - ver_img = ver; - } - - ASSERT_EQ(h, ver_img.rows); - ASSERT_EQ(w, ver_img.cols); - vector diff(h*w*c_, 0); - for (int i = 0; i < h*w*c_; ++i) { - diff[i] = abs(static_cast(ver_img.ptr()[i] - img[i])); - } - - // calculate the MSE - float mean, std; - MeanStdDev(diff, &mean, &std); - -#ifndef NDEBUG - cout << "num: " << diff.size() << endl; - cout << "mean: " << mean << endl; - cout << "std: " << std << endl; -#endif - - // Note: We allow a slight deviation from the ground truth. - // This value was picked fairly arbitrarily to let the test - // pass for libjpeg turbo - ASSERT_LT(mean, 2.f); - ASSERT_LT(std, 3.f); - } - - void MeanStdDev(const vector &diff, float *mean, float *std) { - // Avoid division by zero - ASSERT_NE(diff.size(), 0); - - double sum = 0, var_sum = 0; - for (auto &val : diff) { - sum += val; - } - *mean = sum / diff.size(); - for (auto &val : diff) { - var_sum += (val - *mean)*(val - *mean); - } - *std = sqrt(var_sum / diff.size()); - } - - protected: - const DALIImageType img_type_ = ImgType::type; - int c_; +class JpegDecodeTest : public GenericDecoderTest { }; // Run RGB & grayscale tests @@ -130,18 +27,7 @@ typedef ::testing::Types Types; TYPED_TEST_CASE(JpegDecodeTest, Types); TYPED_TEST(JpegDecodeTest, DecodeJPEGHost) { - vector image; - for (size_t img = 0; img < this->jpegs_.size(); ++img) { - Tensor t; - DALI_CALL(DecodeJPEGHost(this->jpegs_[img], - this->jpeg_sizes_[img], - this->img_type_, &t)); -#ifndef NDEBUG - cout << img << " " << tjpg_test_images[img] << " " << this->jpeg_sizes_[img] << endl; - cout << "dims: " << t.dim(1) << "x" << t.dim(0) << endl; -#endif - this->VerifyDecode(t.data(), t.dim(0), t.dim(1), img); - } + this->RunTestDecode(this->jpegs_, 1.5); } } // namespace dali diff --git a/dali/image/transform.cc b/dali/image/transform.cc index fd584e284f5..99b87069efc 100644 
--- a/dali/image/transform.cc +++ b/dali/image/transform.cc @@ -152,4 +152,48 @@ DALIError_t FastResizeCropMirrorHost(const uint8 *img, int H, int W, int C, return DALISuccess; } +void CheckParam(const Tensor &input, const std::string &opName) { + DALI_ENFORCE(input.ndim() == 3); + DALI_ENFORCE(IsType(input.type()), + opName + " expects input data in uint8."); + DALI_ENFORCE(input.dim(2) == 1 || input.dim(2) == 3, + opName + " supports hwc rgb & grayscale inputs."); +} + +typedef cv::Vec Vec1b; + +DALIError_t MakeColorTransformation(const uint8 *img, int H, int W, int C, + const float *matr, uint8 *out_img) { + const int channel_flag = C == 3 ? CV_8UC3 : CV_8UC1; + + const cv::Mat cv_imgIn = CreateMatFromPtr(H, W, channel_flag, img); + cv::Mat cv_imgOut = CreateMatFromPtr(H, W, channel_flag, out_img); + + if (C == 1) { + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + cv_imgOut.at(y, x)[0] = + cv::saturate_cast((matr[0] * cv_imgIn.at(y, x)[0]) + matr[1]); + } + } + } else { + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + // Using direct calculation because they are 25% faster + // than two loops which could be used here + const auto &inpPix = cv_imgIn.at(y, x); + auto &outPix = cv_imgOut.at(y, x); + outPix[0] = cv::saturate_cast + (inpPix[0] * matr[0] + inpPix[1] * matr[1] + inpPix[2] * matr[2] + matr[3]); + outPix[1] = cv::saturate_cast + (inpPix[0] * matr[4] + inpPix[1] * matr[5] + inpPix[2] * matr[6] + matr[7]); + outPix[2] = cv::saturate_cast + (inpPix[0] * matr[8] + inpPix[1] * matr[9] + inpPix[2] * matr[10] + matr[11]); + } + } + } + + return DALISuccess; +} + } // namespace dali diff --git a/dali/image/transform.h b/dali/image/transform.h index bb700090718..2a6bc2eb00c 100644 --- a/dali/image/transform.h +++ b/dali/image/transform.h @@ -15,8 +15,10 @@ #ifndef DALI_IMAGE_TRANSFORM_H_ #define DALI_IMAGE_TRANSFORM_H_ +#include #include "dali/common.h" #include "dali/error_handling.h" +#include "dali/pipeline/data/tensor.h" 
namespace dali { @@ -31,7 +33,7 @@ namespace dali { * this temporary workspace pointer to avoid extra memory allocation. The size * of the memory pointed to by 'workspace' should be rsz_h*rsz_w*C bytes * - * Note: We leave the calculate of the resize dimesions & the decision of whether + * Note: We leave the calculate of the resize dimensions & the decision of whether * to mirror the image or not external to the function. With the GPU version of * this function, these params will need to have been calculated before-hand * and, in the case of a batched call, copied to the device. Separating these @@ -65,6 +67,11 @@ DALIError_t FastResizeCropMirrorHost(const uint8 *img, int H, int W, int C, int mirror, uint8 *out_img, DALIInterpType type = DALI_INTERP_LINEAR, uint8 *workspace = nullptr); +void CheckParam(const Tensor &input, const std::string &pOperator); + +DALIError_t MakeColorTransformation(const uint8 *img, int H, int W, int C, + const float *matrix, uint8 *out_img); + } // namespace dali #endif // DALI_IMAGE_TRANSFORM_H_ diff --git a/dali/pipeline/data/allocator.h b/dali/pipeline/data/allocator.h index 64030be8104..d62aae923a6 100644 --- a/dali/pipeline/data/allocator.h +++ b/dali/pipeline/data/allocator.h @@ -75,7 +75,7 @@ DALI_DECLARE_OPTYPE_REGISTRY(GPUAllocator, GPUAllocator); #define DALI_REGISTER_GPU_ALLOCATOR(OpName, OpType) \ DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType, \ - dali::GPUAllocator, dali::GPUAllocator) + dali::GPUAllocator, dali::GPUAllocator, "GPU_Allocator") /** @@ -99,7 +99,7 @@ DALI_DECLARE_OPTYPE_REGISTRY(CPUAllocator, CPUAllocator); #define DALI_REGISTER_CPU_ALLOCATOR(OpName, OpType) \ DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType, \ - dali::CPUAllocator, dali::CPUAllocator) + dali::CPUAllocator, dali::CPUAllocator, "CPU_Allocator") /** * @brief Pinned memory CPU allocator diff --git a/dali/pipeline/executor/executor_test.cc b/dali/pipeline/executor/executor_test.cc index 990497e1986..4470a7e9141 100644 --- 
a/dali/pipeline/executor/executor_test.cc +++ b/dali/pipeline/executor/executor_test.cc @@ -12,118 +12,58 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "dali/pipeline/executor/executor.h" - -#include - -#include "dali/pipeline/operators/util/external_source.h" -#include "dali/test/dali_test.h" +#include "dali/test/dali_test_decoder.h" namespace dali { -namespace { -// Our turbo jpeg decoder cannot handle CMYK images -// or 410 images -const vector tjpg_test_images = { - image_folder + "/420.jpg", - image_folder + "/422.jpg", - image_folder + "/440.jpg", - image_folder + "/444.jpg", - image_folder + "/gray.jpg", - image_folder + "/411.jpg", - image_folder + "/411-non-multiple-4-width.jpg", - image_folder + "/420-odd-height.jpg", - image_folder + "/420-odd-width.jpg", - image_folder + "/420-odd-both.jpg", - image_folder + "/422-odd-width.jpg" -}; -} // namespace +class ExecutorTest : public GenericDecoderTest { + protected: + uint32_t GetImageLoadingFlags() const override { + return t_loadJPEGs + t_decodeJPEGs; + } -class ExecutorTest : public DALITest { - public: void SetUp() override { - rand_gen_.seed(time(nullptr)); - LoadJPEGS(tjpg_test_images, &jpegs_, &jpeg_sizes_); - batch_size_ = jpegs_.size(); - DecodeJPEGS(DALI_RGB); + DALISingleOpTest::SetUp(); + set_batch_size(jpegs_.nImages()); } inline void set_batch_size(int size) { batch_size_ = size; } - inline OpSpec PrepareSpec(OpSpec spec) { + inline OpSpec PrepareSpec(OpSpec spec) const { spec.AddArg("batch_size", batch_size_) .AddArg("num_threads", num_threads_); return spec; } - inline void PruneGraph(Executor *exe) { + inline void PruneGraph(Executor *exe) const { exe->PruneUnusedGraphNodes(); } - vector CPUData(Executor *exe, int idx) { + vector CPUData(Executor *exe, int idx) const { return exe->wss_[idx].cpu_op_data; } - vector MixedData(Executor *exe, int idx) { + vector MixedData(Executor *exe, int idx) const { return 
exe->wss_[idx].mixed_op_data; } - vector GPUData(Executor *exe, int idx) { + vector GPUData(Executor *exe, int idx) const { return exe->wss_[idx].gpu_op_data; } - void VerifyDecode(const uint8 *img, int h, int w, int img_id) { + void VerifyDecode(const uint8 *img, int h, int w, int img_id) const { // Load the image to host uint8 *host_img = new uint8[h*w*c_]; CUDA_CALL(cudaMemcpy(host_img, img, h*w*c_, cudaMemcpyDefault)); - // Compare w/ opencv result - cv::Mat ver; - cv::Mat jpeg = cv::Mat(1, jpeg_sizes_[img_id], CV_8UC1, jpegs_[img_id]); - - ASSERT_TRUE(CheckIsJPEG(jpegs_[img_id], jpeg_sizes_[img_id])); - int flag = IsColor(img_type_) ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE; - cv::imdecode(jpeg, flag, &ver); - - cv::Mat ver_img(h, w, IsColor(img_type_) ? CV_8UC3 : CV_8UC2); - if (img_type_ == DALI_RGB) { - // Convert from BGR to RGB for verification - cv::cvtColor(ver, ver_img, CV_BGR2RGB); - } else { - ver_img = ver; - } - - // DEBUG - // WriteHWCImage(ver_img.ptr(), h, w, c_, std::to_string(img_id) + "-ver"); - - ASSERT_EQ(h, ver_img.rows); - ASSERT_EQ(w, ver_img.cols); - vector diff(h*w*c_, 0); - for (int i = 0; i < h*w*c_; ++i) { - diff[i] = abs(static_cast(ver_img.ptr()[i] - host_img[i])); - } - - // calculate the MSE - double mean, std; - this->MeanStdDev(diff, &mean, &std); - -#ifndef NDEBUG - cout << "num: " << diff.size() << endl; - cout << "mean: " << mean << endl; - cout << "std: " << std << endl; +#if DALI_DEBUG + WriteHWCImage(host_img, h, w, c_, std::to_string(img_id) + "-img"); #endif - - // Note: We allow a slight deviation from the ground truth. 
- // This value was picked fairly arbitrarily to let the test - // pass for libjpeg turbo - ASSERT_LT(mean, 2.f); - ASSERT_LT(std, 3.f); + GenericDecoderTest::VerifyDecode(host_img, h, w, jpegs_, img_id); + delete [] host_img; } - protected: int batch_size_, num_threads_ = 1; - int c_ = 3; - DALIImageType img_type_ = DALI_RGB; }; TEST_F(ExecutorTest, TestPruneBasicGraph) { @@ -454,6 +394,7 @@ TEST_F(ExecutorTest, TestRunBasicGraph) { TEST_F(ExecutorTest, TestPrefetchedExecution) { int batch_size = this->batch_size_ / 2; this->set_batch_size(batch_size); + this->SetEps(1.6); Executor exe(this->batch_size_, this->num_threads_, 0, 1); diff --git a/dali/pipeline/operators/color/color_test.cc b/dali/pipeline/operators/color/color_test.cc new file mode 100644 index 00000000000..df59d542a95 --- /dev/null +++ b/dali/pipeline/operators/color/color_test.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "dali/test/dali_test_matching.h" + +namespace dali { + +template +class ColorTest : public GenericMatchingTest { +}; + +typedef ::testing::Types Types; +TYPED_TEST_CASE(ColorTest, Types); + +TYPED_TEST(ColorTest, Brightness) { + this->RunTest({"Brightness", "brightness", "3.", 1e-4}); +} + +TYPED_TEST(ColorTest, Contrast) { + this->RunTest({"Contrast", "contrast", "1.3", 0.18}); +} + +TYPED_TEST(ColorTest, Saturation) { + this->RunTest({"Saturation", "saturation", "3.", 0.26}); +} + +TYPED_TEST(ColorTest, Hue) { + this->RunTest({"Hue", "hue", "31.456", 0.27}); +} + +} // namespace dali diff --git a/dali/pipeline/operators/color/color_twist.cc b/dali/pipeline/operators/color/color_twist.cc index 8b274de38c5..fbeed64f190 100644 --- a/dali/pipeline/operators/color/color_twist.cc +++ b/dali/pipeline/operators/color/color_twist.cc @@ -13,8 +13,7 @@ // limitations under the License. #include "dali/pipeline/operators/color/color_twist.h" -#include -#include +#include "dali/image/transform.h" namespace dali { @@ -72,4 +71,41 @@ Values >= 0 are supported. 
For example: )code", 1.f, true) .AddParent("ColorTransformBase"); +template <> +void ColorTwistBase::RunImpl(SampleWorkspace *ws, const int idx) { + const auto &input = ws->Input(idx); + auto output = ws->Output(idx); + const auto &input_shape = input.shape(); + + CheckParam(input, "Color augmentation"); + + const auto H = input_shape[0]; + const auto W = input_shape[1]; + const auto C = input_shape[2]; + + output->ResizeLike(input); + + auto pImgInp = input.template data(); + auto pImgOut = output->template mutable_data(); + + if (!augments_.empty()) { + float matrix[nDim][nDim]; + float *m = reinterpret_cast(matrix); + IdentityMatrix(m); + for (size_t j = 0; j < augments_.size(); ++j) { + augments_[j]->Prepare(0, spec_, ws); + (*augments_[j])(m); + } + + MakeColorTransformation(pImgInp, H, W, C, m, pImgOut); + } else { + memcpy(pImgOut, pImgInp, H * W * C); + } +} + +DALI_REGISTER_OPERATOR(Brightness, BrightnessAdjust, CPU); +DALI_REGISTER_OPERATOR(Contrast, ContrastAdjust, CPU); +DALI_REGISTER_OPERATOR(Hue, HueAdjust, CPU); +DALI_REGISTER_OPERATOR(Saturation, SaturationAdjust, CPU); + } // namespace dali diff --git a/dali/pipeline/operators/decoder/host_decoder_test.cc b/dali/pipeline/operators/decoder/host_decoder_test.cc index 09c07c770c6..07fa7051e42 100644 --- a/dali/pipeline/operators/decoder/host_decoder_test.cc +++ b/dali/pipeline/operators/decoder/host_decoder_test.cc @@ -19,6 +19,10 @@ namespace dali { template class HostDecodeTest : public GenericDecoderTest { protected: + uint32_t GetImageLoadingFlags() const override { + return t_loadJPEGs + t_loadPNGs; + } + const OpSpec DecodingOp() const override { return OpSpec("HostDecoder") .AddArg("device", "cpu") @@ -26,8 +30,9 @@ class HostDecodeTest : public GenericDecoderTest { .AddInput("encoded", "cpu") .AddOutput("decoded", "cpu"); } - uint8 TestCheckType() const override { - return t_checkColorComp + t_checkElements + t_checkAll + t_checkNoAssert; + + uint8 GetTestCheckType() const override { + return 
t_checkColorComp + t_checkElements; // + t_checkAll + t_checkNoAssert; } }; @@ -35,7 +40,7 @@ typedef ::testing::Types Types; TYPED_TEST_CASE(HostDecodeTest, Types); TYPED_TEST(HostDecodeTest, TestJPEGDecode) { - this->RunTestDecode(t_jpegImgType, 0.00000005); + this->RunTestDecode(t_jpegImgType, 0.65); } TYPED_TEST(HostDecodeTest, TestPNGDecode) { diff --git a/dali/pipeline/operators/decoder/nvjpeg_decoder_test.cc b/dali/pipeline/operators/decoder/nvjpeg_decoder_test.cc index cfd379e6715..3a59869ed8f 100644 --- a/dali/pipeline/operators/decoder/nvjpeg_decoder_test.cc +++ b/dali/pipeline/operators/decoder/nvjpeg_decoder_test.cc @@ -32,7 +32,7 @@ class nvjpegDecodeTest : public GenericDecoderTest { void TestDecode(bool batched, int num_threads) { batched_ = batched; this->SetNumThreads(num_threads); - this->RunTestDecode(t_jpegImgType, 2.0); + this->RunTestDecode(t_jpegImgType, 0.7); } private: diff --git a/dali/pipeline/operators/displacement/displacement_test.cc b/dali/pipeline/operators/displacement/displacement_test.cc new file mode 100644 index 00000000000..a43e8c1d173 --- /dev/null +++ b/dali/pipeline/operators/displacement/displacement_test.cc @@ -0,0 +1,55 @@ +// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "dali/test/dali_test_matching.h" + +namespace dali { + +template +class DisplacementTest : public GenericMatchingTest { +}; + +typedef ::testing::Types Types; +TYPED_TEST_CASE(DisplacementTest, Types); + +TYPED_TEST(DisplacementTest, Sphere) { + this->RunTest("Sphere"); +} + +TYPED_TEST(DisplacementTest, Water) { + const OpArg params[] = {{"ampl_x", "2.", t_floatParam}, + {"ampl_y", "3.", t_floatParam}, + {"phase_x", "0.2", t_floatParam}}; + this->RunTest("Water", params, sizeof(params)/sizeof(params[0])); +} + +/* + * As of 08/03/2018 this test is disabled because Jitter is not activated for CPU + * +TYPED_TEST(DisplacementTest, Jitter) { + this->RunTest("Jitter"); +} +*/ + +TYPED_TEST(DisplacementTest, WarpAffine) { + vector matrix{1.0, 0.8, 0.0, 0.0, 1.2, 0.0}; + const OpArg params = {"matrix", "1.0, 0.8, 0.0, 0.0, 1.2, 0.0", t_floatVector}; + this->RunTest("WarpAffine", ¶ms, 1); +} + +TYPED_TEST(DisplacementTest, Rotate) { + this->RunTest({"Rotate", "angle", "10", 0.001}); +} + +} // namespace dali diff --git a/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc b/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc index 55492b1232e..860aef53c67 100644 --- a/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc +++ b/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc @@ -12,179 +12,67 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include - -#include -#include - -#include "dali/pipeline/operators/fused/resize_crop_mirror.h" -#include "dali/common.h" -#include "dali/error_handling.h" -#include "dali/image/jpeg.h" -#include "dali/pipeline/pipeline.h" -#include "dali/test/dali_test.h" +#include "dali/test/dali_test_resize.h" namespace dali { -namespace { -// 440 & 410 not supported by npp -const vector hybdec_images = { - image_folder + "/411.jpg", - image_folder + "/420.jpg", - image_folder + "/422.jpg", - image_folder + "/444.jpg", - image_folder + "/gray.jpg", - image_folder + "/411-non-multiple-4-width.jpg", - image_folder + "/420-odd-height.jpg", - image_folder + "/420-odd-width.jpg", - image_folder + "/420-odd-both.jpg", - image_folder + "/422-odd-width.jpg" -}; -} // namespace - template -class CropMirrorNormalizePermuteTest : public DALITest { - public: - void SetUp() { - if (IsColor(img_type_)) { - c_ = 3; - } else if (img_type_ == DALI_GRAY) { - c_ = 1; - } else { - DALI_FAIL("Unsupported image type."); - } - - rand_gen_.seed(time(nullptr)); - LoadJPEGS(hybdec_images, &jpegs_, &jpeg_sizes_); - } - - void TearDown() { - DALITest::TearDown(); +class CropMirrorNormalizePermuteTest : public GenericResizeTest { + protected: + virtual vector*> + Reference(const vector*> &inputs, DeviceWorkspace *ws) { + return this->CopyToHost(*ws->Output(1)); } - void VerifyImage(const float *img, const float *img2, int n, - float mean_bound = 2.0, float std_bound = 3.0) { - std::vector host_img(n), host_img2(n); + void RunTest() { + const int batch_size = this->jpegs_.nImages(); + this->SetBatchSize(batch_size); + this->SetNumThreads(1); - CUDA_CALL(cudaMemcpy(host_img.data(), img, n*sizeof(float), cudaMemcpyDefault)); - CUDA_CALL(cudaMemcpy(host_img2.data(), img2, n*sizeof(float), cudaMemcpyDefault)); + TensorList data; + this->MakeJPEGBatch(&data, batch_size); + this->SetExternalInputs({{"jpegs", &data}}); - vector abs_diff(n, 0); - for (int i = 0; i < n; ++i) { - abs_diff[i] = 
abs(host_img[i] - host_img2[i]); - } - double mean, std; - MeanStdDev(abs_diff, &mean, &std); + shared_ptr pipe = this->GetPipeline(); + // Decode the images + pipe->AddOperator( + OpSpec("HostDecoder") + .AddArg("output_type", this->img_type_) + .AddInput("jpegs", "cpu") + .AddOutput("images", "cpu")); -#ifndef NDEBUG - cout << "num: " << abs_diff.size() << endl; - cout << "mean: " << mean << endl; - cout << "std: " << std << endl; + pipe->AddOperator( + OpSpec("HostDecoder") + .AddArg("output_type", this->img_type_) + .AddInput("jpegs", "cpu") + .AddOutput("images2", "cpu")); + + // CropMirrorNormalizePermute + crop multiple sets of images + DeviceWorkspace ws; + this->RunOperator(OpSpec("CropMirrorNormalize") + .AddArg("device", "gpu") + .AddInput("images", "gpu") + .AddOutput("cropped1", "gpu") + .AddInput("images2", "gpu") + .AddOutput("cropped2", "gpu") + .AddArg("crop", vector{64, 64}) + .AddArg("mean", vector(this->c_, 0.)) + .AddArg("std", vector(this->c_, 1.)) + .AddArg("image_type", this->img_type_) + .AddArg("num_input_sets", 2), 1e-4, &ws); + +#if DALI_DEBUG + WriteCHWBatch(*ws.Output(0), 0., 1, "img0"); + WriteCHWBatch(*ws.Output(1), 0., 1, "img1"); #endif - - // Note: We allow a slight deviation from the ground truth. 
- // This value was picked fairly arbitrarily to let the test - // pass for libjpeg turbo - ASSERT_LT(mean, mean_bound); - ASSERT_LT(std, std_bound); - } - - template - void MeanStdDev(const vector &diff, double *mean, double *std) { - // Avoid division by zero - ASSERT_NE(diff.size(), 0); - - double sum = 0, var_sum = 0; - for (auto &val : diff) { - sum += val; - } - *mean = sum / diff.size(); - for (auto &val : diff) { - var_sum += (val - *mean)*(val - *mean); - } - *std = sqrt(var_sum / diff.size()); } - - protected: - const DALIImageType img_type_ = ImgType::type; - int c_; }; typedef ::testing::Types Types; TYPED_TEST_CASE(CropMirrorNormalizePermuteTest, Types); TYPED_TEST(CropMirrorNormalizePermuteTest, MultipleData) { - int batch_size = this->jpegs_.size(); - int num_thread = 1; - - // Create the pipeline - Pipeline pipe( - batch_size, - num_thread, - 0); - - TensorList data; - this->MakeJPEGBatch(&data, batch_size); - pipe.AddExternalInput("jpegs"); - pipe.SetExternalInput("jpegs", data); - - // Decode the images - pipe.AddOperator( - OpSpec("HostDecoder") - .AddArg("output_type", this->img_type_) - .AddInput("jpegs", "cpu") - .AddOutput("images", "cpu")); - - pipe.AddOperator( - OpSpec("HostDecoder") - .AddArg("output_type", this->img_type_) - .AddInput("jpegs", "cpu") - .AddOutput("images2", "cpu")); - - - std::vector mean_vec(this->c_); - for (int i = 0; i < this->c_; ++i) { - mean_vec[i] = 0.; - } - - // CropMirrorNormalizePermute + crop multiple sets of images - pipe.AddOperator( - OpSpec("CropMirrorNormalize") - .AddArg("device", "gpu") - .AddInput("images", "gpu") - .AddOutput("cropped1", "gpu") - .AddInput("images2", "gpu") - .AddOutput("cropped2", "gpu") - .AddArg("crop", vector{64, 64}) - .AddArg("mean", mean_vec) - .AddArg("std", mean_vec) - .AddArg("image_type", this->img_type_) - .AddArg("num_input_sets", 2)); - - // Build and run the pipeline - vector> outputs = {{"cropped1", "gpu"}, {"cropped2", "gpu"}}; - - pipe.Build(outputs); - - // Decode 
the images - pipe.RunCPU(); - pipe.RunGPU(); - - DeviceWorkspace results; - pipe.Outputs(&results); - - // Verify the results - auto output0 = results.Output(0); - auto output1 = results.Output(1); - - // WriteHWCBatch(*output, "image"); - for (int i = 0; i < batch_size; ++i) { - this->VerifyImage( - output0->template tensor(i), - output1->template tensor(i), - output0->tensor_shape(i)[0]*output0->tensor_shape(i)[1]*output0->tensor_shape(i)[2]); - } + this->RunTest(); } } // namespace dali diff --git a/dali/pipeline/operators/fused/resize_crop_mirror.h b/dali/pipeline/operators/fused/resize_crop_mirror.h index 5e78e230882..160e95e686e 100644 --- a/dali/pipeline/operators/fused/resize_crop_mirror.h +++ b/dali/pipeline/operators/fused/resize_crop_mirror.h @@ -172,11 +172,7 @@ class ResizeCropMirror : public Operator, protected ResizeCropMirror inline void RunImpl(SampleWorkspace *ws, const int idx) override { auto &input = ws->Input(idx); auto output = ws->Output(idx); - DALI_ENFORCE(input.ndim() == 3); - DALI_ENFORCE(IsType(input.type()), - "Expects input data in uint8."); - DALI_ENFORCE(input.dim(2) == 1 || input.dim(2) == 3, - "ResizeCropMirror supports hwc rgb & grayscale inputs."); + CheckParam(input, "ResizeCropMirror"); const TransformMeta &meta = per_thread_meta_[ws->thread_idx()]; @@ -216,11 +212,7 @@ class FastResizeCropMirror : public ResizeCropMirror { inline void RunImpl(SampleWorkspace *ws, const int idx) override { auto &input = ws->Input(idx); auto output = ws->Output(idx); - DALI_ENFORCE(input.ndim() == 3); - DALI_ENFORCE(IsType(input.type()), - "Expects input data in uint8."); - DALI_ENFORCE(input.dim(2) == 1 || input.dim(2) == 3, - "FastResizeCropMirror supports hwc rgb & grayscale inputs."); + CheckParam(input, "FastResizeCropMirror"); const TransformMeta &meta = per_thread_meta_[ws->thread_idx()]; diff --git a/dali/pipeline/operators/fused/resize_crop_mirror_test.cc b/dali/pipeline/operators/fused/resize_crop_mirror_test.cc index 
0d71afb279a..3f9b74384ed 100644 --- a/dali/pipeline/operators/fused/resize_crop_mirror_test.cc +++ b/dali/pipeline/operators/fused/resize_crop_mirror_test.cc @@ -52,14 +52,14 @@ TYPED_TEST(ResizeCropMirrorTest, TestFixedResizeAndCropWarp) { TYPED_TEST(ResizeCropMirrorTest, TestFixedFastResizeAndCrop) { this->TstBody(this->DefaultSchema(true) .AddArg("resize_shorter", 480.f) - .AddArg("crop", vector{224, 224}), 5.5); + .AddArg("crop", vector{224, 224}), 2.1); } TYPED_TEST(ResizeCropMirrorTest, TestFixedFastResizeAndCropWarp) { this->TstBody(this->DefaultSchema(true) .AddArg("resize_x", 480.f) .AddArg("resize_y", 480.f) - .AddArg("crop", vector{224, 224}), 5.5); + .AddArg("crop", vector{224, 224}), 2.1); } } // namespace dali diff --git a/dali/pipeline/operators/operator.h b/dali/pipeline/operators/operator.h index 835084f51b1..84f6e56c4cc 100644 --- a/dali/pipeline/operators/operator.h +++ b/dali/pipeline/operators/operator.h @@ -218,7 +218,7 @@ DALI_DECLARE_OPTYPE_REGISTRY(SupportOperator, OperatorBase); static int ANONYMIZE_VARIABLE(OpName) = \ DALI_OPERATOR_SCHEMA_REQUIRED_FOR_##OpName(); \ DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType, \ - device##Operator, dali::OperatorBase) + device##Operator, dali::OperatorBase, #device) class ResizeParamDescr; diff --git a/dali/pipeline/operators/operator_factory.h b/dali/pipeline/operators/operator_factory.h index ed328ec9660..9679eacc804 100644 --- a/dali/pipeline/operators/operator_factory.h +++ b/dali/pipeline/operators/operator_factory.h @@ -37,10 +37,11 @@ class OperatorRegistry { OperatorRegistry() {} - void Register(const std::string &name, Creator creator) { - std::lock_guard lock(mutex_); + void Register(const std::string &name, Creator creator, const std::string &devName = "") { + std::lock_guard lock(mutex_); DALI_ENFORCE(registry_.count(name) == 0, - "Operator \"" + name + "\" already registered."); + "Operator \"" + name + "\" already registered" + + (devName != ""? 
(" for " + devName) : "") + "."); registry_[name] = creator; } @@ -71,8 +72,8 @@ class Registerer { public: Registerer(const std::string &name, OperatorRegistry *registry, - typename OperatorRegistry::Creator creator) { - registry->Register(name, creator); + typename OperatorRegistry::Creator creator, const std::string &devName = "") { + registry->Register(name, creator, devName); } // Standard creator function used by all operators @@ -85,9 +86,9 @@ class Registerer { // Creators a registry object for a specific op type #define DALI_DECLARE_OPTYPE_REGISTRY(RegistryName, OpType) \ - class DLL_PUBLIC RegistryName##Registry { \ + class DLL_PUBLIC RegistryName##Registry { \ public: \ - DLL_PUBLIC static dali::OperatorRegistry& Registry(); \ + DLL_PUBLIC static dali::OperatorRegistry& Registry(); \ }; #define DALI_DEFINE_OPTYPE_REGISTRY(RegistryName, OpType) \ @@ -99,11 +100,11 @@ class Registerer { // Helper to define a registerer for a specific op type. Each op type // defines its own, more aptly named, registration macros on top of this #define DALI_DEFINE_OPTYPE_REGISTERER(OpName, DerivedType, \ - RegistryName, OpType) \ + RegistryName, OpType, dev) \ namespace { \ static dali::Registerer ANONYMIZE_VARIABLE(anon##OpName)( \ #OpName, &RegistryName##Registry::Registry(), \ - dali::Registerer::OperatorCreator); \ + dali::Registerer::OperatorCreator, dev); \ } } // namespace dali diff --git a/dali/pipeline/operators/operator_factory_test.cc b/dali/pipeline/operators/operator_factory_test.cc index eb3990870a0..1d6a0f52d34 100644 --- a/dali/pipeline/operators/operator_factory_test.cc +++ b/dali/pipeline/operators/operator_factory_test.cc @@ -73,11 +73,11 @@ DALI_DEFINE_OPTYPE_REGISTRY(GPUDummy, DummyBase); // Some registration macros #define DALI_REGISTER_CPU_DUMMY(OpName, OpType) \ DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType, \ - CPUDummy, DummyBase) + CPUDummy, DummyBase, "CPU") #define DALI_REGISTER_GPU_DUMMY(OpName, OpType) \ DALI_DEFINE_OPTYPE_REGISTERER(OpName, 
OpType, \ - GPUDummy, DummyBase) + GPUDummy, DummyBase, "GPU") // Register the classes diff --git a/dali/pipeline/operators/resize/new_resize.h b/dali/pipeline/operators/resize/new_resize.h index 67d759dbe0f..3e9aae96aa4 100644 --- a/dali/pipeline/operators/resize/new_resize.h +++ b/dali/pipeline/operators/resize/new_resize.h @@ -143,7 +143,7 @@ class NewResize : public Resize { void SetupSharedSampleParams(Workspace *ws) override { Resize ::SetupSharedSampleParams(ws); } - uint ResizeInfoNeeded() const override { return t_crop + t_mirrorHor; } + uint ResizeInfoNeeded() const override { return t_crop + t_mirrorHor; } private: MappingInfo **CopyResizeTableToGPU(size_t resizeMemory[], cudaStream_t s, diff --git a/dali/pipeline/operators/resize/new_resize_test.cc b/dali/pipeline/operators/resize/new_resize_test.cc index dd617ce40ff..cc04eb1cd23 100644 --- a/dali/pipeline/operators/resize/new_resize_test.cc +++ b/dali/pipeline/operators/resize/new_resize_test.cc @@ -13,14 +13,14 @@ TYPED_TEST_CASE(NewResizeTest, Types); TYPED_TEST(NewResizeTest, TestFixedResizeAndCrop) { this->TstBody(this->DefaultSchema("ResizeCropMirror") .AddArg("resize_shorter", 480.f) - .AddArg("crop", vector{224, 224}), 0.85); + .AddArg("crop", vector{224, 224}), 0.33); } TYPED_TEST(NewResizeTest, TestFixedResizeAndCropWarp) { this->TstBody(this->DefaultSchema("ResizeCropMirror") .AddArg("resize_x", 480.f) .AddArg("resize_y", 480.f) - .AddArg("crop", vector{224, 224}), 0.85); + .AddArg("crop", vector{224, 224}), 0.33); } } // namespace dali diff --git a/dali/pipeline/operators/resize/resize_test.cc b/dali/pipeline/operators/resize/resize_test.cc index cdd6bd1949b..5d8094e3122 100644 --- a/dali/pipeline/operators/resize/resize_test.cc +++ b/dali/pipeline/operators/resize/resize_test.cc @@ -27,24 +27,24 @@ TYPED_TEST_CASE(ResizeTest, Types); TYPED_TEST(ResizeTest, TestResizeShorter) { this->TstBody(this->DefaultSchema("Resize") - .AddArg("resize_shorter", 480.f), 14.0); + .AddArg("resize_shorter", 
480.f), 5.5); } TYPED_TEST(ResizeTest, TestResizeShorter_A) { this->TstBody(this->DefaultSchema("Resize") - .AddArg("resize_shorter", 224.f), 14.0); + .AddArg("resize_shorter", 224.f), 5.5); } TYPED_TEST(ResizeTest, TestResizeXY) { this->TstBody(this->DefaultSchema("Resize") .AddArg("resize_x", 224.f) - .AddArg("resize_y", 224.f), 14.0); + .AddArg("resize_y", 224.f), 5.5); } TYPED_TEST(ResizeTest, TestResizeXY_A) { this->TstBody(this->DefaultSchema("Resize") .AddArg("resize_x", 240.f) - .AddArg("resize_y", 480.f), 14.0); + .AddArg("resize_y", 480.f), 5.5); } } // namespace dali diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc index 1b9c320ef79..e3a77c3906f 100644 --- a/dali/pipeline/pipeline_test.cc +++ b/dali/pipeline/pipeline_test.cc @@ -36,18 +36,18 @@ class PipelineTest : public DALITest { DALITest::DecodeJPEGS(DALI_RGB); } - template - inline void CompareData(const T* data, const T* ground_truth, int n) { + template + inline void CompareData(const T *data, const T *ground_truth, int n) { CUDA_CALL(cudaDeviceSynchronize()); vector tmp_cpu(n); - CUDA_CALL(cudaMemcpy(tmp_cpu.data(), data, sizeof(T)*n, cudaMemcpyDefault)); + CUDA_CALL(cudaMemcpy(tmp_cpu.data(), data, sizeof(T) * n, cudaMemcpyDefault)); vector abs_diff(n, 0); for (int i = 0; i < n; ++i) { abs_diff[i] = abs(static_cast(tmp_cpu[i]) - static_cast(ground_truth[i])); } double mean, std; - DALITest::MeanStdDev(abs_diff, &mean, &std); + DALITest::MeanStdDevColorNorm(abs_diff, &mean, &std); #ifndef NDEBUG cout << "num: " << abs_diff.size() << endl; @@ -59,280 +59,190 @@ class PipelineTest : public DALITest { ASSERT_LT(std, 0.000001); } - inline OpGraph& GetGraph(Pipeline *pipe) { - return pipe->graph_; - } -}; - -template -struct ThreadCount { - static const int nt = number_of_threads; -}; - -class PipelineTestOnce : public PipelineTest> { -}; - -typedef ::testing::Types, - ThreadCount<2>, - ThreadCount<3>, - ThreadCount<4>> NumThreads; -TYPED_TEST_CASE(PipelineTest, 
NumThreads); - -TEST_F(PipelineTestOnce, TestInputNotKnown) { - Pipeline pipe(1, 1, 0); + void RunTestEnforce(const string &dev1, const string &dev2) { + Pipeline pipe(1, 1, 0); - ASSERT_THROW( + // Inputs must be know to the pipeline, i.e. ops + // must be added in a topological ordering. + ASSERT_THROW( pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data", "cpu") - .AddOutput("copy_out", "cpu")), + OpSpec("Copy") + .AddArg("device", dev1) + .AddInput("data", dev1) + .AddOutput("copy_out", dev1)), std::runtime_error); -} -TEST_F(PipelineTestOnce, TestEnforceCPUOpConstraints) { - Pipeline pipe(1, 1, 0); - - // Inputs must be know to the pipeline, i.e. ops - // must be added in a topological ordering. - ASSERT_THROW( - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data", "cpu") - .AddOutput("copy_out", "cpu")), - std::runtime_error); - - pipe.AddOperator( + pipe.AddOperator( OpSpec("ExternalSource") - .AddArg("device", "gpu") - .AddOutput("data", "gpu")); - - // Inputs to CPU ops must be on CPU - ASSERT_THROW( + .AddArg("device", "gpu") + .AddOutput("data", "gpu")); + + // For dev1 = "cpu": Inputs to CPU ops must be on CPU, + // we do not auto-copy them from gpu to cpu. + // For dev1 = "gpu": CPU inputs to GPU ops must be on CPU, + // we will not copy them back to the host. + ASSERT_THROW( pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data", "gpu") - .AddOutput("copy_out", "cpu")), + OpSpec("Copy") + .AddArg("device", dev1) + .AddInput("data", dev2) + .AddOutput("copy_out", dev1)), std::runtime_error); - // Inputs to CPU ops must already exist on CPU, - // we do not auto-copy them from gpu to cpu. - ASSERT_THROW( - pipe.AddOperator( + if (dev1 == "cpu") { + // Inputs to CPU ops must already exist on CPU, + // we do not auto-copy them from gpu to cpu. 
+ ASSERT_THROW( + pipe.AddOperator( OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data", "cpu") - .AddOutput("copy_out", "cpu")), - std::runtime_error); + .AddArg("device", dev1) + .AddInput("data", dev1) + .AddOutput("copy_out", dev1)), + std::runtime_error); + } - pipe.AddOperator( + pipe.AddOperator( OpSpec("ExternalSource") - .AddArg("device", "cpu") - .AddOutput("data_2", "cpu")); + .AddArg("device", dev1) + .AddOutput("data_2", dev1)); - pipe.AddOperator( + pipe.AddOperator( OpSpec("ExternalSource") - .AddArg("device", "cpu") - .AddOutput("data_3", "cpu")); + .AddArg("device", dev1) + .AddOutput("data_3", dev1)); - // Outputs must have unique names. - ASSERT_THROW( + // Outputs must have unique names. + ASSERT_THROW( pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data_2", "cpu") - .AddOutput("data_3", "cpu")), + OpSpec("Copy") + .AddArg("device", dev1) + .AddInput("data_2", dev1) + .AddOutput("data_3", dev1)), std::runtime_error); - // All data must have unique names regardless - // of the device they exist on. - ASSERT_THROW( + if (dev1 == "gpu") { pipe.AddOperator( - OpSpec("Copy") + OpSpec("ExternalSource") .AddArg("device", "cpu") - .AddInput("data_2", "cpu") - .AddOutput("data", "cpu")), - std::runtime_error); - - // CPU ops can only produce CPU outputs - ASSERT_THROW( + .AddOutput("data_4", "cpu")); + } + // All data must have unique names regardless + // of the device they exist on. + ASSERT_THROW( pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "cpu") - .AddInput("data_2", "cpu") - .AddOutput("data_4", "gpu")), + OpSpec("Copy") + .AddArg("device", dev1) + .AddInput("data_2", dev1) + .AddOutput("data", dev1)), std::runtime_error); -} -TEST_F(PipelineTestOnce, TestEnforceGPUOpConstraints) { - Pipeline pipe(1, 1, 0); - // Inputs must be know to the pipeline, i.e. ops - // must be added in a topological ordering. 
- ASSERT_THROW( + // CPU ops can only produce CPU outputs + ASSERT_THROW( pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data", "gpu") - .AddOutput("copy_out", "gpu")), + OpSpec("Copy") + .AddArg("device", dev1) + .AddInput("data_2", dev1) + .AddOutput("data_4", dev2)), std::runtime_error); + } - pipe.AddOperator( - OpSpec("ExternalSource") - .AddArg("device", "gpu") - .AddOutput("data", "gpu")); + void RunTestTrigger(const string &dev) { + Pipeline pipe(1, 1, 0); - // CPU inputs to GPU ops must be on CPU, we will - // not copy them back to the host. - ASSERT_THROW( - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data", "cpu") - .AddOutput("copy_out", "gpu")), - std::runtime_error); + pipe.AddExternalInput("data"); - pipe.AddOperator( - OpSpec("ExternalSource") - .AddArg("device", "gpu") - .AddOutput("data_2", "gpu")); + pipe.AddOperator( + OpSpec("Copy") + .AddArg("device", "gpu") + .AddInput("data", dev) + .AddOutput("data_copy", "gpu")); + + vector> outputs = {{"data_copy", "gpu"}}; + pipe.Build(outputs); + + OpGraph &graph = this->GetGraph(&pipe); + + // Validate the graph + ASSERT_EQ(graph.NumCPUOp(), 1); + ASSERT_EQ(graph.NumMixedOp(), 1); + ASSERT_EQ(graph.NumGPUOp(), 1); + + ASSERT_EQ(graph.mixed_op(0).name(), "MakeContiguous"); + + // Validate the source op + auto &node = graph.node(0); + ASSERT_EQ(node.id, 0); + ASSERT_EQ(node.children.size(), 1); + ASSERT_EQ(node.parents.size(), 0); + ASSERT_EQ(node.children.count(1), 1); + + // Validate the MakeContiguous op + auto &node2 = graph.node(1); + ASSERT_EQ(node2.id, 1); + ASSERT_EQ(node2.children.size(), 1); + ASSERT_EQ(node2.parents.size(), 1); + ASSERT_EQ(node2.parents.count(0), 1); + ASSERT_EQ(node2.children.count(2), 1); + + // Validate the copy op + auto &node3 = graph.node(2); + ASSERT_EQ(node3.id, 2); + ASSERT_EQ(node3.children.size(), 0); + ASSERT_EQ(node3.parents.size(), 1); + ASSERT_EQ(node3.parents.count(1), 1); + } - pipe.AddOperator( - 
OpSpec("ExternalSource") - .AddArg("device", "gpu") - .AddOutput("data_3", "gpu")); + inline OpGraph& GetGraph(Pipeline *pipe) { + return pipe->graph_; + } +}; - // Outputs must have unique names. - ASSERT_THROW( - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data_2", "gpu") - .AddOutput("data_3", "gpu")), - std::runtime_error); +template +struct ThreadCount { + static const int nt = number_of_threads; +}; - pipe.AddOperator( - OpSpec("ExternalSource") - .AddArg("device", "cpu") - .AddOutput("data_4", "cpu")); +class PipelineTestOnce : public PipelineTest> { +}; - // All data must have unique names regardless - // of the device they exist on. - ASSERT_THROW( - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data_2", "gpu") - .AddOutput("data_4", "gpu")), - std::runtime_error); +typedef ::testing::Types, + ThreadCount<2>, + ThreadCount<3>, + ThreadCount<4>> NumThreads; +TYPED_TEST_CASE(PipelineTest, NumThreads); + +TEST_F(PipelineTestOnce, TestInputNotKnown) { + Pipeline pipe(1, 1, 0); - // GPU ops can only produce GPU outputs ASSERT_THROW( pipe.AddOperator( OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data_2", "gpu") - .AddOutput("data_4", "cpu")), + .AddArg("device", "cpu") + .AddInput("data", "cpu") + .AddOutput("copy_out", "cpu")), std::runtime_error); } -TEST_F(PipelineTestOnce, TestTriggerToContiguous) { - Pipeline pipe(1, 1, 0); - - pipe.AddExternalInput("data"); - - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data", "cpu") - .AddOutput("data_copy", "gpu")); - - vector> outputs = {{"data_copy", "gpu"}}; - pipe.Build(outputs); - - OpGraph &graph = this->GetGraph(&pipe); - - // Validate the graph - ASSERT_EQ(graph.NumCPUOp(), 1); - ASSERT_EQ(graph.NumMixedOp(), 1); - ASSERT_EQ(graph.NumGPUOp(), 1); +TEST_F(PipelineTestOnce, TestEnforceCPUOpConstraints) { + RunTestEnforce("cpu", "gpu"); +} - ASSERT_EQ(graph.mixed_op(0).name(), "MakeContiguous"); 
+TEST_F(PipelineTestOnce, TestEnforceGPUOpConstraints) { + RunTestEnforce("gpu", "cpu"); +} - // Validate the source op - auto& node = graph.node(0); - ASSERT_EQ(node.id, 0); - ASSERT_EQ(node.children.size(), 1); - ASSERT_EQ(node.parents.size(), 0); - ASSERT_EQ(node.children.count(1), 1); - - // Validate the MakeContiguous op - auto& node2 = graph.node(1); - ASSERT_EQ(node2.id, 1); - ASSERT_EQ(node2.children.size(), 1); - ASSERT_EQ(node2.parents.size(), 1); - ASSERT_EQ(node2.parents.count(0), 1); - ASSERT_EQ(node2.children.count(2), 1); - - // Validate the copy op - auto& node3 = graph.node(2); - ASSERT_EQ(node3.id, 2); - ASSERT_EQ(node3.children.size(), 0); - ASSERT_EQ(node3.parents.size(), 1); - ASSERT_EQ(node3.parents.count(1), 1); +TEST_F(PipelineTestOnce, TestTriggerToContiguous) { + RunTestTrigger("cpu"); } TEST_F(PipelineTestOnce, TestTriggerCopyToDevice) { - Pipeline pipe(1, 1, 0); - - pipe.AddExternalInput("data"); - - pipe.AddOperator( - OpSpec("Copy") - .AddArg("device", "gpu") - .AddInput("data", "gpu") - .AddOutput("data_copy", "gpu")); - - vector> outputs = {{"data_copy", "gpu"}}; - pipe.Build(outputs); - - OpGraph &graph = this->GetGraph(&pipe); - - // Validate the graph - ASSERT_EQ(graph.NumCPUOp(), 1); - ASSERT_EQ(graph.NumMixedOp(), 1); - ASSERT_EQ(graph.NumGPUOp(), 1); - - ASSERT_EQ(graph.mixed_op(0).name(), "MakeContiguous"); - - // Validate the source op - auto& node = graph.node(0); - ASSERT_EQ(node.id, 0); - ASSERT_EQ(node.children.size(), 1); - ASSERT_EQ(node.parents.size(), 0); - ASSERT_EQ(node.children.count(1), 1); - - // Validate the MakeContiguous op - auto& node2 = graph.node(1); - ASSERT_EQ(node2.id, 1); - ASSERT_EQ(node2.children.size(), 1); - ASSERT_EQ(node2.parents.size(), 1); - ASSERT_EQ(node2.parents.count(0), 1); - ASSERT_EQ(node2.children.count(2), 1); - - // Validate the copy op - auto& node3 = graph.node(2); - ASSERT_EQ(node3.id, 2); - ASSERT_EQ(node3.children.size(), 0); - ASSERT_EQ(node3.parents.size(), 1); - 
ASSERT_EQ(node3.parents.count(1), 1); + RunTestTrigger("gpu"); } TYPED_TEST(PipelineTest, TestExternalSource) { int num_thread = TypeParam::nt; - int batch_size = this->jpegs_.size(); + int batch_size = this->jpegs_.nImages(); Pipeline pipe(batch_size, num_thread, 0); @@ -354,7 +264,7 @@ TYPED_TEST(PipelineTest, TestExternalSource) { TYPED_TEST(PipelineTest, TestSerialization) { int num_thread = TypeParam::nt; - int batch_size = this->jpegs_.size(); + int batch_size = this->jpegs_.nImages(); Pipeline pipe(batch_size, num_thread, 0); diff --git a/dali/test/dali_test.h b/dali/test/dali_test.h index 85ffc3e92eb..0760c9a92b5 100644 --- a/dali/test/dali_test.h +++ b/dali/test/dali_test.h @@ -53,11 +53,10 @@ class DALITest : public ::testing::Test { public: virtual inline void SetUp() { rand_gen_.seed(time(nullptr)); - LoadJPEGS(image_folder, &jpeg_names_, &jpegs_, &jpeg_sizes_); + LoadJPEGS(image_folder, &jpeg_names_, &jpegs_); } virtual inline void TearDown() { - for (auto &ptr : jpegs_) delete[] ptr; for (auto &ptr : images_) delete[] ptr; } @@ -71,7 +70,7 @@ class DALITest : public ::testing::Test { } void DecodeImage(const unsigned char *data, int data_size, int c, int img_type, - Tensor *out, unsigned char *out_dataPntr = NULL) { + Tensor *out, unsigned char *out_dataPntr = NULL) const { cv::Mat input(1, data_size, CV_8UC1, const_cast(data)); cv::Mat tmp = cv::imdecode(input, c == 1 ? 
CV_LOAD_IMAGE_GRAYSCALE : CV_LOAD_IMAGE_COLOR); @@ -93,12 +92,14 @@ class DALITest : public ::testing::Test { std::memcpy(out_dataPntr, out_img.ptr(), out_img.rows * out_img.cols * c); } - inline void DecodeImages(DALIImageType type, const vector& encoded, - const vector& encoded_sizes, + inline void DecodeImages(DALIImageType type, const ImgSetDescr &imgs, vector *images, vector *image_dims) { - images->resize(encoded.size()); - image_dims->resize(encoded.size()); - for (size_t i = 0; i < encoded.size(); ++i) { + const auto & encoded = imgs.data_; + const auto & encoded_sizes = imgs.sizes_; + const auto nImgs = imgs.nImages(); + images->resize(nImgs); + image_dims->resize(nImgs); + for (size_t i = 0; i < nImgs; ++i) { cv::Mat img; cv::Mat encode = cv::Mat(1, encoded_sizes[i], CV_8UC1, encoded[i]); @@ -127,7 +128,7 @@ class DALITest : public ::testing::Test { } inline void DecodeJPEGS(DALIImageType type) { - DecodeImages(type, jpegs_, jpeg_sizes_, &images_, &image_dims_); + DecodeImages(type, jpegs_, &images_, &image_dims_); } inline void MakeDecodedBatch(int n, TensorList *tl, @@ -159,13 +160,14 @@ class DALITest : public ::testing::Test { } // Make a batch (in TensorList) of arbitrary raw data - inline void MakeEncodedBatch(TensorList *tl, int n, - const vector &data, - const vector &data_sizes) { - DALI_ENFORCE(data.size() > 0, "data must be populated to create batches"); + inline void MakeEncodedBatch(TensorList *tl, int n, const ImgSetDescr &imgs) { + const auto &data = imgs.data_; + const auto &data_sizes = imgs.sizes_; + const auto nImgs = imgs.nImages(); + DALI_ENFORCE(nImgs > 0, "data must be populated to create batches"); vector shape(n); for (int i = 0; i < n; ++i) { - shape[i] = {data_sizes[i % data.size()]}; + shape[i] = {data_sizes[i % nImgs]}; } tl->template mutable_data(); @@ -173,41 +175,42 @@ class DALITest : public ::testing::Test { for (int i = 0; i < n; ++i) { std::memcpy(tl->template mutable_tensor(i), - data[i % data.size()], - data_sizes[i % 
data.size()]); + data[i % nImgs], + data_sizes[i % nImgs]); } } // Make a batch (of vector) of arbitrary raw data - inline void MakeEncodedBatch(vector> *t, int n, - const vector &data, - const vector &data_sizes) { - DALI_ENFORCE(data.size() > 0, "data must be populated to create batches"); + inline void MakeEncodedBatch(vector> *t, int n, const ImgSetDescr &imgs) { + const auto &data = imgs.data_; + const auto &data_sizes = imgs.sizes_; + const auto nImgs = data.size(); + DALI_ENFORCE(nImgs > 0, "data must be populated to create batches"); t->resize(n); for (int i = 0; i < n; ++i) { auto& ti = t->at(i); ti = Tensor{}; - ti.Resize({data_sizes[i % data.size()]}); + ti.Resize({data_sizes[i % nImgs]}); ti.template mutable_data(); std::memcpy(ti.raw_mutable_data(), - data[i % data.size()], - data_sizes[i % data.size()]); + data[i % nImgs], + data_sizes[i % nImgs]); } } inline void MakeJPEGBatch(TensorList *tl, int n) { - MakeEncodedBatch(tl, n, jpegs_, jpeg_sizes_); + MakeEncodedBatch(tl, n, jpegs_); } inline void MakeJPEGBatch(vector> *t, int n) { - MakeEncodedBatch(t, n, jpegs_, jpeg_sizes_); + MakeEncodedBatch(t, n, jpegs_); } template - void MeanStdDev(const vector &diff, double *mean, double *std) { + void MeanStdDev(const vector &diff, double *mean, double *std) const { const size_t N = diff.size(); // Avoid division by zero ASSERT_NE(N, 0); @@ -223,6 +226,12 @@ class DALITest : public ::testing::Test { *std = sqrt(var_sum / N); } + template + void MeanStdDevColorNorm(const vector &diff, double *mean, double *std) const { + MeanStdDev(diff, mean, std); + *mean /= (255. 
/ 100.); // normalizing to the color range and use percents + } + // From OCV example : // docs.opencv.org/2.4/doc/tutorials/gpu/gpu-basics-similarity/gpu-basics-similarity.html cv::Scalar MSSIM(uint8 *a, uint8 *b, int h, int w, int c) { @@ -281,10 +290,11 @@ class DALITest : public ::testing::Test { } protected: + int GetNumColorComp() const { return c_; } + std::mt19937 rand_gen_; vector jpeg_names_; - vector jpegs_; - vector jpeg_sizes_; + ImgSetDescr jpegs_; // Decoded images vector images_; diff --git a/dali/test/dali_test_decoder.h b/dali/test/dali_test_decoder.h index eeef6286709..318f7b4e26b 100644 --- a/dali/test/dali_test_decoder.h +++ b/dali/test/dali_test_decoder.h @@ -11,10 +11,8 @@ namespace dali { template -class GenericDecoderTest : public DALISingleOpTest { +class GenericDecoderTest : public DALISingleOpTest { public: - USING_DALI_SINGLE_OP_TEST(); - vector*> Reference(const vector*> &inputs, DeviceWorkspace *ws) { @@ -25,12 +23,12 @@ class GenericDecoderTest : public DALISingleOpTest { const TensorList& encoded_data = *inputs[0]; - c_ = (IsColor(img_type_) ? 3 : 1); + const int c = this->GetNumColorComp(); for (int i = 0; i < encoded_data.ntensor(); ++i) { auto *data = encoded_data.tensor(i); auto data_size = Product(encoded_data.tensor_shape(i)); - DecodeImage(data, data_size, c_, img_type_, &out[i]); + this->DecodeImage(data, data_size, c, this->ImageType(), &out[i]); } vector*> outputs(1); @@ -40,23 +38,16 @@ class GenericDecoderTest : public DALISingleOpTest { } protected: - virtual const OpSpec DecodingOp() const = 0; - virtual uint8 TestCheckType() const { return t_checkDefault; } + virtual const OpSpec DecodingOp() const { return OpSpec(); } void RunTestDecode(t_imgType imageType, float eps = 5e-2) { -#ifdef PIXEL_STAT_FILE - FILE *file = fopen(PIXEL_STAT_FILE".txt", "a"); - fprintf(file, "Type of the files: %s eps = %6.4f\n", jpegData? "JPEG" : "PNG", eps); - fprintf(file, " Color#: mean: std: eq. pos. 
neg.\n"); - fclose(file); -#endif TensorList encoded_data; switch (imageType) { case t_jpegImgType: - EncodedJPEGData(&encoded_data, batch_size_); + this->EncodedJPEGData(&encoded_data); break; case t_pngImgType: - EncodedPNGData(&encoded_data, batch_size_); + this->EncodedPNGData(&encoded_data); break; default: { char buff[32]; @@ -65,19 +56,41 @@ class GenericDecoderTest : public DALISingleOpTest { } } - SetExternalInputs({std::make_pair("encoded", &encoded_data)}); + this->SetExternalInputs({std::make_pair("encoded", &encoded_data)}); + this->RunOperator(DecodingOp(), eps); + } - AddSingleOp(DecodingOp()); + void RunTestDecode(const ImgSetDescr &imgs, float eps = 5e-2) { + this->SetEps(eps); + for (size_t imgIdx = 0; imgIdx < imgs.nImages(); ++imgIdx) { + Tensor t; + DALI_CALL(DecodeJPEGHost(imgs.data_[imgIdx], + imgs.sizes_[imgIdx], + this->img_type_, &t)); + +#if DALI_DEBUG + WriteHWCImage(t.data(), t.dim(0), t.dim(1), t.dim(2), + std::to_string(imgIdx) + "-img"); +#ifndef NDEBUG + cout << imgIdx << ": " << imgs.sizes_[imgIdx] + << " dims: " << t.dim(1) << "x" << t.dim(0) << endl; +#endif +#endif + this->VerifyDecode(t.data(), t.dim(0), t.dim(1), imgs, imgIdx); + } + } - DeviceWorkspace ws; - RunOperator(&ws); + void VerifyDecode(const uint8 *img, int h, int w, const ImgSetDescr &imgs, int img_id) const { + // Compare w/ opencv result + const auto imgData = imgs.data_[img_id]; + const auto imgSize = imgs.sizes_[img_id]; + ASSERT_TRUE(CheckIsJPEG(imgData, imgSize)); - SetEps(eps); - SetTestCheckType(TestCheckType()); - CheckAnswers(&ws, {0}); + Tensor out; + const int c = this->GetNumColorComp(); + this->DecodeImage(imgData, imgSize, c, this->ImageType(), &out); + this->CheckBuffers(h*w*c, out.mutable_data(), img, false); } - - const DALIImageType img_type_ = ImgType::type; }; } // namespace dali diff --git a/dali/test/dali_test_matching.h b/dali/test/dali_test_matching.h new file mode 100644 index 00000000000..2f01f494ddf --- /dev/null +++ 
b/dali/test/dali_test_matching.h @@ -0,0 +1,75 @@ +// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +#ifndef DALI_TEST_DALI_TEST_MATCHING_H_ +#define DALI_TEST_DALI_TEST_MATCHING_H_ + +#include "dali/test/dali_test_single_op.h" +#include +#include +#include +#include + +namespace dali { + +typedef struct { + const char *opName; + const char *paramName; + const char *paramVal; + double epsVal; +} singleParamOpDescr; + +template +class GenericMatchingTest : public DALISingleOpTest { + protected: + void RunTest(const opDescr &descr) { + const int batch_size = this->jpegs_.nImages(); + this->SetBatchSize(batch_size); + this->SetNumThreads(1); + + TensorList data; + this->MakeJPEGBatch(&data, batch_size); + this->SetExternalInputs({{"jpegs", &data}}); + + shared_ptr pipe = this->GetPipeline(); + // Decode the images + pipe->AddOperator( + OpSpec("HostDecoder") + .AddArg("output_type", this->img_type_) + .AddInput("jpegs", "cpu") + .AddOutput("input", "cpu")); + + // Launching the same transformation on CPU (outputIdx 0) and GPU (outputIdx 1) + this->AddOperatorWithOutput(descr); + this->RunOperator(descr); + } + + virtual vector*> + Reference(const vector*> &inputs, DeviceWorkspace *ws) { + return this->CopyToHost(*ws->Output(1)); + } + + uint8 GetTestCheckType() const override { + return t_checkColorComp + t_checkElements; // + t_checkAll + t_checkNoAssert; + } + + void RunTest(const singleParamOpDescr ¶mOp) { + OpArg arg = {paramOp.paramName, paramOp.paramVal, t_floatParam}; + vector args; + args.push_back(arg); + opDescr aaa(paramOp.opName, paramOp.epsVal, &args); + RunTest(aaa); + } + + void RunTest(const char *opName, const OpArg params[] = NULL, + int nParam = 0, double eps = 0.001) { + if (params && nParam > 0) { + vector args(params, params + nParam); + RunTest(opDescr(opName, eps, &args)); + } else { + RunTest(opDescr(opName, eps, NULL)); + } + } +}; + +} // namespace dali + +#endif // DALI_TEST_DALI_TEST_MATCHING_H_ diff --git 
a/dali/test/dali_test_resize.h b/dali/test/dali_test_resize.h index 90cb6cc8dc6..2c36628238f 100644 --- a/dali/test/dali_test_resize.h +++ b/dali/test/dali_test_resize.h @@ -16,39 +16,12 @@ typedef enum { } t_resizeOptions; template -class GenericResizeTest : public DALISingleOpTest { +class GenericResizeTest : public DALISingleOpTest { public: - USING_DALI_SINGLE_OP_TEST(); - - void TstBody(const string &pName, const string &pDevice = "gpu", double eps = 2e-1) { - OpSpec operation = DefaultSchema(pName, pDevice); - TstBody(GetOperationSpec(operation), eps); - } - - void TstBody(const OpSpec &operation, double eps = 2e-1) { -#ifdef PIXEL_STAT_FILE - FILE *file = fopen(PIXEL_STAT_FILE".txt", "a"); - fprintf(file, "Eps = %6.4f\n", eps); - fprintf(file, " Color#: mean: std: eq. pos. neg.\n"); - fclose(file); -#endif - TensorList data; - this->DecodedData(&data, this->batch_size_, this->img_type_); - this->SetExternalInputs({std::make_pair("input", &data)}); - - this->AddSingleOp(operation); - - DeviceWorkspace ws; - this->RunOperator(&ws); - - this->SetEps(eps); - this->CheckAnswers(&ws, {0}); - } - vector*> Reference(const vector*> &inputs, DeviceWorkspace *ws) { - c_ = (IsColor(img_type_) ? 3 : 1); - auto cv_type = (c_ == 3) ? CV_8UC3 : CV_8UC1; + const int c = this->GetNumColorComp(); + auto cv_type = (c == 3) ? 
CV_8UC3 : CV_8UC1; // single input - encoded images // single output - decoded images @@ -59,25 +32,25 @@ class GenericResizeTest : public DALISingleOpTest { int resize_a = 0, resize_b = 0; bool warp_resize = true; - + const OpSpec &spec = this->GetOperationSpec(); const bool useExternSizes = (resizeOptions & t_externSizes) && - spec_.GetArgument("save_attrs"); + spec.GetArgument("save_attrs"); if (!useExternSizes) { if (resizeOptions & t_externSizes) assert(false); // Can't handle these right now - resize_a = spec_.GetArgument("resize_x"); + resize_a = spec.GetArgument("resize_x"); warp_resize = resize_a != 0; if (warp_resize) - resize_b = spec_.GetArgument("resize_y"); + resize_b = spec.GetArgument("resize_y"); else - resize_a = spec_.GetArgument("resize_shorter"); + resize_a = spec.GetArgument("resize_shorter"); } int crop_h = 0, crop_w = 0; if (resizeOptions & t_cropping) { // Perform a crop - const vector crop = spec_.GetRepeatedArgument("crop"); + const vector crop = spec.GetRepeatedArgument("crop"); crop_h = crop.at(0), crop_w = crop.at(1); } @@ -122,11 +95,11 @@ class GenericResizeTest : public DALISingleOpTest { const int crop_x = (rsz_w - crop_w) / 2; crop_img.create(crop_h, crop_w, cv_type); - const int crop_offset = (crop_y * rsz_w + crop_x) * c_; + const int crop_offset = (crop_y * rsz_w + crop_x) * c; uint8 *crop_ptr = rsz_img.ptr() + crop_offset; - CUDA_CALL(cudaMemcpy2D(crop_img.ptr(), crop_w * c_, - crop_ptr, rsz_w * c_, crop_w * c_, crop_h, + CUDA_CALL(cudaMemcpy2D(crop_img.ptr(), crop_w * c, + crop_ptr, rsz_w * c, crop_w * c, crop_h, cudaMemcpyHostToHost)); } @@ -137,10 +110,10 @@ class GenericResizeTest : public DALISingleOpTest { finalImg = &mirror_img; } - out[i].Resize({finalImg->rows, finalImg->cols, c_}); + out[i].Resize({finalImg->rows, finalImg->cols, c}); auto *out_data = out[i].mutable_data(); - std::memcpy(out_data, finalImg->ptr(), finalImg->rows * finalImg->cols * c_); + std::memcpy(out_data, finalImg->ptr(), finalImg->rows * 
finalImg->cols * c); } vector*> outputs(1); @@ -151,17 +124,6 @@ class GenericResizeTest : public DALISingleOpTest { protected: virtual uint32_t getResizeOptions() const { return t_cropping /*+ t_mirroring*/; } - virtual OpSpec DefaultSchema(const string &pName, const string &pDevice = "gpu") const { - return OpSpec(pName) - .AddArg("device", pDevice) - .AddArg("output_type", this->img_type_) - .AddInput("input", pDevice) - .AddOutput("output", pDevice); - } - - virtual const OpSpec &GetOperationSpec(const OpSpec &op) const { return op; } - - const DALIImageType img_type_ = ImgType::type; }; } // namespace dali diff --git a/dali/test/dali_test_single_op.h b/dali/test/dali_test_single_op.h index a00152143c2..a243589a2a2 100644 --- a/dali/test/dali_test_single_op.h +++ b/dali/test/dali_test_single_op.h @@ -31,6 +31,7 @@ namespace dali { #define MAKE_IMG_OUTPUT 0 // Make the output of compared (obtained and referenced) images #if MAKE_IMG_OUTPUT #define PIXEL_STAT_FILE "pixelStatFile" // Output of statistics for compared sets of images + // Use "" to make the output in stdout #endif namespace images { @@ -80,6 +81,35 @@ typedef enum { t_pngImgType, } t_imgType; +typedef enum { + t_loadJPEGs = 1, + t_decodeJPEGs = 2, + t_loadPNGs = 4, + t_decodePNGs = 8 +} t_loadingFlags; + +typedef enum { + t_intParam, + t_floatParam, + t_stringParam, + t_floatVector +} t_paramType; + +typedef struct { + const char *m_Name; + const char *m_val; + t_paramType type; +} OpArg; + +class opDescr { + public: + explicit opDescr(const char *name, double eps = 0.0, const vector *argPntr = NULL) : + opName(name), epsVal(eps), args(argPntr) {} + const char *opName; + double epsVal; + const vector *args; +}; + // Define a virtual base class for single operator tests, // where we want to add a single operator to a pipeline, // run the pipe using known data, and compare the result to @@ -90,23 +120,31 @@ typedef enum { // void SetInputs() - define all external inputs to the graph // void 
GetOutputs() - retrieve all testable outputs from the graph // bool Compare() - Compare against a (supplied) reference implementation +template class DALISingleOpTest : public DALITest { public: inline void SetUp() override { DALITest::SetUp(); + c_ = (IsColor(img_type_) ? 3 : 1); jpegs_.clear(); - jpeg_sizes_.clear(); - // encoded in jpegs_ - LoadJPEGS(images::jpeg_test_images, &jpegs_, &jpeg_sizes_); - LoadImages(images::png_test_images, &png_, &png_sizes_); + const auto flags = GetImageLoadingFlags(); + + if (flags & t_loadJPEGs) { + LoadJPEGS(images::jpeg_test_images, &jpegs_); + if (flags & t_decodeJPEGs) + DecodeImages(DALI_RGB, jpegs_, &jpeg_decoded_, &jpeg_dims_); + } - // decoded in images_ - DecodeImages(DALI_RGB, jpegs_, jpeg_sizes_, &jpeg_decoded_, &jpeg_dims_); - DecodeImages(DALI_RGB, png_, png_sizes_, &png_decoded_, &png_dims_); + if (flags & t_loadPNGs) { + LoadImages(images::png_test_images, &png_); + + if (flags & t_decodePNGs) + DecodeImages(DALI_RGB, png_, &png_decoded_, &png_dims_); + } // Set the pipeline batch size - batch_size_ = 32; + SetBatchSize(32); } inline void TearDown() override { DALITest::TearDown(); @@ -132,19 +170,30 @@ class DALISingleOpTest : public DALITest { return testCheckType_ & type; } - void AddSingleOp(const OpSpec& spec) { - spec_ = spec; - InitPipeline(); - // generate the output mapping for this op - for (int i = 0; i < spec.NumOutput(); ++i) { - auto output_name = spec.OutputName(i); - auto output_device = spec.OutputDevice(i); + virtual uint8 GetTestCheckType() const { + return t_checkDefault; + } - outputs_.push_back(std::make_pair(output_name, output_device)); - } + void AddOperatorWithOutput(const OpSpec& spec) { + // generate the output mapping for this op + for (int i = 0; i < spec.NumOutput(); ++i) + outputs_.push_back(std::make_pair(spec.OutputName(i), spec.OutputDevice(i))); pipeline_->AddOperator(spec); + } + + void AddOperatorWithOutput(const opDescr &descr, const string &pDevice = "cpu", + const string 
&pInput = "input", const string &pOutput = "outputCPU") { + OpSpec spec(descr.opName); + AddOperatorWithOutput(AddArguments(&spec, descr.args) + .AddInput(pInput, pDevice) + .AddOutput(pOutput, pDevice)); + } + void AddSingleOp(const OpSpec& spec) { + spec_ = spec; + InitPipeline(); + AddOperatorWithOutput(spec); pipeline_->Build(outputs_); } @@ -159,9 +208,9 @@ class DALISingleOpTest : public DALITest { } void RunOperator(DeviceWorkspace *ws) { + SetTestCheckType(GetTestCheckType()); pipeline_->RunCPU(); pipeline_->RunGPU(); - pipeline_->Outputs(ws); } @@ -208,6 +257,8 @@ class DALISingleOpTest : public DALITest { // check calculated vs. reference answers CheckTensorLists(calc_output, ref_output); } + + delete res[i]; } } @@ -215,12 +266,12 @@ class DALISingleOpTest : public DALITest { * Provide some encoded data * TODO(slayton): Add different encodings */ - void EncodedJPEGData(TensorList* t, int n) { - DALITest::MakeEncodedBatch(t, n, jpegs_, jpeg_sizes_); + void EncodedJPEGData(TensorList* t) { + DALITest::MakeEncodedBatch(t, batch_size_, jpegs_); } - void EncodedPNGData(TensorList* t, int n) { - DALITest::MakeEncodedBatch(t, n, png_, png_sizes_); + void EncodedPNGData(TensorList* t) { + DALITest::MakeEncodedBatch(t, batch_size_, png_); } /** @@ -231,39 +282,163 @@ class DALISingleOpTest : public DALITest { DALITest::MakeImageBatch(n, t, type); } - private: - // use a Get mean, std-dev of difference separately for each color component + protected: + inline shared_ptr GetPipeline() const { + return pipeline_; + } + + virtual uint32_t GetImageLoadingFlags() const { + return t_loadJPEGs; // Only loading of JPEG files + } + + const OpSpec &GetOperationSpec() const { + return spec_; + } + + DALIImageType ImageType() const { + return img_type_; + } + + void TstBody(const string &pName, const string &pDevice = "gpu", double eps = 2e-1) { + OpSpec operation = DefaultSchema(pName, pDevice); + TstBody(operation, eps); + } + + void TstBody(const OpSpec &operation, double 
eps = 2e-1, bool flag = true) { + TensorList data; + DecodedData(&data, this->batch_size_, this->img_type_); + if (flag) + SetExternalInputs({std::make_pair("input", &data)}); + + RunOperator(operation, eps); + } + + virtual OpSpec DefaultSchema(const string &pName, const string &pDevice = "gpu") const { + return OpSpec(pName) + .AddArg("device", pDevice) + .AddArg("output_type", this->img_type_) + .AddInput("input", pDevice) + .AddOutput("output", pDevice); + } + + OpSpec AddArguments(OpSpec *spec, const vector *args) const { + if (!args || args->empty()) + return *spec; + + for (auto param : *args) { + auto val = param.m_val; + auto name = param.m_Name; + switch (param.type) { + case t_intParam: + spec->AddArg(name, atoi(val)); + break; + case t_floatParam: + spec->AddArg(name, strtof(val, NULL)); + break; + case t_stringParam: + spec->AddArg(name, val); + break; + case t_floatVector: { + const auto len = strlen(val); + vector vect; + char *pEnd, *pTmp = new char[len+1]; + memcpy(pEnd = pTmp, val, len); + pEnd[len] = '\0'; + while (pEnd[0]) { + if (pEnd[0] == ',') + pEnd++; + + vect.push_back(strtof(pEnd, &pEnd)); + } + + delete [] pTmp; + spec->AddArg(name, vect); + } + } + } + + return *spec; + } + + void RunOperator(const opDescr &descr) { + OpSpec spec(DefaultSchema(descr.opName)); + RunOperator(AddArguments(&spec, descr.args), descr.epsVal); + } + + void RunOperator(const OpSpec& spec, double eps, DeviceWorkspace *pWS = nullptr) { + AddSingleOp(spec); + + DeviceWorkspace ws; + if (!pWS) + pWS = &ws; + + RunOperator(pWS); + + SetEps(eps); + CheckAnswers(pWS, {0}); + } + + template + vector *>CopyToHost(const TensorList &calcOutput) { + // copy to host + vector *> outputs(1); + outputs[0] = new TensorList(); + outputs[0]->Copy(calcOutput, 0); + return outputs; + } template - int CheckBuffers(int N, const T *a, const T *b, bool checkAll, double *pDiff = NULL) { + int CheckBuffers(int N, const T *a, const T *b, bool checkAll, double *pMean = NULL) const { + // 
use a Get mean, std-dev of difference separately for each color component const int jMax = TestCheckType(t_checkColorComp)? c_ : 1; const int len = N / jMax; - double mean = 0, std; + double mean = 0, std = 0; vector diff(len); + int retVal = -1; #ifndef PIXEL_STAT_FILE for (int j = 0; j < jMax; ++j) { for (int i = j; i < N; i += jMax) diff[i / jMax] = abs(static_cast(a[i]) - static_cast(b[i])); - MeanStdDev(diff, &mean, &std); + MeanStdDevColorNorm(diff, &mean, &std); if (checkAll) { - const auto diff = fabs(mean) - eps_; + const auto diff = mean - eps_; if (diff <= 0) continue; - if (pDiff) - *pDiff = diff; + if (pMean) + *pMean = mean; return j; } - ASSERT_LE(fabs(mean), eps_), -1; + ASSERT_LE(mean, eps_), -1; } #else - static int fff; - FILE *file = fopen(PIXEL_STAT_FILE".txt", "a"); - if (!fff++) - fprintf(file, "Buffer Length: %7d (for each color component)\n", len); + static int fff = 0; + FILE *file = NULL; + if (!fff) { + // Header of the pixel statistic table + const char *pHeader = + "\nImgID: ClrID: Mean: Std: SameValue: Bigger: Less:"; + + if (strlen(PIXEL_STAT_FILE)) { + file = fopen(PIXEL_STAT_FILE".txt", fff ? "a" : "w"); + fprintf(file, "%s", pHeader); + } else { + cout << pHeader; + } + } + + char buffer[256]; + snprintf(buffer, sizeof(buffer), "%s%3d:", (fff % 32? "" : "\n"), fff); + fff++; + + // Image number + if (file) + fprintf(file, "%s", buffer); + else + cout << buffer; for (int j = 0; j < c_; ++j) { int pos = 0, neg = 0; @@ -275,23 +450,43 @@ class DALISingleOpTest : public DALITest { neg++; } - MeanStdDev(diff, &mean, &std); - fprintf(file, " %1d %8.2f %8.2f %7d %7d %7d\n", - j, mean, std, len - pos - neg, pos, neg); + MeanStdDevColorNorm(diff, &mean, &std); + snprintf(buffer, sizeof(buffer), + "%s %1d %8.2f %8.2f %7d %7d %7d\n", + j? 
" " : "", j, mean, std, len - pos - neg, pos, neg); + + if (file) + fprintf(file, "%s", buffer); + else + cout << buffer; + + if (mean > eps_) { + if (retVal < 0) { + retVal = j; // First violation of the boundary found + if (pMean) // Keep the color index as a return value + *pMean = mean; + } else { + if (pMean && *pMean < mean) { + *pMean = mean; // More strong violation of the boundary found + retVal = j; // Change the color index as a return value + } + } + } } - fclose(file); + if (file) + fclose(file); #endif - return -1; + return retVal; } - void ReportTestFailure(double diff, int colorIdx, int idx = -1, - const vector *pShape = NULL) { + void ReportTestFailure(double mean, int colorIdx, int idx = -1, + const vector *pShape = NULL) const { if (TestCheckType(t_checkNoAssert)) - cout << "Test warning:"; + cout << "\nTest warning:"; else - cout << "Test failed:"; + cout << "\nTest failed:"; if (TestCheckType(t_checkColorComp)) cout << " color # " << colorIdx; @@ -302,11 +497,11 @@ class DALISingleOpTest : public DALITest { if (pShape) cout << " (h, w) = (" << (*pShape)[0] << ", " << (*pShape)[1] << ")"; - cout << " fabs(mean) = " << diff + eps_ << " and it was expected to be <= " << eps_ << endl; + cout << " mean = " << mean << " and it was expected to be <= " << eps_ << endl; } void CheckTensorLists(const TensorList *t1, - const TensorList *t2) { + const TensorList *t2) const { ASSERT_TRUE(t1); ASSERT_TRUE(t2); ASSERT_EQ(t1->ntensor(), t2->ntensor()); @@ -319,7 +514,7 @@ class DALISingleOpTest : public DALITest { int failNumb = 0, colorIdx = 0; const bool checkAll = TestCheckType(t_checkAll); - double diff; + double mean; if (TestCheckType(t_checkElements)) { // The the results are checked for each element separately for (int i = 0; i < t1->ntensor(); ++i) { @@ -336,16 +531,16 @@ class DALISingleOpTest : public DALITest { if (floatType) { colorIdx = CheckBuffers(lenBuffer, (*t1).template tensor(i), - (*t2).template tensor(i), true, &diff); + (*t2).template 
tensor(i), checkAll, &mean); } else { colorIdx = CheckBuffers(lenBuffer, (*t1).template tensor(i), - (*t2).template tensor(i), true, &diff); + (*t2).template tensor(i), checkAll, &mean); } if (colorIdx >= 0) { // Test failed for colorIdx - ReportTestFailure(diff, colorIdx, i, &shape1); + ReportTestFailure(mean, colorIdx, i, &shape1); failNumb++; if (!checkAll) break; @@ -354,15 +549,15 @@ class DALISingleOpTest : public DALITest { } else { if (floatType) { colorIdx = CheckBuffers(t1->size(), - t1->data(), - t2->data(), true, &diff); + t1->data(), + t2->data(), checkAll, &mean); } else { colorIdx = CheckBuffers(t1->size(), - t1->data(), - t2->data(), checkAll, &diff); + t1->data(), + t2->data(), checkAll, &mean); } - if (colorIdx >= 0 && checkAll) - ReportTestFailure(diff, colorIdx); + if (colorIdx >= 0) + ReportTestFailure(mean, colorIdx); } if (!TestCheckType(t_checkNoAssert)) { @@ -380,8 +575,7 @@ class DALISingleOpTest : public DALITest { vector> outputs_; shared_ptr pipeline_; - vector png_; - vector png_sizes_; + ImgSetDescr png_; vector jpeg_decoded_, png_decoded_; vector jpeg_dims_, png_dims_; @@ -392,17 +586,12 @@ class DALISingleOpTest : public DALITest { int num_threads_ = 2; double eps_ = 1e-4; uint32_t testCheckType_ = t_checkDefault; + const DALIImageType img_type_ = ImgType::type; // keep a copy of the creation OpSpec for reference OpSpec spec_; }; -#define USING_DALI_SINGLE_OP_TEST() \ - using DALISingleOpTest::AddSingleOp; \ - using DALISingleOpTest::SetExternalInputs; \ - using DALISingleOpTest::EncodedJPEGData; \ - using DALISingleOpTest::DecodedData; - } // namespace dali #endif // DALI_TEST_DALI_TEST_SINGLE_OP_H_ diff --git a/dali/util/image.cc b/dali/util/image.cc index 8c1d1fdaaac..0779fa854c0 100644 --- a/dali/util/image.cc +++ b/dali/util/image.cc @@ -16,8 +16,8 @@ namespace dali { -void LoadImages(const string image_folder, vector *image_names, - vector *images, vector *image_sizes) { +void LoadImages(const string &image_folder, vector 
*image_names, + ImgSetDescr *imgs) { const string image_list = image_folder + "/image_list.txt"; std::ifstream file(image_list); DALI_ENFORCE(file.is_open()); @@ -28,22 +28,10 @@ void LoadImages(const string image_folder, vector *image_names, image_names->push_back(image_folder + "/" + img); } - for (auto img_name : *image_names) { - std::ifstream img_file(img_name); - DALI_ENFORCE(img_file.is_open()); - - img_file.seekg(0, std::ios::end); - int img_size = static_cast(img_file.tellg()); - img_file.seekg(0, std::ios::beg); - - images->push_back(new uint8[img_size]); - image_sizes->push_back(img_size); - img_file.read(reinterpret_cast((*images)[images->size()-1]), img_size); - } + LoadImages(*image_names, imgs); } -void LoadImages(const vector &image_names, - vector *images, vector *image_sizes) { +void LoadImages(const vector &image_names, ImgSetDescr *imgs) { for (auto img_name : image_names) { std::ifstream img_file(img_name); DALI_ENFORCE(img_file.is_open()); @@ -52,23 +40,22 @@ void LoadImages(const vector &image_names, int img_size = static_cast(img_file.tellg()); img_file.seekg(0, std::ios::beg); - images->push_back(new uint8[img_size]); - image_sizes->push_back(img_size); - img_file.read(reinterpret_cast((*images)[images->size()-1]), img_size); + auto data = new uint8[img_size]; + imgs->data_.push_back(data); + imgs->sizes_.push_back(img_size); + img_file.read(reinterpret_cast(data), img_size); } } -void LoadJPEGS(const string image_folder, vector *jpeg_names, - vector *jpegs, vector *jpeg_sizes) { - LoadImages(image_folder, jpeg_names, jpegs, jpeg_sizes); +void LoadJPEGS(const string &image_folder, vector *jpeg_names, ImgSetDescr *jpegs) { + LoadImages(image_folder, jpeg_names, jpegs); } -void LoadJPEGS(const vector &jpeg_names, - vector *jpegs, vector *jpeg_sizes) { - LoadImages(jpeg_names, jpegs, jpeg_sizes); +void LoadJPEGS(const vector &jpeg_names, ImgSetDescr *jpegs) { + LoadImages(jpeg_names, jpegs); } -void LoadFromFile(string file_name, uint8 
**image, int *h, int *w, int *c) { +void LoadFromFile(const string &file_name, uint8 **image, int *h, int *w, int *c) { std::ifstream file(file_name + ".txt"); DALI_ENFORCE(file.is_open()); @@ -86,30 +73,8 @@ void LoadFromFile(string file_name, uint8 **image, int *h, int *w, int *c) { } } -void WriteHWCImage(const uint8 *img, int h, int w, int c, string file_name) { - DALI_ENFORCE(img != nullptr); - DALI_ENFORCE(h >= 0); - DALI_ENFORCE(w >= 0); - DALI_ENFORCE(c >= 0); - CUDA_CALL(cudaDeviceSynchronize()); - vector tmp(h*w*c, 0); - MemCopy(tmp.data(), img, h*w*c); - std::ofstream file(file_name + ".ppm"); - DALI_ENFORCE(file.is_open()); - - file << "P3" << endl; - file << w << " " << h << endl; - file << "255" << endl; - - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - for (int k = 0; k < 3; ++k) { - int c_id = k % c; - file << int(tmp[i*w*c + j*c + c_id]) << " "; - } - } - file << endl; - } +void WriteHWCImage(const uint8 *img, int h, int w, int c, const string &file_name) { + WriteImageScaleBias(img, h, w, c, 0.f, 1.0f, file_name, outHWCImage); } } // namespace dali diff --git a/dali/util/image.h b/dali/util/image.h index 7f26ae36c76..c847ef09442 100644 --- a/dali/util/image.h +++ b/dali/util/image.h @@ -34,105 +34,75 @@ namespace dali { +class ImgSetDescr { + public: + ~ImgSetDescr() { clear(); } + inline void clear() { + for (auto &ptr : data_) delete[] ptr; + data_.clear(); sizes_.clear(); + } + + inline size_t nImages() const { return data_.size(); } + + vector data_; + vector sizes_; +}; + /** * Load all images from a list of image names. Assumes names contain * full path */ -DLL_PUBLIC void LoadImages(const vector &image_names, - vector *images, vector *image_sizes); +DLL_PUBLIC void LoadImages(const vector &image_names, ImgSetDescr *imgs); /** * Loads images from a specified image folder. 
Assumes the folder contains * a file 'image_list.txt' that lists all the different images in the * folder */ -DLL_PUBLIC void LoadImages(string image_folder, vector *jpeg_names, - vector *jpegs, vector *jpeg_sizes); +DLL_PUBLIC void LoadImages(const string &image_folder, vector *jpeg_names, + ImgSetDescr *imgs); /** * Loads jpegs from a specified image folder. Assumes the folder contains * a file 'image_list.txt' that lists all the different images in the * folder */ -DLL_PUBLIC void LoadJPEGS(string image_folder, vector *jpeg_names, - vector *jpegs, vector *jpeg_sizes); +DLL_PUBLIC void LoadJPEGS(const string &image_folder, vector *jpeg_names, + ImgSetDescr *imgs); /** * Loads all jpegs from the list of image names. Assumes names contains * full path */ -DLL_PUBLIC void LoadJPEGS(const vector &jpeg_names, - vector *jpegs, vector *jpeg_sizes); +DLL_PUBLIC void LoadJPEGS(const vector &jpeg_names, ImgSetDescr *imgs); /** * @brief Writes the input image as a ppm file */ -DLL_PUBLIC void WriteHWCImage(const uint8 *img, int h, int w, int c, string file_name); +DLL_PUBLIC void WriteHWCImage(const uint8 *img, int h, int w, int c, const string &file_name); -/** - * @brief Writes all images in a batch - */ -template -DLL_PUBLIC void WriteHWCBatch(const TensorList &tl, string suffix) { - DALI_ENFORCE(IsType(tl.type())); - for (int i = 0; i < tl.ntensor(); ++i) { - DALI_ENFORCE(tl.tensor_shape(i).size() == 3); - int h = tl.tensor_shape(i)[0]; - int w = tl.tensor_shape(i)[1]; - int c = tl.tensor_shape(i)[2]; - WriteHWCImage(tl.template tensor(i), - h, w, c, std::to_string(i) + "-" + suffix); - } +template +int outHWCImage(const vector &tmp, int h, int w, int c, + int i, int j, int k, float bias, float scale) { + return static_cast(tmp[i*w*c + j*c + k]*scale + bias); } -/** - * @brief Writes an image after applying a scale and bias to get - * pixel values in the range 0-255 - */ template -void WriteHWCImageScaleBias(const T *img, int h, int w, - int c, float bias, float scale, 
string file_name) { - DALI_ENFORCE(img != nullptr); - DALI_ENFORCE(h >= 0); - DALI_ENFORCE(w >= 0); - DALI_ENFORCE(c >= 0); - CUDA_CALL(cudaDeviceSynchronize()); - Tensor tmp_gpu, double_gpu; - tmp_gpu.Resize({h, w, c}); - tmp_gpu.template mutable_data(); // make sure the buffer is allocated - double_gpu.Resize({h, w, c}); - - // Copy the data and convert to double - MemCopy(tmp_gpu.template mutable_data(), img, tmp_gpu.nbytes()); - Convert(tmp_gpu.template data(), tmp_gpu.size(), double_gpu.template mutable_data()); - - vector tmp(h*w*c, 0); - MemCopy(tmp.data(), double_gpu.template data(), double_gpu.nbytes()); - CUDA_CALL(cudaDeviceSynchronize()); - std::ofstream file(file_name + ".ppm"); - DALI_ENFORCE(file.is_open()); - - file << "P3" << endl; - file << w << " " << h << endl; - file << "255" << endl; - - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - for (int k = 0; k < c; ++k) { - file << int(tmp[i*w*c + j*c + k]*scale + bias) << " "; - } - } - file << endl; - } +int outCHWImage(const vector &tmp, int h, int w, int c, + int i, int j, int k, float bias, float scale) { + return static_cast(tmp[k*h*w + i*w + j]*scale + bias); } +typedef int (*outFunc)(const vector &tmp, int h, int w, int c, + int i, int j, int k, float bias, float scale); + /** * @brief Writes an image after applying a scale and bias to get * pixel values in the range 0-255 */ template -void WriteCHWImageScaleBias(const T *img, int h, int w, - int c, float bias, float scale, string file_name) { +void WriteImageScaleBias(const T *img, int h, int w, + int c, float bias, float scale, const string &file_name, outFunc pFunc) { DALI_ENFORCE(img != nullptr); DALI_ENFORCE(h >= 0); DALI_ENFORCE(w >= 0); @@ -147,19 +117,20 @@ void WriteCHWImageScaleBias(const T *img, int h, int w, MemCopy(tmp_gpu.template mutable_data(), img, tmp_gpu.nbytes()); Convert(tmp_gpu.template data(), tmp_gpu.size(), double_gpu.template mutable_data()); - vector tmp(h*w*c, 0); + vector tmp(h * w * c, 0); 
MemCopy(tmp.data(), double_gpu.template data(), double_gpu.nbytes()); + CUDA_CALL(cudaDeviceSynchronize()); std::ofstream file(file_name + ".ppm"); DALI_ENFORCE(file.is_open()); - file << "P3" << endl; + file << (c == 3? "P3" : "P2") << endl; // For color/grayscale images, respectively file << w << " " << h << endl; file << "255" << endl; for (int i = 0; i < h; ++i) { for (int j = 0; j < w; ++j) { for (int k = 0; k < c; ++k) { - file << int(tmp[k*h*w + i*w + j]*scale + bias) << " "; + file << (*pFunc)(tmp, h, w, c, i, j, k, bias, scale) << " "; } } file << endl; @@ -170,36 +141,34 @@ void WriteCHWImageScaleBias(const T *img, int h, int w, * @brief Writes all images in a batch with a scale and bias */ template -void WriteHWCBatch(const TensorList &tl, float bias, float scale, string suffix) { +void WriteBatch(const TensorList &tl, float bias, float scale, const string &suffix, + const std::array &permute, outFunc pFunc) { DALI_ENFORCE(IsType(tl.type())); for (int i = 0; i < tl.ntensor(); ++i) { DALI_ENFORCE(tl.tensor_shape(i).size() == 3); - int h = tl.tensor_shape(i)[0]; - int w = tl.tensor_shape(i)[1]; - int c = tl.tensor_shape(i)[2]; - WriteHWCImageScaleBias( + int h = tl.tensor_shape(i)[permute[0]]; + int w = tl.tensor_shape(i)[permute[1]]; + int c = tl.tensor_shape(i)[permute[2]]; + WriteImageScaleBias( tl.template tensor(i), h, w, c, bias, scale, - std::to_string(i) + "-" + suffix); + std::to_string(i) + "-" + suffix, pFunc); } } -/** - * @brief Writes all images in a batch with a scale and bias - */ template -void WriteCHWBatch(const TensorList &tl, float bias, float scale, string suffix) { - DALI_ENFORCE(IsType(tl.type())); - for (int i = 0; i < tl.ntensor(); ++i) { - DALI_ENFORCE(tl.tensor_shape(i).size() == 3); - int c = tl.tensor_shape(i)[0]; - int h = tl.tensor_shape(i)[1]; - int w = tl.tensor_shape(i)[2]; - WriteCHWImageScaleBias( - tl.template tensor(i), - h, w, c, bias, scale, - std::to_string(i) + "-" + suffix); - } +void WriteHWCBatch(const 
TensorList &tl, float bias, float scale, const string &suffix) { + WriteBatch(tl, bias, scale, suffix, std::array{0, 1, 2}, outHWCImage); +} + +template +void WriteCHWBatch(const TensorList &tl, float bias, float scale, const string &suffix) { + WriteBatch(tl, bias, scale, suffix, std::array{1, 2, 0}, outCHWImage); +} + +template +void WriteHWCBatch(const TensorList &tl, const string &suffix) { + WriteBatch(tl, 0.f, 1.0, suffix, std::array{0, 1, 2}, outHWCImage); } } // namespace dali