NVIDIA · ptrendx · Aug 17, 2018 · Jul 23, 2018 · Jul 23, 2018 · Jul 24, 2018
diff --git a/dali/benchmark/dali_bench.h b/dali/benchmark/dali_bench.h
@@ -33,14 +33,10 @@ class DALIBenchmark : public benchmark::Fixture {
  public:
   DALIBenchmark() {
     rand_gen_.seed(time(nullptr));
-    LoadJPEGS(image_folder, &jpeg_names_, &jpegs_, &jpeg_sizes_);
+    LoadJPEGS(image_folder, &jpeg_names_, &jpegs_);
   }
 
-  virtual ~DALIBenchmark() {
-    for (auto &ptr : jpegs_) {
-      delete[] ptr;
-    }
-  }
+  virtual ~DALIBenchmark() = default;
 
   int RandInt(int a, int b) {
     return std::uniform_int_distribution<>(a, b)(rand_gen_);
@@ -52,27 +48,26 @@ class DALIBenchmark : public benchmark::Fixture {
   }
 
   inline void MakeJPEGBatch(TensorList<CPUBackend> *tl, int n) {
-    DALI_ENFORCE(jpegs_.size() > 0, "jpegs must be loaded to create batches");
+    const auto nImgs = jpegs_.nImages();  // image count (presumably); modulus below
+    DALI_ENFORCE(nImgs > 0, "jpegs must be loaded to create batches");
     vector<Dims> shape(n);
     for (int i = 0; i < n; ++i) {
-      shape[i] = {jpeg_sizes_[i % jpegs_.size()]};
+      shape[i] = {jpegs_.sizes_[i % nImgs]};  // sample i gets the size of image i % nImgs
     }
 
     tl->template mutable_data<uint8>();
     tl->Resize(shape);
 
     for (int i = 0; i < n; ++i) {
       std::memcpy(tl->template mutable_tensor<uint8>(i),
-          jpegs_[i % jpegs_.size()],
-          jpeg_sizes_[i % jpegs_.size()]);
+          jpegs_.data_[i % nImgs], jpegs_.sizes_[i % nImgs]);  // images repeat when n > nImgs
     }
   }
 
  protected:
   std::mt19937 rand_gen_;
   vector<string> jpeg_names_;
-  vector<uint8*> jpegs_;
-  vector<int> jpeg_sizes_;
+  ImgSetDescr jpegs_;
 };
 
 }  // namespace dali

diff --git a/dali/benchmark/resnet50_nvjpeg_bench.cc b/dali/benchmark/resnet50_nvjpeg_bench.cc
@@ -112,7 +112,9 @@ BENCHMARK_DEFINE_F(RealRN50, nvjpegPipe)(benchmark::State& st) { // NOLINT
     }
   }
 
-  WriteHWCBatch<uint8_t>(*ws.Output<GPUBackend>(0), 0, 1, "img");
+#if DALI_DEBUG
+  WriteHWCBatch(*ws.Output<GPUBackend>(0), "img");
+#endif
   int num_batches = st.iterations() + static_cast<int>(pipelined);
   st.counters["FPS"] = benchmark::Counter(batch_size*num_batches,
       benchmark::Counter::kIsRate);

diff --git a/dali/image/jpeg_test.cc b/dali/image/jpeg_test.cc
@@ -12,136 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <gtest/gtest.h>
-#include <opencv2/opencv.hpp>
-
-#include <cmath>
-#include <fstream>
-#include <stdexcept>
-#include <vector>
-#include <string>
-
-#include "dali/common.h"
-#include "dali/test/dali_test.h"
-#include "dali/image/jpeg.h"
+#include "dali/test/dali_test_decoder.h"
 
 namespace dali {
 
-namespace {
-// Our turbo jpeg decoder cannot handle CMYK images
-// or 410 images
-const vector<string> tjpg_test_images = {
-  image_folder + "/420.jpg",
-  image_folder + "/422.jpg",
-  image_folder + "/440.jpg",
-  image_folder + "/444.jpg",
-  image_folder + "/gray.jpg",
-  image_folder + "/411.jpg",
-  image_folder + "/411-non-multiple-4-width.jpg",
-  image_folder + "/420-odd-height.jpg",
-  image_folder + "/420-odd-width.jpg",
-  image_folder + "/420-odd-both.jpg",
-  image_folder + "/422-odd-width.jpg"
-};
-}  // namespace
-
 // Fixture for jpeg decode testing. Templated
 // to make googletest run our tests grayscale & rgb
 template <typename ImgType>
-class JpegDecodeTest : public DALITest {
- public:
-  void SetUp() {
-    if (IsColor(img_type_)) {
-      c_ = 3;
-    } else if (img_type_ == DALI_GRAY) {
-      c_ = 1;
-    }
-    rand_gen_.seed(time(nullptr));
-    LoadJPEGS(tjpg_test_images, &jpegs_, &jpeg_sizes_);
-  }
-
-  void TearDown() {
-    DALITest::TearDown();
-  }
-
-  void VerifyDecode(const uint8 *img, int h, int w, int img_id) {
-    // Compare w/ opencv result
-    cv::Mat ver;
-    cv::Mat jpeg = cv::Mat(1, jpeg_sizes_[img_id], CV_8UC1, jpegs_[img_id]);
-
-    ASSERT_TRUE(CheckIsJPEG(jpegs_[img_id], jpeg_sizes_[img_id]));
-    int flag = IsColor(img_type_) ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE;
-    cv::imdecode(jpeg, flag, &ver);
-
-    cv::Mat ver_img(h, w, IsColor(img_type_) ? CV_8UC3 : CV_8UC2);
-    if (img_type_ == DALI_RGB) {
-      // Convert from BGR to RGB for verification
-      cv::cvtColor(ver, ver_img, CV_BGR2RGB);
-    } else {
-      ver_img = ver;
-    }
-
-    ASSERT_EQ(h, ver_img.rows);
-    ASSERT_EQ(w, ver_img.cols);
-    vector<int> diff(h*w*c_, 0);
-    for (int i = 0; i < h*w*c_; ++i) {
-      diff[i] = abs(static_cast<int>(ver_img.ptr()[i] - img[i]));
-    }
-
-    // calculate the MSE
-    float mean, std;
-    MeanStdDev(diff, &mean, &std);
-
-#ifndef NDEBUG
-    cout << "num: " << diff.size() << endl;
-    cout << "mean: " << mean << endl;
-    cout << "std: " << std << endl;
-#endif
-
-    // Note: We allow a slight deviation from the ground truth.
-    // This value was picked fairly arbitrarily to let the test
-    // pass for libjpeg turbo
-    ASSERT_LT(mean, 2.f);
-    ASSERT_LT(std, 3.f);
-  }
-
-  void MeanStdDev(const vector<int> &diff, float *mean, float *std) {
-    // Avoid division by zero
-    ASSERT_NE(diff.size(), 0);
-
-    double sum = 0, var_sum = 0;
-    for (auto &val : diff) {
-      sum += val;
-    }
-    *mean = sum / diff.size();
-    for (auto &val : diff) {
-      var_sum += (val - *mean)*(val - *mean);
-    }
-    *std = sqrt(var_sum / diff.size());
-  }
-
- protected:
-  const DALIImageType img_type_ = ImgType::type;
-  int c_;
+class JpegDecodeTest : public GenericDecoderTest<ImgType> {
 };
 
 // Run RGB & grayscale tests
 typedef ::testing::Types<RGB, BGR, Gray> Types;
 TYPED_TEST_CASE(JpegDecodeTest, Types);
 
 TYPED_TEST(JpegDecodeTest, DecodeJPEGHost) {
-  vector<uint8> image;
-  for (size_t img = 0; img < this->jpegs_.size(); ++img) {
-    Tensor<CPUBackend> t;
-    DALI_CALL(DecodeJPEGHost(this->jpegs_[img],
-            this->jpeg_sizes_[img],
-            this->img_type_, &t));
-#ifndef NDEBUG
-    cout << img << " " << tjpg_test_images[img] << " " << this->jpeg_sizes_[img] << endl;
-    cout << "dims: " << t.dim(1) << "x" << t.dim(0) << endl;
-#endif
-    this->VerifyDecode(t.data<uint8_t>(), t.dim(0), t.dim(1), img);
-  }
+  this->RunTestDecode(this->jpegs_, 1.5);  // 1.5: presumably an error tolerance - TODO confirm
 }
 
 }  // namespace dali
diff --git a/dali/image/transform.cc b/dali/image/transform.cc
@@ -152,4 +152,74 @@ DALIError_t FastResizeCropMirrorHost(const uint8 *img, int H, int W, int C,
   return DALISuccess;
 }
 
+/**
+ * @brief Validates the input tensor of the operator named `opName`.
+ *
+ * Enforces that `input` is 3-dimensional, stores uint8 data, and has a last
+ * dimension of size 1 or 3. Each check goes through DALI_ENFORCE.
+ */
+void CheckParam(const Tensor<CPUBackend> &input, const std::string &opName) {
+  DALI_ENFORCE(input.ndim() == 3, opName + " expects 3-dimensional input.");
+  DALI_ENFORCE(IsType<uint8>(input.type()),
+               opName + " expects input data in uint8.");
+  DALI_ENFORCE(input.dim(2) == 1 || input.dim(2) == 3,
+               opName + " supports hwc rgb & grayscale inputs.");
+}
+
+typedef cv::Vec<uchar, 1> Vec1b;
+
+/**
+ * @brief Applies a per-pixel affine color transformation to a uint8 image.
+ *
+ * For C == 1 every output value is matr[0] * in + matr[1]. Otherwise each
+ * pixel is accessed as 3-channel and output channel c is
+ * matr[4*c] * in[0] + matr[4*c+1] * in[1] + matr[4*c+2] * in[2] + matr[4*c+3],
+ * i.e. matr is read as a row-major 3x4 matrix. All results are converted
+ * with cv::saturate_cast<uint8>.
+ *
+ * @param img input pixels, H rows by W columns with C channels
+ * @param H image height (rows)
+ * @param W image width (columns)
+ * @param C channel count; must be 1 or 3
+ * @param matr transformation coefficients: 2 values for C == 1, 12 otherwise
+ * @param out_img output buffer with the same layout as img; it may be the
+ *        very same buffer as img, since each pixel is read before it is written
+ * @return DALISuccess
+ */
+DALIError_t MakeColorTransformation(const uint8 *img, int H, int W, int C,
+                                    const float *matr, uint8 *out_img) {
+  const int channel_flag = C == 3 ? CV_8UC3 : CV_8UC1;
+
+  const cv::Mat cv_imgIn = CreateMatFromPtr(H, W, channel_flag, img);
+  cv::Mat cv_imgOut = CreateMatFromPtr(H, W, channel_flag, out_img);
+
+  if (C == 1) {
+    for (int y = 0; y < H; ++y) {
+      for (int x = 0; x < W; ++x) {
+        cv_imgOut.at<Vec1b>(y, x)[0] =
+            cv::saturate_cast<uint8>((matr[0] * cv_imgIn.at<Vec1b>(y, x)[0]) + matr[1]);
+      }
+    }
+  } else {
+    for (int y = 0; y < H; ++y) {
+      for (int x = 0; x < W; ++x) {
+        // Using direct calculation because it is 25% faster
+        // than the two loops which could be used here.
+        // The input pixel is copied by value so that storing the output channels
+        // cannot change the inputs still to be read when out_img aliases img.
+        const cv::Vec3b inpPix = cv_imgIn.at<cv::Vec3b>(y, x);
+        auto &outPix = cv_imgOut.at<cv::Vec3b>(y, x);
+        outPix[0] = cv::saturate_cast<uint8>
+          (inpPix[0] * matr[0] + inpPix[1] * matr[1] + inpPix[2] * matr[2] + matr[3]);
+        outPix[1] = cv::saturate_cast<uint8>
+          (inpPix[0] * matr[4] + inpPix[1] * matr[5] + inpPix[2] * matr[6] + matr[7]);
+        outPix[2] = cv::saturate_cast<uint8>
+          (inpPix[0] * matr[8] + inpPix[1] * matr[9] + inpPix[2] * matr[10] + matr[11]);
+      }
+    }
+  }
+
+  return DALISuccess;
+}
+
 }  // namespace dali
diff --git a/dali/image/transform.h b/dali/image/transform.h
@@ -15,8 +15,10 @@
 #ifndef DALI_IMAGE_TRANSFORM_H_
 #define DALI_IMAGE_TRANSFORM_H_
 
+#include <string>
 #include "dali/common.h"
 #include "dali/error_handling.h"
+#include "dali/pipeline/data/tensor.h"
 
 namespace dali {
 
@@ -31,7 +33,7 @@ namespace dali {
  * this temporary workspace pointer to avoid extra memory allocation. The size
  * of the memory pointed to by 'workspace' should be rsz_h*rsz_w*C bytes
  *
- * Note: We leave the calculate of the resize dimesions & the decision of whether
+ * Note: We leave the calculation of the resize dimensions & the decision of whether
  * to mirror the image or not external to the function. With the GPU version of
  * this function, these params will need to have been calculated before-hand
  * and, in the case of a batched call, copied to the device. Separating these
@@ -65,6 +67,18 @@ DALIError_t FastResizeCropMirrorHost(const uint8 *img, int H, int W, int C,
     int mirror, uint8 *out_img, DALIInterpType type = DALI_INTERP_LINEAR,
     uint8 *workspace = nullptr);
 
+/**
+ * @brief Enforces that `input` is a 3-dimensional uint8 tensor with 1 or 3 channels.
+ */
+void CheckParam(const Tensor<CPUBackend> &input, const std::string &opName);
+
+/**
+ * @brief Applies the color transformation given by `matrix` to the
+ * H x W x C uint8 image `img` and writes the result to `out_img`.
+ */
+DALIError_t MakeColorTransformation(const uint8 *img, int H, int W, int C,
+                                    const float *matrix, uint8 *out_img);
+
 }  // namespace dali
 
 #endif  // DALI_IMAGE_TRANSFORM_H_
diff --git a/dali/pipeline/data/allocator.h b/dali/pipeline/data/allocator.h
@@ -75,7 +75,7 @@ DALI_DECLARE_OPTYPE_REGISTRY(GPUAllocator, GPUAllocator);
 
 #define DALI_REGISTER_GPU_ALLOCATOR(OpName, OpType) \
   DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType,     \
-      dali::GPUAllocator, dali::GPUAllocator)
+      dali::GPUAllocator, dali::GPUAllocator, "GPU_Allocator")
 
 
 /**
@@ -99,7 +99,7 @@ DALI_DECLARE_OPTYPE_REGISTRY(CPUAllocator, CPUAllocator);
 
 #define DALI_REGISTER_CPU_ALLOCATOR(OpName, OpType) \
   DALI_DEFINE_OPTYPE_REGISTERER(OpName, OpType,     \
-      dali::CPUAllocator, dali::CPUAllocator)
+      dali::CPUAllocator, dali::CPUAllocator, "CPU_Allocator")
 
 /**
  * @brief Pinned memory CPU allocator