polish code

LielinJiang · Apr 28, 2021 · 66ccc97 · 66ccc97
1 parent be2fcc6
commit 66ccc97
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 39 deletions.
diff --git a/paddle/fluid/operators/decode_jpeg_op.cc b/paddle/fluid/operators/decode_jpeg_op.cc
@@ -43,7 +43,20 @@ class DecodeJpegOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DecodeJpeg");
     OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DecodeJpeg");
 
-    auto out_dims = std::vector<int>(1, -1);
+    auto mode = ctx->Attrs().Get<std::string>("mode");
+    std::vector<int> out_dims;
+
+    if (mode == "unchanged") {
+      out_dims = {-1, -1, -1};
+    } else if (mode == "gray") {
+      out_dims = {1, -1, -1};
+    } else if (mode == "rgb") {
+      out_dims = {3, -1, -1};
+    } else {
+      PADDLE_THROW(platform::errors::Fatal(
+          "The provided mode is not supported for JPEG files on GPU: ", mode));
+    }
+
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
   }
 
@@ -74,9 +87,10 @@ class DecodeJpegOpMaker : public framework::OpProtoAndCheckerMaker {
              "of the JPEG image. It is a tensor with rank 1.");
     AddOutput("Out", "The output tensor of DecodeJpeg op");
     AddComment(R"DOC(
-This operator decode a JPEG image into a 3 dimensional RGB Tensor.
-Optionally converts the image to the desired format. The values of 
-the output tensor are uint8 between 0 and 255.
+This operator decodes a JPEG image into a 3 dimensional RGB Tensor 
+or 1 dimensional Gray Tensor. Optionally converts the image to the 
+desired format. The values of the output tensor are uint8 between 0 
+and 255.
 )DOC");
     AddAttr<std::string>(
         "mode",

diff --git a/paddle/fluid/operators/decode_jpeg_op.cu b/paddle/fluid/operators/decode_jpeg_op.cu
@@ -18,13 +18,15 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/dynload/nvjpeg.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/stream/cuda_stream.h"
 
 namespace paddle {
 namespace operators {
 
+static cudaStream_t nvjpeg_stream = nullptr;
 static nvjpegHandle_t nvjpeg_handle = nullptr;
 
-void init_nvjpegImage(nvjpegImage_t* img) {
+void InitNvjpegImage(nvjpegImage_t* img) {
   for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) {
     img->channel[c] = nullptr;
     img->pitch[c] = 0;
@@ -72,57 +74,58 @@ class GPUDecodeJpegKernel : public framework::OpKernel<T> {
     int width = widths[0];
     int height = heights[0];
 
-    nvjpegOutputFormat_t outputFormat;
-    int outputComponents;
+    nvjpegOutputFormat_t output_format;
+    int output_components;
 
     auto mode = ctx.Attr<std::string>("mode");
     if (mode == "unchanged") {
       if (components == 1) {
-        outputFormat = NVJPEG_OUTPUT_Y;
-        outputComponents = 1;
+        output_format = NVJPEG_OUTPUT_Y;
+        output_components = 1;
       } else if (components == 3) {
-        outputFormat = NVJPEG_OUTPUT_RGB;
-        outputComponents = 3;
+        output_format = NVJPEG_OUTPUT_RGB;
+        output_components = 3;
       } else {
         platform::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
         PADDLE_THROW(platform::errors::Fatal(
             "The provided mode is not supported for JPEG files on GPU"));
       }
     } else if (mode == "gray") {
-      outputFormat = NVJPEG_OUTPUT_Y;
-      outputComponents = 1;
+      output_format = NVJPEG_OUTPUT_Y;
+      output_components = 1;
     } else if (mode == "rgb") {
-      outputFormat = NVJPEG_OUTPUT_RGB;
-      outputComponents = 3;
+      output_format = NVJPEG_OUTPUT_RGB;
+      output_components = 3;
     } else {
       platform::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
-      PADDLE_ENFORCE_EQ(
-          false, true,
-          platform::errors::Fatal(
-              "The provided mode is not supported for JPEG files on GPU"));
+      PADDLE_THROW(platform::errors::Fatal(
+          "The provided mode is not supported for JPEG files on GPU"));
     }
 
-    nvjpegImage_t outImage;
-    init_nvjpegImage(&outImage);
+    nvjpegImage_t out_image;
+    InitNvjpegImage(&out_image);
 
-    auto stream = ctx.cuda_device_context().stream();
+    // create nvjpeg stream
+    if (nvjpeg_stream == nullptr) {
+      cudaStreamCreateWithFlags(&nvjpeg_stream, cudaStreamNonBlocking);
+    }
 
     int sz = widths[0] * heights[0];
 
     auto* out = ctx.Output<framework::LoDTensor>("Out");
-    std::vector<int64_t> out_shape = {outputComponents, height, width};
+    std::vector<int64_t> out_shape = {output_components, height, width};
     out->Resize(framework::make_ddim(out_shape));
 
     T* data = out->mutable_data<T>(ctx.GetPlace());
 
-    for (int c = 0; c < outputComponents; c++) {
-      outImage.channel[c] = data + c * sz;
-      outImage.pitch[c] = width;
+    for (int c = 0; c < output_components; c++) {
+      out_image.channel[c] = data + c * sz;
+      out_image.pitch[c] = width;
     }
 
     nvjpegStatus_t decode_status = platform::dynload::nvjpegDecode(
-        nvjpeg_handle, nvjpeg_state, x_data, x->numel(), outputFormat,
-        &outImage, stream);
+        nvjpeg_handle, nvjpeg_state, x_data, x->numel(), output_format,
+        &out_image, nvjpeg_stream);
   }
 };
 

diff --git a/paddle/fluid/operators/read_file_op.cc b/paddle/fluid/operators/read_file_op.cc
@@ -29,25 +29,20 @@ class CPUReadFileKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto filename = ctx.Attr<std::string>("filename");
-    std::vector<char> image_data;
+
     std::ifstream input(filename.c_str(),
                         std::ios::in | std::ios::binary | std::ios::ate);
     std::streamsize file_size = input.tellg();
 
     input.seekg(0, std::ios::beg);
 
-    if ((int64_t)image_data.size() < (int64_t)file_size) {
-      image_data.resize(file_size);
-    }
-
-    input.read(image_data.data(), file_size);
-
     auto* out = ctx.Output<framework::LoDTensor>("Out");
     std::vector<int64_t> out_shape = {file_size};
     out->Resize(framework::make_ddim(out_shape));
 
     uint8_t* data = out->mutable_data<T>(ctx.GetPlace());
-    std::memcpy(data, reinterpret_cast<uint8_t*>(image_data.data()), file_size);
+
+    input.read(reinterpret_cast<char*>(data), file_size);
   }
 };
 

diff --git a/python/paddle/tests/test_read_file.py b/python/paddle/tests/test_read_file.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py
@@ -834,8 +834,8 @@ def read_file(filename, name=None):
 
 def decode_jpeg(x, mode='unchanged', name=None):
     """
-    Decodes a JPEG image into a 3 dimensional RGB Tensor.
-    Optionally converts the image to the desired format.
+    Decodes a JPEG image into a 3 dimensional Tensor with 3 channels (RGB) or 1 channel (gray).
+    Optionally converts the image to the desired format.
     The values of the output tensor are uint8 between 0 and 255.
 
     Args: