#43 fix mapping to EGL in DSCudaMemory

insight-platform · Feb 1, 2023 · 8c922ba · 8c922ba
1 parent 6017ded
commit 8c922ba
Show file tree

Hide file tree

Showing 5 changed files with 74 additions and 89 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,5 +1,5 @@
 **/__pycache__
-.idea/
+**/.idea/
 .vscode/
 .gitignore
 .git
@@ -10,3 +10,4 @@ Makefile
 **/_skbuild/
 **/dist/
 **/*.egg-info/
+**/build/
diff --git a/libs/savantboost/pysavantboost/src/nms.cpp b/libs/savantboost/pysavantboost/src/nms.cpp
@@ -54,15 +54,15 @@ namespace pysavantboost {
 
         gst_buffer_map(buffer, &inmap, GST_MAP_READ);
         auto *inputnvsurface = reinterpret_cast<NvBufSurface *>(inmap.data);
-        DSCudaMemory ds_cuda_memory = DSCudaMemory(inputnvsurface, 0);
+        DSCudaMemory ds_cuda_memory = DSCudaMemory(inputnvsurface, batchID);
         gst_buffer_unmap(buffer, &inmap);
 
         int frame_height = (int) inputnvsurface->surfaceList[batchID].planeParams.height[0];
         int frame_width = (int) inputnvsurface->surfaceList[batchID].planeParams.width[0];
 
         NppiSize ref_frame_size = {frame_width, frame_height};
 
-        ref_frame = ds_cuda_memory.GetMapCudaPtr(batchID);
+        ref_frame = ds_cuda_memory.GetMapCudaPtr();
         RotateBBox rotated_bbox = RotateBBox(left + width/2, top+height/2, width, height, angle);
 
         object_image = rotated_bbox.CutFromFrame(ref_frame, ref_frame_size, padding_width, padding_height);
@@ -98,4 +98,4 @@ namespace pysavantboost {
         m.def("cut_rotated_bbox", &cut_rotated_bbox, "Returns numpy array with rotatted object");
     }
 
-}
+}
diff --git a/libs/savantboost/savantboost/deepstream/nvsurfaceptr.cpp b/libs/savantboost/savantboost/deepstream/nvsurfaceptr.cpp
@@ -14,79 +14,64 @@ GST_DEBUG_CATEGORY_STATIC (gst_dsclcprepro_debug);
     } \
 }
 
-DSCudaMemory::DSCudaMemory(NvBufSurface * surface, bool EglImageMapBuffer){
+DSCudaMemory::DSCudaMemory(NvBufSurface *surface, guint batch_id) {
     GST_DEBUG_CATEGORY_INIT (
             gst_dsclcprepro_debug,
             "dsclcprepro",
             150,
             "dsclcprepro plugin");
-    _EglImageMapBuffer = EglImageMapBuffer;
+    if (!surface) {
+        throw std::invalid_argument("Invalid pointer to NvBufSurface");
+    }
+    if (batch_id >= surface->batchSize) {
+        throw std::invalid_argument("batch_id is out of bound");
+    }
     _surface = surface;
+    _batch_id = batch_id;
 }
 
-Npp8u* DSCudaMemory::GetMapCudaPtr(guint batch_id){
-    Npp8u* frame_ptr = nullptr;
-    Npp8u* _gpu_frame = nullptr;
-    Npp8u* _gpu_frame_1 = nullptr;
+Npp8u *DSCudaMemory::GetMapCudaPtr() {
+    Npp8u *frame_ptr = nullptr;
     cudaEglFrame cudaEgl;
     cudaError_t err;
-    unsigned long int buffer_size;
-    int status;
+    NvBufSurfaceParams &surface = _surface->surfaceList[_batch_id];
 
     switch (_surface->memType) {
 
         case NVBUF_MEM_SURFACE_ARRAY:
-            if (_EglImageMapBuffer){
-                GST_WARNING("Interaction with the buffers directly on the GPU of Jetson NX using NvBufSurfaceMapEglImage "
-                            "unfortunately does not work correctly on long videos and long processing time. "
-                            "This can lead to errors and crashes of the pipeline.");
-                if (NvBufSurfaceMapEglImage (_surface, 0) !=0 ) {
+            if (_egl_frame_ptr != nullptr) {
+                return _egl_frame_ptr;
+            }
+            if (!surface.mappedAddr.eglImage) {
+                if (NvBufSurfaceMapEglImage(_surface, _batch_id) != 0) {
                     GST_ERROR("Error NvBufSurfaceMapEglImage");
                     goto error;
                 }
+            }
+            if (_pResource == nullptr) {
                 err = cudaGraphicsEGLRegisterImage(
-                    &_pResource,
-                    _surface->surfaceList[batch_id].mappedAddr.eglImage,
-                    cudaGraphicsRegisterFlagsNone
+                        &_pResource,
+                        surface.mappedAddr.eglImage,
+                        cudaGraphicsRegisterFlagsNone
                 );
                 if (err != cudaSuccess) {
                     GST_ERROR("Error cudaGraphicsEGLRegisterImage");
                     goto error;
                 }
-
-                err = cudaGraphicsResourceGetMappedEglFrame(&cudaEgl, _pResource, 0, 0);
-                if (err != cudaSuccess) {
-                    GST_ERROR("Error cudaGraphicsResourceGetMappedEglFrame");
-                    goto error;
-                }
-
-                size_t inputSize;
-                cudaGraphicsResourceGetMappedPointer((void **) &frame_ptr, &inputSize, _pResource);
             }
-            else {
-                if (this->mapped_surface.find(batch_id) == this->mapped_surface.end()) {
-                    status = NvBufSurfaceMap(_surface, -1, -1, NVBUF_MAP_READ_WRITE);
-                    if (status != 0) GST_ERROR("Mapping error NvBufSurfaceMap");
-                    NvBufSurfaceSyncForCpu(_surface, -1, -1);
-                    cudaCheckError()
-                    if (status != 0) GST_ERROR("Mapping error NvBufSurfaceSyncForCpu");
-                    buffer_size = _surface->surfaceList[batch_id].width * _surface->surfaceList[batch_id].height * sizeof(Npp32u);
-                    cudaMalloc((void **) &_gpu_frame, buffer_size);
-                    cudaCheckError()
-                    cudaMemcpy(_gpu_frame, _surface->surfaceList[batch_id].mappedAddr.addr[0], buffer_size, cudaMemcpyHostToDevice);
-                    cudaCheckError()
-                    this->mapped_surface[batch_id] = _gpu_frame;
-                    frame_ptr = _gpu_frame;
-                }
-                else
-                {
-                    frame_ptr = this->mapped_surface[batch_id];
-                }
 
+            err = cudaGraphicsResourceGetMappedEglFrame(&cudaEgl, _pResource, 0, 0);
+            if (err != cudaSuccess) {
+                GST_ERROR("Error cudaGraphicsResourceGetMappedEglFrame");
+                goto error;
             }
+
+            size_t inputSize;
+            cudaGraphicsResourceGetMappedPointer((void **) &frame_ptr, &inputSize, _pResource);
+            _egl_frame_ptr = frame_ptr;
             break;
         case NVBUF_MEM_CUDA_DEVICE: case NVBUF_MEM_CUDA_UNIFIED:
-            frame_ptr = (Npp8u*) _surface->surfaceList[batch_id].dataPtr;
+            frame_ptr = (Npp8u *) surface.dataPtr;
             break;
         default:
             GST_ERROR("Not supported memory type");
@@ -99,35 +84,32 @@ Npp8u* DSCudaMemory::GetMapCudaPtr(guint batch_id){
 }
 
 void DSCudaMemory::UnMapCudaPtr() {
-    cudaError_t err;
-    Npp8u* _gpu_frame = nullptr;
-    int status;
-    if (_surface != nullptr)
-        if (_surface->memType == NVBUF_MEM_SURFACE_ARRAY){
-            if (_EglImageMapBuffer){
-                if (NvBufSurfaceUnMapEglImage(_surface, 0) !=0) GST_ERROR("Error NvBufSurfaceUnMapEglImage");
+    if (_surface->memType == NVBUF_MEM_SURFACE_ARRAY) {
+        if (_pResource != nullptr) {
+            if (cudaGraphicsUnregisterResource(_pResource) != cudaSuccess) {
+                GST_ERROR("Error cudaGraphicsUnregisterResource");
+                return;
             }
-            else {
-                guint batch_id;
-                for (std::pair<guint, Npp8u *> element : this->mapped_surface)
-                {
-                    batch_id = element.first;
-                    unsigned long int buffer_size = _surface->surfaceList[batch_id].width * _surface->surfaceList[batch_id].height * sizeof(Npp32u);
-                    _gpu_frame = (Npp8u*) element.second;
-                    err = cudaMemcpy(
-                      (void*) _surface->surfaceList[batch_id].mappedAddr.addr[0], 
-                        _gpu_frame, 
-                        buffer_size,
-                        cudaMemcpyDeviceToHost
-                    );
-                    if (err != cudaSuccess) GST_ERROR("Error cudaMemcpy");
-                    cudaFree(_gpu_frame);  
-                }
-                this->mapped_surface.erase(this->mapped_surface.begin(), this->mapped_surface.end());
-                NvBufSurfaceSyncForDevice(_surface, -1, -1);
-                status = NvBufSurfaceUnMap(_surface, -1, -1);
-                if (status!=0) GST_ERROR("UnMapping error NvBufSurfaceUnMap");
+            _pResource = nullptr;
+            _egl_frame_ptr = nullptr;
+        }
+        if (_surface->surfaceList[_batch_id].mappedAddr.eglImage) {
+            if (NvBufSurfaceUnMapEglImage(_surface, _batch_id) != 0) {
+                GST_ERROR("Error NvBufSurfaceUnMapEglImage");
+                return;
             }
         }
+    }
 }
 
+guint DSCudaMemory::width() {
+    return _surface->surfaceList[_batch_id].width;
+}
+
+guint DSCudaMemory::height() {
+    return _surface->surfaceList[_batch_id].height;
+}
+
+guint DSCudaMemory::size() {
+    return _surface->surfaceList[_batch_id].dataSize;
+}
diff --git a/libs/savantboost/savantboost/deepstream/nvsurfaceptr.h b/libs/savantboost/savantboost/deepstream/nvsurfaceptr.h
@@ -6,17 +6,19 @@
 #include "nvbufsurface.h"
 #include <unordered_map>
 
-class DSCudaMemory{
+class DSCudaMemory {
 private:
-    struct cudaGraphicsResource* _pResource = nullptr;
+    struct cudaGraphicsResource *_pResource = nullptr;
+    Npp8u *_egl_frame_ptr = nullptr;
     guint _batch_id;
-    bool _EglImageMapBuffer;
     NvBufSurface *_surface;
-    std::unordered_map<guint, Npp8u *> mapped_surface;
 public:
-    DSCudaMemory(NvBufSurface * surface, bool EglImageMapBuffer);
-    Npp8u* GetMapCudaPtr(guint batch_id);
+    DSCudaMemory(NvBufSurface *surface, guint batch_id);
+    Npp8u *GetMapCudaPtr();
     void UnMapCudaPtr();
+    guint width();
+    guint height();
+    guint size();
 };
 
 

diff --git a/libs/savantboost/savantboost/deepstream/preprocessing.cpp b/libs/savantboost/savantboost/deepstream/preprocessing.cpp
@@ -108,7 +108,6 @@ GstFlowReturn ObjectsPreprocessing::preprocessing(
     if (status)
     {
         surface = (NvBufSurface *) in_map_info.data;
-        DSCudaMemory ds_cuda_memory = DSCudaMemory(surface, false);
         batch_meta = gst_buffer_get_nvds_batch_meta(inbuf);
 
         if (batch_meta == nullptr) {
@@ -119,11 +118,12 @@ GstFlowReturn ObjectsPreprocessing::preprocessing(
 
         for (frame_meta_list_item = batch_meta->frame_meta_list; frame_meta_list_item != nullptr; frame_meta_list_item = frame_meta_list_item->next)
         {
+            DSCudaMemory ds_cuda_memory = DSCudaMemory(surface, frame_meta->batch_id);
             frame_meta = (NvDsFrameMeta *) (frame_meta_list_item->data);
             frame_height = (int) surface->surfaceList[frame_meta->batch_id].planeParams.height[0];
             frame_width = (int) surface->surfaceList[frame_meta->batch_id].planeParams.width[0];
             ref_frame_size = {frame_width, frame_height};
-            ref_frame = ds_cuda_memory.GetMapCudaPtr(frame_meta->batch_id);
+            ref_frame = ds_cuda_memory.GetMapCudaPtr();
             const size_t ref_image_bytes= ref_frame_size.width*ref_frame_size.height*sizeof(Npp8u)*4;
 
             cudaMalloc((void **)&copy_frame, ref_image_bytes);
@@ -218,9 +218,9 @@ GstFlowReturn ObjectsPreprocessing::preprocessing(
                     if (preproc_object!=nullptr) delete preproc_object;
                 }
             }
-
+
+            ds_cuda_memory.UnMapCudaPtr();
         }
-        ds_cuda_memory.UnMapCudaPtr();
         gst_buffer_unmap (inbuf, &in_map_info);
         return GST_FLOW_OK;
     }
@@ -250,14 +250,14 @@ GstFlowReturn ObjectsPreprocessing::restore_frame(GstBuffer* gst_buffer){
     {
         batch_meta = gst_buffer_get_nvds_batch_meta(gst_buffer);
         surface = (NvBufSurface *) in_map_info.data;
-        DSCudaMemory ds_cuda_memory = DSCudaMemory(surface, false);
         for (frame_list_item = batch_meta->frame_meta_list; frame_list_item != nullptr; frame_list_item = frame_list_item->next)
         {
+            DSCudaMemory ds_cuda_memory = DSCudaMemory(surface, frame_meta->batch_id);
             frame_meta = (NvDsFrameMeta *) (frame_list_item->data);
             frame_height = (int) surface->surfaceList[frame_meta->batch_id].planeParams.height[0];
             frame_width = (int) surface->surfaceList[frame_meta->batch_id].planeParams.width[0];
             frame_size = {frame_width, frame_height};
-            frame = ds_cuda_memory.GetMapCudaPtr(frame_meta->batch_id);
+            frame = ds_cuda_memory.GetMapCudaPtr();
             frame_bytes = frame_width*frame_height*sizeof(Npp8u)*4;
 
             ref_frame = (Npp8u *) frames_map[(size_t) gst_buffer][frame_meta->batch_id];
@@ -267,8 +267,8 @@ GstFlowReturn ObjectsPreprocessing::restore_frame(GstBuffer* gst_buffer){
             cudaFree(ref_frame);
             cudaCheckError()
             frames_map[(size_t) gst_buffer].erase(frame_meta->batch_id);
+            ds_cuda_memory.UnMapCudaPtr();
         }
-        ds_cuda_memory.UnMapCudaPtr();
         if (frames_map[(size_t) gst_buffer].empty())
         {
             frames_map.erase((size_t) gst_buffer);