
Commit

Add api paddle.device.cuda.empty_cache to release idle gpu memory held by allocator. (PaddlePaddle#35427)

* Add empty_cache api to release idle gpu memory held by allocator, test=develop

* Add empty_cache api to release idle gpu memory held by allocator, test=develop

* Add empty_cache api to release idle gpu memory held by allocator, test=develop

* Fix test coverage problem for empty_cache

* delete redundant check for empty_cache

* fix the problem of empty_cache's doc

* delete the nvidia-smi comment in doc of empty_cache, test=document_fix
xiaolao authored and AnnaTrainingG committed Sep 29, 2021
1 parent 402673e commit c514ea5
Showing 5 changed files with 64 additions and 0 deletions.
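
For context, a minimal usage sketch of the API this commit introduces (assuming a CUDA-enabled build of Paddle; it mirrors the docstring example further down):

    import paddle

    # Allocate a large tensor on the GPU, then drop the only reference to it.
    paddle.set_device("gpu")
    tensor = paddle.randn([512, 512, 512], "float32")
    del tensor

    # The freed blocks stay in Paddle's allocator pool; empty_cache() returns the
    # idle blocks to the driver, so nvidia-smi reports the memory as free again.
    paddle.device.cuda.empty_cache()
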
9 changes: 9 additions & 0 deletions paddle/fluid/platform/gpu_info.cc
@@ -22,10 +22,12 @@ limitations under the License. */
#else
#include "paddle/fluid/platform/dynload/cudnn.h"
#endif
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/monitor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"

DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
  return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
}

void EmptyCache(void) {
  std::vector<int> devices = GetSelectedDevices();
  for (auto device : devices) {
    memory::Release(CUDAPlace(device));
  }
}

} // namespace platform
} // namespace paddle
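
Note that `EmptyCache` iterates over `GetSelectedDevices()` and calls `memory::Release` on each of them, so the idle pool is released on every GPU Paddle has selected, not only the current one. A sketch of how that scoping can be observed from Python, under the assumption that the `FLAGS_selected_gpus` flag is honored when set in the environment before `paddle` is imported:

    import os

    # Assumption for illustration: limit Paddle to GPU 0 before importing it,
    # so GetSelectedDevices() on the C++ side only reports device 0.
    os.environ["FLAGS_selected_gpus"] = "0"

    import paddle

    paddle.set_device("gpu:0")
    x = paddle.randn([1024, 1024], "float32")
    del x

    # Releases the idle cache on each selected device -- here, only GPU 0.
    paddle.device.cuda.empty_cache()
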
3 changes: 3 additions & 0 deletions paddle/fluid/platform/gpu_info.h
@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);

bool IsCudaMallocRecorded(int dev_id);

//! Empty idle cached memory held by the allocator.
void EmptyCache(void);

} // namespace platform
} // namespace paddle

1 change: 1 addition & 0 deletions paddle/fluid/pybind/pybind.cc
@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("op_support_gpu", OpSupportGPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
m.def("cuda_empty_cache", platform::EmptyCache);

#if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
m.def("nvprof_init", platform::CudaProfilerInit);
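
The new binding exposes `platform::EmptyCache` to Python as `core.cuda_empty_cache`, which is exactly what the public wrapper in the next file calls; a quick sanity check against the raw binding (assuming a CUDA build):

    from paddle.fluid import core

    if core.is_compiled_with_cuda():
        # The raw entry point that paddle.device.cuda.empty_cache() wraps.
        core.cuda_empty_cache()
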
24 changes: 24 additions & 0 deletions python/paddle/device/cuda/__init__.py
@@ -23,6 +23,7 @@
    'current_stream',
    'synchronize',
    'device_count',
    'empty_cache',
]


@@ -117,3 +118,26 @@ def device_count():
        core, 'get_cuda_device_count') else 0

    return num_gpus


def empty_cache():
    """
    Releases idle cached memory held by the allocator so that it can be used by
    other GPU applications and is visible in `nvidia-smi`. In most cases you do
    not need to use this function; Paddle does not release the memory back to
    the OS when you delete Tensors on the GPU, because it keeps GPU memory in a
    pool so that subsequent allocations can be served much faster.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle

            paddle.set_device("gpu")
            tensor = paddle.randn([512, 512, 512], "float32")
            del tensor
            paddle.device.cuda.empty_cache()
    """

    if core.is_compiled_with_cuda():
        core.cuda_empty_cache()
27 changes: 27 additions & 0 deletions python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py
@@ -0,0 +1,27 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import unittest


class TestEmptyCache(unittest.TestCase):
    def test_empty_cache(self):
        x = paddle.randn((2, 10, 12)).astype('float32')
        del x
        self.assertIsNone(paddle.device.cuda.empty_cache())


if __name__ == '__main__':
    unittest.main()
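
The test above only asserts that the call completes and returns None, which also holds on CPU-only builds where the wrapper is a no-op. A stricter check could compare the size of the reserved pool before and after the call; a sketch assuming a CUDA build and a Paddle version that also provides `paddle.device.cuda.memory_reserved` (that API is not part of this commit):

    import unittest

    import paddle


    class TestEmptyCacheReleases(unittest.TestCase):
        def test_reserved_memory_shrinks(self):
            if not paddle.is_compiled_with_cuda():
                return  # nothing to verify on a CPU-only build
            paddle.set_device("gpu")
            x = paddle.randn([256, 256, 256], "float32")
            del x
            before = paddle.device.cuda.memory_reserved()
            paddle.device.cuda.empty_cache()
            after = paddle.device.cuda.memory_reserved()
            # Releasing the idle pool should never grow it, and typically shrinks it.
            self.assertLessEqual(after, before)


    if __name__ == '__main__':
        unittest.main()
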
