From c514ea5f87c64f0180126d1ef2d370a250c15bdc Mon Sep 17 00:00:00 2001
From: chenenquan
Date: Tue, 14 Sep 2021 11:22:32 +0800
Subject: [PATCH] Add API paddle.device.cuda.empty_cache to release idle GPU
 memory held by the allocator (#35427)

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Fix test coverage problem for empty_cache

* Delete redundant check for empty_cache

* Fix the problem of empty_cache's doc

* Delete the nvidia-smi comment in the doc of empty_cache, test=document_fix
---
 paddle/fluid/platform/gpu_info.cc             |  9 +++++++
 paddle/fluid/platform/gpu_info.h              |  3 +++
 paddle/fluid/pybind/pybind.cc                 |  1 +
 python/paddle/device/cuda/__init__.py         | 24 +++++++++++++++++
 .../tests/unittests/test_cuda_empty_cache.py  | 27 +++++++++++++++++++
 5 files changed, 64 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py

diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index fda233b3a016b..76edb3910ccce 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -22,10 +22,12 @@ limitations under the License. */
 #else
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #endif
+#include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/monitor.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
 
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
   return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
 }
 
+void EmptyCache(void) {
+  std::vector<int> devices = GetSelectedDevices();
+  for (auto device : devices) {
+    memory::Release(CUDAPlace(device));
+  }
+}
+
 }  // namespace platform
 }  // namespace paddle

diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h
index b5800ef083885..ef7f93a61dbfb 100644
--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);
 
 bool IsCudaMallocRecorded(int dev_id);
 
+//! Empty idle cached memory held by the allocator.
+void EmptyCache(void);
+
 }  // namespace platform
 }  // namespace paddle
 

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index b0148e50afc54..f797ed5142c3d 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("op_support_gpu", OpSupportGPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("get_cuda_device_count", platform::GetCUDADeviceCount); + m.def("cuda_empty_cache", platform::EmptyCache); #if !defined(PADDLE_WITH_HIP) && !defined(_WIN32) m.def("nvprof_init", platform::CudaProfilerInit); diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py index 834cda71fdc5f..be2e2488a3049 100644 --- a/python/paddle/device/cuda/__init__.py +++ b/python/paddle/device/cuda/__init__.py @@ -23,6 +23,7 @@ 'current_stream', 'synchronize', 'device_count', + 'empty_cache', ] @@ -117,3 +118,26 @@ def device_count(): core, 'get_cuda_device_count') else 0 return num_gpus + + +def empty_cache(): + """ + Releases idle cached memory held by the allocator so that those can be used in other GPU + application and visible in `nvidia-smi`. In most cases you don't need to use this function, + Paddle does not release the memory back to the OS when you remove Tensors on the GPU, + Because it keeps gpu memory in a pool so that next allocations can be done much faster. + + Examples: + .. code-block:: python + + import paddle + + # required: gpu + paddle.set_device("gpu") + tensor = paddle.randn([512, 512, 512], "float") + del tensor + paddle.device.cuda.empty_cache() + """ + + if core.is_compiled_with_cuda(): + core.cuda_empty_cache() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py new file mode 100644 index 0000000000000..4aefb234bbfc1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py @@ -0,0 +1,27 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import unittest + + +class TestEmptyCache(unittest.TestCase): + def test_empty_cache(self): + x = paddle.randn((2, 10, 12)).astype('float32') + del x + self.assertIsNone(paddle.device.cuda.empty_cache()) + + +if __name__ == '__main__': + unittest.main()