PaddlePaddle · zhwesky2010 · Aug 4, 2021 · Jul 27, 2021
diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
@@ -1400,20 +1400,26 @@ void BindImperative(py::module *m_ptr) {
 
       )DOC")
       .def("cuda",
-           [](const std::shared_ptr<imperative::VarBase> &self, int device_id,
-              bool blocking) {
+           [](const std::shared_ptr<imperative::VarBase> &self,
+              py::handle &handle, bool blocking) {
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
              PADDLE_THROW(platform::errors::PermissionDenied(
                  "Cannot copy this Tensor to GPU in CPU version Paddle, "
                  "Please recompile or reinstall Paddle with CUDA support."));
 #else
              int device_count = platform::GetCUDADeviceCount();
-             if (device_id == -1) {
+             int device_id = 0;
+             if (handle == py::none()) {
                if (platform::is_gpu_place(self->Place())) {
                  return self;
-               } else {
-                 device_id = 0;
                }
+             } else {
+               PyObject *py_obj = handle.ptr();
+               PADDLE_ENFORCE_EQ(
+                   PyCheckInteger(py_obj), true,
+                   platform::errors::InvalidArgument(
+                       " 'device_id' must be a positive integer"));
+               device_id = py::cast<int>(handle);
              }
              PADDLE_ENFORCE_GE(
                  device_id, 0,
@@ -1437,26 +1443,30 @@ void BindImperative(py::module *m_ptr) {
              }
 #endif
            },
-           py::arg("device_id") = -1, py::arg("blocking") = true, R"DOC(
+           py::arg("device_id") = py::none(), py::arg("blocking") = true, R"DOC(
         Returns a copy of this Tensor in GPU memory.
 
         If this Tensor is already in GPU memory and device_id is default, 
         then no copy is performed and the original Tensor is returned.
 
         Args:
-            device_id(int, optional): The destination GPU device id. Defaults to the current device.
+            device_id(int, optional): The destination GPU device id. Default: None, in which case a Tensor that is already on a GPU is returned as-is; otherwise it is copied to GPU 0.
             blocking(bool, optional): If False and the source is in pinned memory, the copy will be 
               asynchronous with respect to the host. Otherwise, the argument has no effect. Default: False.
 
         Examples:
             .. code-block:: python
 
+              # required: gpu
               import paddle
               x = paddle.to_tensor(1.0, place=paddle.CPUPlace())
               print(x.place)        # CPUPlace
 
               y = x.cuda()
               print(y.place)        # CUDAPlace(0)
+
+              y = x.cuda(None)
+              print(y.place)        # CUDAPlace(0)
 
               y = x.cuda(1)
               print(y.place)        # CUDAPlace(1)

diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -72,10 +72,17 @@ def _test_place(place):
                 if core.is_compiled_with_cuda():
                     y = x.pin_memory()
                     self.assertEqual(y.place.__repr__(), "CUDAPinnedPlace")
+                    y = x.cuda()
+                    y = x.cuda(None)
+                    self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")
+                    y = x.cuda(device_id=0)
+                    self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")
                     y = x.cuda(blocking=False)
                     self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")
                     y = x.cuda(blocking=True)
                     self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")
+                    with self.assertRaises(ValueError):
+                        y = x.cuda("test")
 
                 # support 'dtype' is core.VarType
                 x = paddle.rand((2, 2))