Skip to content

Commit cbe9530

Browse files
committed
Add Python Module interface for MPS backend (#251)
- Enable global manual seeding via torch.manual_seed(), with a test case.
- Add torch.mps.synchronize() to wait for the MPS stream to finish, with a test case.
- Enable the following Python interfaces for MPS: torch.mps.get_rng_state(), torch.mps.set_rng_state(), torch.mps.is_available(), torch.mps.synchronize(), torch.mps.manual_seed(), torch.mps.seed(), torch.mps.is_initialized(), torch.mps.init().
1 parent 4e984cb commit cbe9530

File tree

14 files changed

+301
-16
lines changed

14 files changed

+301
-16
lines changed

aten/src/ATen/detail/MPSHooksInterface.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,21 @@ struct TORCH_API MPSHooksInterface {
2828
return false;
2929
}
3030

31+
virtual bool isOnMacOS13orNewer() const {
32+
return false;
33+
}
34+
3135
virtual const Generator& getDefaultMPSGenerator() const {
3236
AT_ERROR("Cannot get default MPS generator without MPS backend.");
3337
}
3438

3539
virtual Allocator* getMPSDeviceAllocator() const {
3640
AT_ERROR("MPSDeviceAllocator requires MPS.");
3741
}
42+
43+
virtual void deviceSynchronize() const {
44+
TORCH_CHECK(false, "Cannot synchronize MPS device without MPS backend. ");
45+
}
3846
};
3947

4048
struct TORCH_API MPSHooksArgs {};

aten/src/ATen/mps/MPSDevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class TORCH_API MPSDevice {
7272

7373
TORCH_API bool is_available();
7474
TORCH_API bool is_macos_13_or_newer();
75-
75+
TORCH_API void device_synchronize();
7676
TORCH_API at::Allocator* GetMPSAllocator(bool useSharedAllocator = false);
7777

7878
} // namespace mps

aten/src/ATen/mps/MPSDevice.mm

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <c10/util/CallOnce.h>
44

55
#include <ATen/mps/MPSDevice.h>
6+
#include <ATen/mps/MPSStream.h>
67
#include <ATen/mps/MPSAllocatorInterface.h>
78
#include <ATen/mps/IndexKernels.h>
89

@@ -107,5 +108,9 @@ bool is_macos_13_or_newer() {
107108
return MPSDevice::getInstance()->isMacOS13Plus();
108109
}
109110

111+
void device_synchronize() {
112+
getDefaultMPSStream()->synchronize(SyncType::COMMIT_AND_WAIT);
113+
}
114+
110115
} // namespace mps
111116
} // namespace at

aten/src/ATen/mps/MPSHooks.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ bool MPSHooks::hasMPS() const {
1616
return at::mps::is_available();
1717
}
1818

19+
bool MPSHooks::isOnMacOS13orNewer() const {
20+
return at::mps::is_macos_13_or_newer();
21+
}
22+
1923
Allocator* MPSHooks::getMPSDeviceAllocator() const {
2024
return at::mps::GetMPSAllocator();
2125
}
@@ -24,6 +28,10 @@ const Generator& MPSHooks::getDefaultMPSGenerator() const {
2428
return at::mps::detail::getDefaultMPSGenerator();
2529
}
2630

31+
void MPSHooks::deviceSynchronize() const {
32+
at::mps::device_synchronize();
33+
}
34+
2735
using at::MPSHooksRegistry;
2836
using at::RegistererMPSHooksRegistry;
2937

aten/src/ATen/mps/MPSHooks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ struct MPSHooks : public at::MPSHooksInterface {
1313
MPSHooks(at::MPSHooksArgs) {}
1414
void initMPS() const override;
1515
bool hasMPS() const override;
16+
bool isOnMacOS13orNewer() const override;
1617
Allocator* getMPSDeviceAllocator() const override;
1718
const Generator& getDefaultMPSGenerator() const override;
19+
void deviceSynchronize() const override;
1820
};
1921

2022
}} // at::mps

build_variables.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,7 @@ torch_cpp_srcs = [
715715
"torch/csrc/api/src/imethod.cpp",
716716
"torch/csrc/api/src/jit.cpp",
717717
"torch/csrc/api/src/serialize.cpp",
718+
"torch/csrc/api/src/mps.cpp",
718719
"torch/csrc/api/src/nn/init.cpp",
719720
"torch/csrc/api/src/nn/module.cpp",
720721
"torch/csrc/api/src/nn/modules/_functions.cpp",
@@ -821,6 +822,7 @@ libtorch_python_core_sources = [
821822
"torch/csrc/dynamo/guards.cpp",
822823
"torch/csrc/dynamo/init.cpp",
823824
"torch/csrc/functorch/init.cpp",
825+
"torch/csrc/mps/Module.cpp",
824826
"torch/csrc/jit/backends/backend_init.cpp",
825827
"torch/csrc/jit/python/init.cpp",
826828
"torch/csrc/jit/passes/onnx.cpp",

test/test_mps.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from torch.testing import make_tensor
2626
from torch.testing._comparison import TensorLikePair
2727
from torch.testing._internal.common_dtype import get_all_dtypes, integral_types
28+
import torch.mps
2829
import torch.backends.mps
2930
from torch.distributions import Uniform, Exponential
3031
from functools import partial
@@ -5741,6 +5742,45 @@ def test_mps_generator(self):
57415742
mps_x = torch.randn(5, device='mps', generator=g_mps)
57425743
self.assertEqual(mps_x, mps_y)
57435744

5745+
def test_default_mps_generator(self):
5746+
# manual seeding on the "default" MPS generator using
5747+
# the global torch.manual_seed()
5748+
torch.manual_seed(230)
5749+
mps_x = torch.randn(5, device='mps')
5750+
# manual seeding using torch.mps.manual_seed()
5751+
# which should set the "default" MPS generator
5752+
# like the global torch.manual_seed()
5753+
torch.mps.manual_seed(230)
5754+
mps_y = torch.randn(5, device='mps')
5755+
# seed values were the same, so the random tensor contents should match
5756+
self.assertEqual(mps_x, mps_y)
5757+
5758+
# save the default generator's state to restore it later
5759+
g_state = torch.mps.get_rng_state()
5760+
5761+
# generate random numbers without seeding
5762+
mps_x = torch.randn(5, device='mps')
5763+
# in this case, the random results must differ from the last generated random results
5764+
self.assertNotEqual(mps_x, mps_y)
5765+
5766+
# restore the previously saved state, and the results should match again
5767+
torch.mps.set_rng_state(g_state)
5768+
mps_x = torch.randn(5, device='mps')
5769+
self.assertEqual(mps_x, mps_y)
5770+
5771+
def test_device_synchronize(self):
5772+
# just running some ops each followed by a synchronize to wait for
5773+
# MPS stream to finish running each of them
5774+
net1 = torch.nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)\
5775+
.to(device='mps', dtype=torch.float)
5776+
5777+
x = torch.rand(1, 128, 6, 6, device='mps', dtype=torch.float, requires_grad=True)
5778+
torch.mps.synchronize()
5779+
x = net1(x)
5780+
torch.mps.synchronize()
5781+
x.backward(torch.randn_like(x))
5782+
torch.mps.synchronize()
5783+
57445784
# Test random_.to and random_.from_int
57455785
def test_random(self):
57465786
def helper(shape, low, high, dtype=torch.int32):

torch/_C/__init__.pyi.in

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -904,8 +904,6 @@ def _disabled_torch_function_impl(func: Callable, types: Iterable[Type], args: T
904904
def _disabled_torch_dispatch_impl(func: Callable, types: Iterable[Type], args: Tuple, kwargs: Dict) -> Any: ... # THPModule_disable_dispatch_function
905905
def _get_linalg_preferred_backend() -> torch._C._LinalgBackend: ...
906906
def _set_linalg_preferred_backend(arg: torch._C._LinalgBackend): ...
907-
def _is_mps_available() -> _bool: ...
908-
def _is_mps_on_macos_13_or_newer() -> _bool: ...
909907
class _LinalgBackend:
910908
Default: _LinalgBackend
911909
Cusolver: _LinalgBackend
@@ -1201,6 +1199,12 @@ class _TensorBase(metaclass=_TensorMeta):
12011199
# Defined in torch/csrc/multiprocessing/init.cpp
12021200
def _multiprocessing_init() -> None: ...
12031201

1202+
# Defined in torch/csrc/mps/Module.cpp
1203+
def _mps_synchronize() -> None: ...
1204+
def _mps_init() -> None: ...
1205+
def _is_mps_available() -> _bool: ...
1206+
def _is_mps_on_macos_13_or_newer() -> _bool: ...
1207+
12041208
# Defined in torch/csrc/cuda/Module.cpp
12051209
def _cuda_getCurrentStream(device: _int) -> Tuple: ...
12061210
def _cuda_getCurrentRawStream(device: _int) -> _int: ...

torch/csrc/Module.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,6 @@
8787
#endif
8888
#endif
8989

90-
#if defined(USE_MPS)
91-
#include <ATen/mps/MPSDevice.h>
92-
#endif
93-
9490
#if defined(USE_VALGRIND)
9591
#include <callgrind.h>
9692
#endif
@@ -1219,6 +1215,10 @@ void initIttBindings(PyObject* module);
12191215
} // namespace torch
12201216
#endif
12211217

1218+
#ifdef USE_MPS
1219+
PyMethodDef* MPSModule_methods();
1220+
#endif
1221+
12221222
namespace torch {
12231223
void initVerboseBindings(PyObject* module);
12241224
} // namespace torch
@@ -1274,6 +1274,9 @@ PyObject* initModule() {
12741274
#ifdef USE_CUDA
12751275
THPUtils_addPyMethodDefs(methods, THCPModule_methods());
12761276
#endif
1277+
#ifdef USE_MPS
1278+
THPUtils_addPyMethodDefs(methods, MPSModule_methods());
1279+
#endif
12771280
#if defined(USE_DISTRIBUTED) && defined(USE_C10D)
12781281
THPUtils_addPyMethodDefs(
12791282
methods, torch::distributed::c10d::python_functions());
@@ -1593,15 +1596,6 @@ Call this whenever a new thread is created in order to propagate values from
15931596

15941597
ASSERT_TRUE(set_module_attr("has_cuda", has_cuda));
15951598
ASSERT_TRUE(set_module_attr("has_mps", has_mps));
1596-
py_module.def("_is_mps_available", []() { return at::hasMPS(); });
1597-
py_module.def("_is_mps_on_macos_13_or_newer", []() {
1598-
#ifdef USE_MPS
1599-
return at::mps::is_macos_13_or_newer();
1600-
#else
1601-
return false;
1602-
#endif
1603-
});
1604-
16051599
ASSERT_TRUE(
16061600
set_module_attr("has_mkldnn", at::hasMKLDNN() ? Py_True : Py_False));
16071601

torch/csrc/api/include/torch/mps.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
3+
#include <torch/csrc/Export.h>
4+
5+
#include <cstddef>
6+
#include <cstdint>
7+
8+
namespace torch {
9+
namespace mps {
10+
11+
/// Returns true if MPS device is available.
12+
bool TORCH_API is_available();
13+
14+
/// Sets the seed for the current GPU.
15+
void TORCH_API manual_seed(uint64_t seed);
16+
17+
/// Waits for all streams on a MPS device to complete.
18+
void TORCH_API synchronize();
19+
20+
} // namespace mps
21+
} // namespace torch

0 commit comments

Comments (0)