From 7d5c742d1d9a2ac1676811002b1d511ff655dc57 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 27 Nov 2024 16:39:48 -0800 Subject: [PATCH 01/16] add the system class --- cuda_core/cuda/core/experimental/__init__.py | 1 + cuda_core/cuda/core/experimental/_system.py | 66 ++++++++++++++++++++ cuda_core/docs/source/api.rst | 1 + cuda_core/docs/source/api_private.rst | 1 + cuda_core/docs/source/release.md | 2 + cuda_core/docs/source/release/0.2.0-notes.md | 10 +++ cuda_core/tests/test_system.py | 37 +++++++++++ 7 files changed, 118 insertions(+) create mode 100644 cuda_core/cuda/core/experimental/_system.py create mode 100644 cuda_core/docs/source/release/0.2.0-notes.md create mode 100644 cuda_core/tests/test_system.py diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 9b978398..25f5f82c 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -7,3 +7,4 @@ from cuda.core.experimental._launcher import LaunchConfig, launch from cuda.core.experimental._program import Program from cuda.core.experimental._stream import Stream, StreamOptions +from cuda.core.experimental._system import system diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/experimental/_system.py new file mode 100644 index 00000000..58fbd6ae --- /dev/null +++ b/cuda_core/cuda/core/experimental/_system.py @@ -0,0 +1,66 @@ +from typing import Tuple +from cuda import cuda, cudart +from cuda.core.experimental._device import Device +from cuda.core.experimental._utils import handle_return + +class System: + """ Provide information about the cuda system. + This class is a singleton and should not be instantiated directly. + """ + + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(System, cls).__new__(cls) + return cls._instance + + def __init__(self): + if hasattr(self, '_initialized') and self._initialized: + return + self._initialized = True + + @property + def driver_version(self) -> Tuple[int, int]: + """ + Query the CUDA driver version. + + Returns + ------- + tuple of int + A 2-tuple of (major, minor) version numbers. + """ + version = handle_return(cuda.cuDriverGetVersion()) + major = version // 1000 + minor = (version % 1000) // 10 + return (major, minor) + + @property + def num_devices(self) -> int: + """ + Query the number of available GPUs. + + Returns + ------- + int + The number of available GPU devices. + """ + return handle_return(cudart.cudaGetDeviceCount()) + + @property + def devices(self) -> tuple: + """ + Query the available device instances. + + Returns + ------- + tuple of Device + A tuple containing instances of available devices. + """ + total = self.num_devices + return tuple(Device(device_id) for device_id in range(total)) + +system = System() +system.__doc__ = """ +Singleton instance of the :obj:`~cuda.core.experimental._system.System` class. +""" diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 1cb9811b..3d2a8481 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -16,6 +16,7 @@ CUDA runtime Device launch + system :template: dataclass.rst diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index f100eb7c..a833d69c 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,6 +16,7 @@ CUDA runtime _memory.Buffer _stream.Stream _event.Event + _system.System CUDA compilation toolchain diff --git a/cuda_core/docs/source/release.md b/cuda_core/docs/source/release.md index 48e24786..5cbaa7f2 100644 --- a/cuda_core/docs/source/release.md +++ b/cuda_core/docs/source/release.md @@ -6,4 +6,6 @@ maxdepth: 3 --- 0.1.0 + 0.2.0 + ``` diff --git a/cuda_core/docs/source/release/0.2.0-notes.md b/cuda_core/docs/source/release/0.2.0-notes.md new file mode 100644 index 00000000..e1a3c4ec --- /dev/null +++ b/cuda_core/docs/source/release/0.2.0-notes.md @@ -0,0 +1,10 @@ +# `cuda.core` Release notes + +Released on , 2024 + +## Hightlights +- Addition of the system singleton + +## Limitations + + diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py new file mode 100644 index 00000000..548e8685 --- /dev/null +++ b/cuda_core/tests/test_system.py @@ -0,0 +1,37 @@ +# test_System.py + +try: + from cuda.bindings import driver, runtime +except ImportError: + from cuda import cuda as driver + from cuda import cudart as runtime + +from cuda.core.experimental import Device, System + +from cuda.core.experimental import Device +from cuda.core.experimental._utils import handle_return + +def test_System_singleton(): + System1 = System + System2 = System + assert System1 is System2, "System is not a singleton" + +def test_driver_version(): + driver_version = System.driver_version + print(driver_version) + version = handle_return(driver.cuDriverGetVersion()) + expected_driver_version = (version // 1000, (version % 1000) // 10) + assert driver_version == expected_driver_version, "Driver version does not match expected value" + +def test_num_devices(): + num_devices = System.num_devices + expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) + assert num_devices == expected_num_devices, "Number of devices does not match expected value" + +def test_devices(): + devices = System.devices + expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) + expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices)) + assert len(devices) == len(expected_devices), "Number of devices does not match expected value" + for device, expected_device in zip(devices, expected_devices): + assert device.device_id == expected_device.device_id, "Device ID does not match expected value" From 4c4acef6f840ebce13dcf41317f447d448420ae6 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Thu, 28 Nov 2024 16:48:59 -0800 Subject: [PATCH 02/16] fix old test change --- cuda_core/tests/test_system.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index 548e8685..400d5e22 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -1,35 +1,33 @@ -# test_System.py - try: from cuda.bindings import driver, runtime except ImportError: from cuda import cuda as driver from cuda import cudart as runtime -from cuda.core.experimental import Device, System +from cuda.core.experimental import Device, system from cuda.core.experimental import Device from cuda.core.experimental._utils import handle_return -def test_System_singleton(): - System1 = System - System2 = System - assert System1 is System2, "System is not a singleton" +def test_system_singleton(): + system1 = system + system2 = system + assert system1 is system2, "system is not a singleton" def test_driver_version(): - driver_version = System.driver_version + driver_version = system.driver_version print(driver_version) version = handle_return(driver.cuDriverGetVersion()) expected_driver_version = (version // 1000, (version % 1000) // 10) assert driver_version == expected_driver_version, "Driver version does not match expected value" def test_num_devices(): - num_devices = System.num_devices + num_devices = system.num_devices expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) assert num_devices == expected_num_devices, "Number of devices does not match expected value" def test_devices(): - devices = System.devices + devices = system.devices expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices)) assert len(devices) == len(expected_devices), "Number of devices does not match expected value" From 36f045c6a1a834fc28f8652d348ac281ca827a15 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Thu, 28 Nov 2024 16:50:18 -0800 Subject: [PATCH 03/16] run ruff manually --- cuda_core/cuda/core/experimental/_system.py | 4 +++- cuda_core/tests/test_system.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/experimental/_system.py index 58fbd6ae..c1ce9402 100644 --- a/cuda_core/cuda/core/experimental/_system.py +++ b/cuda_core/cuda/core/experimental/_system.py @@ -1,8 +1,10 @@ from typing import Tuple + from cuda import cuda, cudart from cuda.core.experimental._device import Device from cuda.core.experimental._utils import handle_return + class System: """ Provide information about the cuda system. This class is a singleton and should not be instantiated directly. @@ -12,7 +14,7 @@ class System: def __new__(cls): if cls._instance is None: - cls._instance = super(System, cls).__new__(cls) + cls._instance = super().__new__(cls) return cls._instance def __init__(self): diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index 400d5e22..a093dc94 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -5,10 +5,9 @@ from cuda import cudart as runtime from cuda.core.experimental import Device, system - -from cuda.core.experimental import Device from cuda.core.experimental._utils import handle_return + def test_system_singleton(): system1 = system system2 = system From 5f8ff802ee9efba50492870410d14d8633471cde Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Mon, 2 Dec 2024 09:28:56 -0800 Subject: [PATCH 04/16] Update cuda_core/docs/source/release/0.2.0-notes.md Co-authored-by: Leo Fang --- cuda_core/docs/source/release/0.2.0-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.md b/cuda_core/docs/source/release/0.2.0-notes.md index e1a3c4ec..57a3254d 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.md +++ b/cuda_core/docs/source/release/0.2.0-notes.md @@ -3,7 +3,7 @@ Released on , 2024 ## Hightlights -- Addition of the system singleton +- Add a `cuda.core.experimental.system` module for querying system- or process- wide information. ## Limitations From d1d6928d6be107087f534a7dc37bf9c8dbdc9463 Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Mon, 2 Dec 2024 09:44:28 -0800 Subject: [PATCH 05/16] Update cuda_core/docs/source/release.md Co-authored-by: Leo Fang --- cuda_core/docs/source/release.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release.md b/cuda_core/docs/source/release.md index 5cbaa7f2..8c810273 100644 --- a/cuda_core/docs/source/release.md +++ b/cuda_core/docs/source/release.md @@ -6,6 +6,6 @@ maxdepth: 3 --- 0.1.0 - 0.2.0 + 0.1.1 ``` From cfa9d167bcd39504fa8a6963f1fafd836cda2623 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 2 Dec 2024 09:56:23 -0800 Subject: [PATCH 06/16] address comments --- cuda_core/docs/source/release/0.2.0-notes.md | 10 ---------- cuda_core/tests/test_system.py | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) delete mode 100644 cuda_core/docs/source/release/0.2.0-notes.md diff --git a/cuda_core/docs/source/release/0.2.0-notes.md b/cuda_core/docs/source/release/0.2.0-notes.md deleted file mode 100644 index e1a3c4ec..00000000 --- a/cuda_core/docs/source/release/0.2.0-notes.md +++ /dev/null @@ -1,10 +0,0 @@ -# `cuda.core` Release notes - -Released on , 2024 - -## Hightlights -- Addition of the system singleton - -## Limitations - - diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index a093dc94..893d1206 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -11,7 +11,7 @@ def test_system_singleton(): system1 = system system2 = system - assert system1 is system2, "system is not a singleton" + assert id(system1) == id(system2), "system is not a singleton" def test_driver_version(): driver_version = system.driver_version From 8e43cd26b30d0b34526260c5cd60bdadeecb3e4d Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 2 Dec 2024 09:57:16 -0800 Subject: [PATCH 07/16] rename release file --- cuda_core/docs/source/release/0.1.1-notes.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 cuda_core/docs/source/release/0.1.1-notes.md diff --git a/cuda_core/docs/source/release/0.1.1-notes.md b/cuda_core/docs/source/release/0.1.1-notes.md new file mode 100644 index 00000000..404ecb85 --- /dev/null +++ b/cuda_core/docs/source/release/0.1.1-notes.md @@ -0,0 +1,7 @@ +# `cuda.core` Release notes + +Released on , 2024 + +## Hightlights +- Add a `cuda.core.experimental.system` module for querying system- or process- wide information. + From bff2627fa70a446c337fc987d8165f78987feae9 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 2 Dec 2024 10:33:30 -0800 Subject: [PATCH 08/16] update link style to match other PR --- cuda_core/cuda/core/experimental/_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/experimental/_system.py index c1ce9402..2cecbd98 100644 --- a/cuda_core/cuda/core/experimental/_system.py +++ b/cuda_core/cuda/core/experimental/_system.py @@ -64,5 +64,5 @@ def devices(self) -> tuple: system = System() system.__doc__ = """ -Singleton instance of the :obj:`~cuda.core.experimental._system.System` class. +Singleton instance of the :obj:`_system.System` class. """ From 5e3bdcd97e6ccc13513f15849f106addab9fb72a Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 2 Dec 2024 13:26:15 -0800 Subject: [PATCH 09/16] add copyright header --- cuda_core/cuda/core/experimental/_system.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/experimental/_system.py index 2cecbd98..258f9bcd 100644 --- a/cuda_core/cuda/core/experimental/_system.py +++ b/cuda_core/cuda/core/experimental/_system.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + from typing import Tuple from cuda import cuda, cudart From 8118f68bb6753e8f6faeb71350244138d04685a5 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 6 Dec 2024 06:15:50 +0000 Subject: [PATCH 10/16] add PY313 build pipelines --- .github/workflows/ci-gh.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 1975c3b5..0b965a52 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -27,6 +27,7 @@ jobs: upload-enabled: - false python-version: + - "3.13" - "3.12" - "3.11" - "3.10" From 1a3f1e64f55b2f0f4840ed547c31efa57c775c97 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 06:22:27 +0000 Subject: [PATCH 11/16] fix docs --- cuda_core/cuda/core/experimental/__init__.py | 6 +- cuda_core/cuda/core/experimental/_linker.py | 2 +- cuda_core/cuda/core/experimental/_system.py | 139 +++++++++---------- cuda_core/docs/source/api.rst | 12 +- cuda_core/docs/source/conf.py | 17 +++ cuda_core/docs/source/release.md | 1 - cuda_core/tests/test_system.py | 71 +++++----- 7 files changed, 138 insertions(+), 110 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 982226c7..15df70bb 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -9,4 +9,8 @@ from cuda.core.experimental._linker import Linker, LinkerOptions from cuda.core.experimental._program import Program from cuda.core.experimental._stream import Stream, StreamOptions -from cuda.core.experimental._system import system +from cuda.core.experimental._system import System + +system = System() +__import__("sys").modules[__spec__.name + ".system"] = system +del System diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 09a237a4..2beeb168 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -443,7 +443,7 @@ def link(self, target_type) -> ObjectCode: return ObjectCode(bytes(code), target_type) def get_error_log(self) -> str: - """ Get the error log generated by the linker. + """Get the error log generated by the linker. Returns ------- diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/experimental/_system.py index 258f9bcd..31c7af6f 100644 --- a/cuda_core/cuda/core/experimental/_system.py +++ b/cuda_core/cuda/core/experimental/_system.py @@ -1,72 +1,67 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - -from typing import Tuple - -from cuda import cuda, cudart -from cuda.core.experimental._device import Device -from cuda.core.experimental._utils import handle_return - - -class System: - """ Provide information about the cuda system. - This class is a singleton and should not be instantiated directly. - """ - - _instance = None - - def __new__(cls): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self): - if hasattr(self, '_initialized') and self._initialized: - return - self._initialized = True - - @property - def driver_version(self) -> Tuple[int, int]: - """ - Query the CUDA driver version. - - Returns - ------- - tuple of int - A 2-tuple of (major, minor) version numbers. - """ - version = handle_return(cuda.cuDriverGetVersion()) - major = version // 1000 - minor = (version % 1000) // 10 - return (major, minor) - - @property - def num_devices(self) -> int: - """ - Query the number of available GPUs. - - Returns - ------- - int - The number of available GPU devices. - """ - return handle_return(cudart.cudaGetDeviceCount()) - - @property - def devices(self) -> tuple: - """ - Query the available device instances. - - Returns - ------- - tuple of Device - A tuple containing instances of available devices. - """ - total = self.num_devices - return tuple(Device(device_id) for device_id in range(total)) - -system = System() -system.__doc__ = """ -Singleton instance of the :obj:`_system.System` class. -""" +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from typing import Tuple + +from cuda import cuda, cudart +from cuda.core.experimental._device import Device +from cuda.core.experimental._utils import handle_return + + +class System: + """Provide information about the cuda system. + This class is a singleton and should not be instantiated directly. + """ + + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if hasattr(self, "_initialized") and self._initialized: + return + self._initialized = True + + @property + def driver_version(self) -> Tuple[int, int]: + """ + Query the CUDA driver version. + + Returns + ------- + tuple of int + A 2-tuple of (major, minor) version numbers. + """ + version = handle_return(cuda.cuDriverGetVersion()) + major = version // 1000 + minor = (version % 1000) // 10 + return (major, minor) + + @property + def num_devices(self) -> int: + """ + Query the number of available GPUs. + + Returns + ------- + int + The number of available GPU devices. + """ + return handle_return(cudart.cudaGetDeviceCount()) + + @property + def devices(self) -> tuple: + """ + Query the available device instances. + + Returns + ------- + tuple of Device + A tuple containing instances of available devices. + """ + total = self.num_devices + return tuple(Device(device_id) for device_id in range(total)) diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index bd63f0f0..4b30c6ef 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -16,7 +16,6 @@ CUDA runtime Device launch - system :template: dataclass.rst @@ -39,6 +38,17 @@ CUDA compilation toolchain LinkerOptions +CUDA system information +----------------------- + +.. autodata:: cuda.core.experimental.system.driver_version + :no-value: +.. autodata:: cuda.core.experimental.system.num_devices + :no-value: +.. autodata:: cuda.core.experimental.system.devices + :no-value: + + .. module:: cuda.core.experimental.utils Utility functions diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py index 4621e887..3a7afc09 100644 --- a/cuda_core/docs/source/conf.py +++ b/cuda_core/docs/source/conf.py @@ -91,3 +91,20 @@ napoleon_google_docstring = False napoleon_numpy_docstring = True + + +def autodoc_process_docstring(app, what, name, obj, options, lines): + if name.startswith("cuda.core.experimental.system"): + # patch the docstring (in lines) *in-place* + attr = name.split(".")[-1] + from cuda.core.experimental._system import System + + lines_new = getattr(System, attr).__doc__.split("\n") + n_pops = len(lines) + lines.extend(lines_new) + for _ in range(n_pops): + lines.pop(0) + + +def setup(app): + app.connect("autodoc-process-docstring", autodoc_process_docstring) diff --git a/cuda_core/docs/source/release.md b/cuda_core/docs/source/release.md index 11accb59..a9e16d6e 100644 --- a/cuda_core/docs/source/release.md +++ b/cuda_core/docs/source/release.md @@ -7,6 +7,5 @@ maxdepth: 3 0.1.1 0.1.0 - 0.1.1 ``` diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index 893d1206..7a39388f 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -1,34 +1,37 @@ -try: - from cuda.bindings import driver, runtime -except ImportError: - from cuda import cuda as driver - from cuda import cudart as runtime - -from cuda.core.experimental import Device, system -from cuda.core.experimental._utils import handle_return - - -def test_system_singleton(): - system1 = system - system2 = system - assert id(system1) == id(system2), "system is not a singleton" - -def test_driver_version(): - driver_version = system.driver_version - print(driver_version) - version = handle_return(driver.cuDriverGetVersion()) - expected_driver_version = (version // 1000, (version % 1000) // 10) - assert driver_version == expected_driver_version, "Driver version does not match expected value" - -def test_num_devices(): - num_devices = system.num_devices - expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) - assert num_devices == expected_num_devices, "Number of devices does not match expected value" - -def test_devices(): - devices = system.devices - expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) - expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices)) - assert len(devices) == len(expected_devices), "Number of devices does not match expected value" - for device, expected_device in zip(devices, expected_devices): - assert device.device_id == expected_device.device_id, "Device ID does not match expected value" +try: + from cuda.bindings import driver, runtime +except ImportError: + from cuda import cuda as driver + from cuda import cudart as runtime + +from cuda.core.experimental import Device, system +from cuda.core.experimental._utils import handle_return + + +def test_system_singleton(): + system1 = system + system2 = system + assert id(system1) == id(system2), "system is not a singleton" + + +def test_driver_version(): + driver_version = system.driver_version + print(driver_version) + version = handle_return(driver.cuDriverGetVersion()) + expected_driver_version = (version // 1000, (version % 1000) // 10) + assert driver_version == expected_driver_version, "Driver version does not match expected value" + + +def test_num_devices(): + num_devices = system.num_devices + expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) + assert num_devices == expected_num_devices, "Number of devices does not match expected value" + + +def test_devices(): + devices = system.devices + expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) + expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices)) + assert len(devices) == len(expected_devices), "Number of devices does not match expected value" + for device, expected_device in zip(devices, expected_devices): + assert device.device_id == expected_device.device_id, "Device ID does not match expected value" From 6a595945a4a25f35cc4cfd312ac797fd30bc435c Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 9 Dec 2024 12:06:56 -0800 Subject: [PATCH 12/16] fix build warning and output format of docs --- cuda_core/docs/source/conf.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py index 3a7afc09..4b3e17ae 100644 --- a/cuda_core/docs/source/conf.py +++ b/cuda_core/docs/source/conf.py @@ -93,15 +93,26 @@ napoleon_numpy_docstring = True +section_titles = ["Returns"] def autodoc_process_docstring(app, what, name, obj, options, lines): if name.startswith("cuda.core.experimental.system"): - # patch the docstring (in lines) *in-place* + # patch the docstring (in lines) *in-place*. Should docstrings include section titles other than "Returns", + # this will need to be modified to handle them. attr = name.split(".")[-1] from cuda.core.experimental._system import System lines_new = getattr(System, attr).__doc__.split("\n") + formatted_lines = [] + for line in lines_new: + title = line.strip() + if title in section_titles: + formatted_lines.append(line.replace(title, f".. rubric:: {title}")) + elif line.strip() == "-" * len(title): + formatted_lines.append(" " * len(title)) + else: + formatted_lines.append(line) n_pops = len(lines) - lines.extend(lines_new) + lines.extend(formatted_lines) for _ in range(n_pops): lines.pop(0) From 769ac6679e06b10eb609d8df1cfb19fb58cdf9c4 Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Mon, 9 Dec 2024 12:10:13 -0800 Subject: [PATCH 13/16] Update cuda_core/docs/source/api_private.rst Co-authored-by: Leo Fang --- cuda_core/docs/source/api_private.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index a833d69c..f100eb7c 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,7 +16,6 @@ CUDA runtime _memory.Buffer _stream.Stream _event.Event - _system.System CUDA compilation toolchain From 64cbc4c2a600cca5c9eba166fb21091a4c1f0d46 Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Tue, 10 Dec 2024 14:02:07 -0800 Subject: [PATCH 14/16] Lazy load code modules (#269) Lazy load module in ObjectCode --- cuda_core/cuda/core/experimental/_module.py | 40 ++++++++++----- cuda_core/tests/conftest.py | 10 +++- cuda_core/tests/test_module.py | 55 +++++++-------------- cuda_core/tests/test_program.py | 10 +--- 4 files changed, 53 insertions(+), 62 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 69dbcd37..7a4fc0e2 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -5,7 +5,7 @@ import importlib.metadata from cuda import cuda -from cuda.core.experimental._utils import handle_return +from cuda.core.experimental._utils import handle_return, precondition _backend = { "old": { @@ -106,30 +106,43 @@ class ObjectCode: """ - __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map") + __slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): if code_type not in self._supported_code_type: raise ValueError _lazy_init() + + # handle is assigned during _lazy_load self._handle = None + self._jit_options = jit_options + + self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" + self._loader = _backend[self._backend_version] + + self._code_type = code_type + self._module = module + self._sym_map = {} if symbol_mapping is None else symbol_mapping - backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" - self._loader = _backend[backend] + # TODO: do we want to unload in a finalizer? Probably not.. + def _lazy_load_module(self, *args, **kwargs): + if self._handle is not None: + return + jit_options = self._jit_options + module = self._module if isinstance(module, str): # TODO: this option is only taken by the new library APIs, but we have # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73). if jit_options is not None: raise ValueError - module = module.encode() self._handle = handle_return(self._loader["file"](module)) else: assert isinstance(module, bytes) if jit_options is None: jit_options = {} - if backend == "new": + if self._backend_version == "new": args = ( module, list(jit_options.keys()), @@ -141,15 +154,15 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): 0, ) else: # "old" backend - args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values())) + args = ( + module, + len(jit_options), + list(jit_options.keys()), + list(jit_options.values()), + ) self._handle = handle_return(self._loader["data"](*args)) - self._code_type = code_type - self._module = module - self._sym_map = {} if symbol_mapping is None else symbol_mapping - - # TODO: do we want to unload in a finalizer? Probably not.. - + @precondition(_lazy_load_module) def get_kernel(self, name): """Return the :obj:`Kernel` of a specified name from this object code. @@ -168,6 +181,7 @@ def get_kernel(self, name): name = self._sym_map[name] except KeyError: name = name.encode() + data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index fe755738..30d80f6f 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -11,10 +11,10 @@ import sys try: - from cuda.bindings import driver + from cuda.bindings import driver, nvrtc except ImportError: from cuda import cuda as driver - + from cuda import nvrtc import pytest from cuda.core.experimental import Device, _device @@ -65,3 +65,9 @@ def clean_up_cffi_files(): os.remove(f) except FileNotFoundError: pass # noqa: SIM105 + + +def can_load_generated_ptx(): + _, driver_ver = driver.cuDriverGetVersion() + _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() + return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index a976726f..b2519c85 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -6,43 +6,22 @@ # this software and related documentation outside the terms of the EULA # is strictly prohibited. -import importlib import pytest - -from cuda.core.experimental._module import ObjectCode - - -@pytest.mark.skipif( - int(importlib.metadata.version("cuda-python").split(".")[0]) < 12, - reason="Module loading for older drivers validate require valid module code.", -) -def test_object_code_initialization(): - # Test with supported code types - for code_type in ["cubin", "ptx", "fatbin"]: - module_data = b"dummy_data" - obj_code = ObjectCode(module_data, code_type) - assert obj_code._code_type == code_type - assert obj_code._module == module_data - assert obj_code._handle is not None - - # Test with unsupported code type - with pytest.raises(ValueError): - ObjectCode(b"dummy_data", "unsupported_code_type") - - -# TODO add ObjectCode tests which provide the appropriate data for cuLibraryLoadFromFile -def test_object_code_initialization_with_str(): - assert True - - -def test_object_code_initialization_with_jit_options(): - assert True - - -def test_object_code_get_kernel(): - assert True - - -def test_kernel_from_obj(): - assert True +from conftest import can_load_generated_ptx + +from cuda.core.experimental import Program + + +@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new") +def test_get_kernel(): + kernel = """ +extern __device__ int B(); +extern __device__ int C(int a, int b); +__global__ void A() { int result = C(B(), 1);} +""" + object_code = Program(kernel, "c++").compile("ptx", options=("-rdc=true",)) + assert object_code._handle is None + kernel = object_code.get_kernel("A") + assert object_code._handle is not None + assert kernel._handle is not None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index f1c24b3e..cca01af5 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -7,20 +7,12 @@ # is strictly prohibited. import pytest +from conftest import can_load_generated_ptx -from cuda import cuda, nvrtc from cuda.core.experimental import Device, Program from cuda.core.experimental._module import Kernel, ObjectCode -def can_load_generated_ptx(): - _, driver_ver = cuda.cuDriverGetVersion() - _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() - if nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver: - return False - return True - - def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") From 10c9db6734b12987eb4f966ab403d988ce342f27 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 11 Dec 2024 17:52:24 -0800 Subject: [PATCH 15/16] bump version to v0.1.1 --- cuda_core/cuda/core/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/_version.py b/cuda_core/cuda/core/_version.py index 11d772f4..12007cf3 100644 --- a/cuda_core/cuda/core/_version.py +++ b/cuda_core/cuda/core/_version.py @@ -2,4 +2,4 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -__version__ = "0.1.0" +__version__ = "0.1.1" From f1267cd7fa7f0aac665da8acadbbe86db5b9cb0b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 12 Dec 2024 23:09:01 -0500 Subject: [PATCH 16/16] Add GPU runner for linux-aarch64 (#289) * add linux-aarch64 GPU runner * fix test skip condition * check system * try to run on bare image * fix * change to ubuntu container * Update gh-build-and-test.yml * Update action.yml * fix apt install --- .github/actions/test/action.yml | 7 +++++++ .github/workflows/gh-build-and-test.yml | 12 +++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 66468bd1..079dd039 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -14,6 +14,13 @@ runs: shell: bash --noprofile --norc -xeuo pipefail {0} run: nvidia-smi + # The cache action needs this + - name: Install zstd + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + apt update + apt install zstd + - name: Download bindings build artifacts uses: actions/download-artifact@v4 with: diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 06f6a168..dac1ff48 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -76,17 +76,19 @@ jobs: test: # TODO: improve the name once a separate test matrix is defined name: Test (CUDA ${{ inputs.cuda-version }}) - # TODO: enable testing once linux-aarch64 & win-64 GPU runners are up + # TODO: enable testing once win-64 GPU runners are up if: ${{ (github.repository_owner == 'nvidia') && - startsWith(inputs.host-platform, 'linux-x64') }} + startsWith(inputs.host-platform, 'linux') }} permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} - # TODO: use a different (nvidia?) container, or just run on bare image + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || + (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }} + # Our self-hosted runners require a container + # TODO: use a different (nvidia?) container container: options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g - image: condaforge/miniforge3:latest + image: ubuntu:22.04 env: NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} needs: