Skip to content

Add get_cuda_native_handle #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ cuda_bindings/cuda/bindings/runtime.pxd
cuda_bindings/cuda/bindings/runtime.pyx
cuda_bindings/cuda/bindings/nvrtc.pxd
cuda_bindings/cuda/bindings/nvrtc.pyx
cuda_bindings/cuda/bindings/utils/_get_handle.pyx

# Distribution / packaging
.Python
Expand Down Expand Up @@ -181,4 +182,4 @@ dmypy.json
cython_debug/

# Dont ignore
!.github/actions/build/
!.github/actions/build/
Empty file.
1 change: 1 addition & 0 deletions cuda_bindings/cuda/bindings/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

from ._get_handle import get_cuda_native_handle
from ._ptx_utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver
230 changes: 230 additions & 0 deletions cuda_bindings/cuda/bindings/utils/_get_handle.pyx.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# This code was automatically generated with version 12.9.0. Do not modify it directly.

from libc.stdint cimport uintptr_t
cimport cython

from cuda.bindings cimport driver, runtime, cydriver, cyruntime


cdef dict _handle_getters = None

@cython.embedsignature(True)
def get_cuda_native_handle(obj) -> int:
""" Returns the address of the provided CUDA Python object as Python int.

Parameters
----------
obj : Any
CUDA Python object

Returns
-------
int : The object address.
"""
global _handle_getters
obj_type = type(obj)
if _handle_getters is None:
_handle_getters = dict()
{{if 'CUcontext' in found_types}}
def CUcontext_getter(driver.CUcontext x): return <uintptr_t><void*><cydriver.CUcontext>(x._pvt_ptr[0])
_handle_getters[driver.CUcontext] = CUcontext_getter
{{endif}}
{{if 'CUmodule' in found_types}}
def CUmodule_getter(driver.CUmodule x): return <uintptr_t><void*><cydriver.CUmodule>(x._pvt_ptr[0])
_handle_getters[driver.CUmodule] = CUmodule_getter
{{endif}}
{{if 'CUfunction' in found_types}}
def CUfunction_getter(driver.CUfunction x): return <uintptr_t><void*><cydriver.CUfunction>(x._pvt_ptr[0])
_handle_getters[driver.CUfunction] = CUfunction_getter
{{endif}}
{{if 'CUlibrary' in found_types}}
def CUlibrary_getter(driver.CUlibrary x): return <uintptr_t><void*><cydriver.CUlibrary>(x._pvt_ptr[0])
_handle_getters[driver.CUlibrary] = CUlibrary_getter
{{endif}}
{{if 'CUkernel' in found_types}}
def CUkernel_getter(driver.CUkernel x): return <uintptr_t><void*><cydriver.CUkernel>(x._pvt_ptr[0])
_handle_getters[driver.CUkernel] = CUkernel_getter
{{endif}}
{{if 'CUarray' in found_types}}
def CUarray_getter(driver.CUarray x): return <uintptr_t><void*><cydriver.CUarray>(x._pvt_ptr[0])
_handle_getters[driver.CUarray] = CUarray_getter
{{endif}}
{{if 'CUmipmappedArray' in found_types}}
def CUmipmappedArray_getter(driver.CUmipmappedArray x): return <uintptr_t><void*><cydriver.CUmipmappedArray>(x._pvt_ptr[0])
_handle_getters[driver.CUmipmappedArray] = CUmipmappedArray_getter
{{endif}}
{{if 'CUtexref' in found_types}}
def CUtexref_getter(driver.CUtexref x): return <uintptr_t><void*><cydriver.CUtexref>(x._pvt_ptr[0])
_handle_getters[driver.CUtexref] = CUtexref_getter
{{endif}}
{{if 'CUsurfref' in found_types}}
def CUsurfref_getter(driver.CUsurfref x): return <uintptr_t><void*><cydriver.CUsurfref>(x._pvt_ptr[0])
_handle_getters[driver.CUsurfref] = CUsurfref_getter
{{endif}}
{{if 'CUevent' in found_types}}
def CUevent_getter(driver.CUevent x): return <uintptr_t><void*><cydriver.CUevent>(x._pvt_ptr[0])
_handle_getters[driver.CUevent] = CUevent_getter
{{endif}}
{{if 'CUstream' in found_types}}
def CUstream_getter(driver.CUstream x): return <uintptr_t><void*><cydriver.CUstream>(x._pvt_ptr[0])
_handle_getters[driver.CUstream] = CUstream_getter
{{endif}}
{{if 'CUgraphicsResource' in found_types}}
def CUgraphicsResource_getter(driver.CUgraphicsResource x): return <uintptr_t><void*><cydriver.CUgraphicsResource>(x._pvt_ptr[0])
_handle_getters[driver.CUgraphicsResource] = CUgraphicsResource_getter
{{endif}}
{{if 'CUexternalMemory' in found_types}}
def CUexternalMemory_getter(driver.CUexternalMemory x): return <uintptr_t><void*><cydriver.CUexternalMemory>(x._pvt_ptr[0])
_handle_getters[driver.CUexternalMemory] = CUexternalMemory_getter
{{endif}}
{{if 'CUexternalSemaphore' in found_types}}
def CUexternalSemaphore_getter(driver.CUexternalSemaphore x): return <uintptr_t><void*><cydriver.CUexternalSemaphore>(x._pvt_ptr[0])
_handle_getters[driver.CUexternalSemaphore] = CUexternalSemaphore_getter
{{endif}}
{{if 'CUgraph' in found_types}}
def CUgraph_getter(driver.CUgraph x): return <uintptr_t><void*><cydriver.CUgraph>(x._pvt_ptr[0])
_handle_getters[driver.CUgraph] = CUgraph_getter
{{endif}}
{{if 'CUgraphNode' in found_types}}
def CUgraphNode_getter(driver.CUgraphNode x): return <uintptr_t><void*><cydriver.CUgraphNode>(x._pvt_ptr[0])
_handle_getters[driver.CUgraphNode] = CUgraphNode_getter
{{endif}}
{{if 'CUgraphExec' in found_types}}
def CUgraphExec_getter(driver.CUgraphExec x): return <uintptr_t><void*><cydriver.CUgraphExec>(x._pvt_ptr[0])
_handle_getters[driver.CUgraphExec] = CUgraphExec_getter
{{endif}}
{{if 'CUmemoryPool' in found_types}}
def CUmemoryPool_getter(driver.CUmemoryPool x): return <uintptr_t><void*><cydriver.CUmemoryPool>(x._pvt_ptr[0])
_handle_getters[driver.CUmemoryPool] = CUmemoryPool_getter
{{endif}}
{{if 'CUuserObject' in found_types}}
def CUuserObject_getter(driver.CUuserObject x): return <uintptr_t><void*><cydriver.CUuserObject>(x._pvt_ptr[0])
_handle_getters[driver.CUuserObject] = CUuserObject_getter
{{endif}}
{{if 'CUgraphDeviceNode' in found_types}}
def CUgraphDeviceNode_getter(driver.CUgraphDeviceNode x): return <uintptr_t><void*><cydriver.CUgraphDeviceNode>(x._pvt_ptr[0])
_handle_getters[driver.CUgraphDeviceNode] = CUgraphDeviceNode_getter
{{endif}}
{{if 'CUasyncCallbackHandle' in found_types}}
def CUasyncCallbackHandle_getter(driver.CUasyncCallbackHandle x): return <uintptr_t><void*><cydriver.CUasyncCallbackHandle>(x._pvt_ptr[0])
_handle_getters[driver.CUasyncCallbackHandle] = CUasyncCallbackHandle_getter
{{endif}}
{{if 'CUgreenCtx' in found_types}}
def CUgreenCtx_getter(driver.CUgreenCtx x): return <uintptr_t><void*><cydriver.CUgreenCtx>(x._pvt_ptr[0])
_handle_getters[driver.CUgreenCtx] = CUgreenCtx_getter
{{endif}}
{{if 'CUlinkState' in found_types}}
def CUlinkState_getter(driver.CUlinkState x): return <uintptr_t><void*><cydriver.CUlinkState>(x._pvt_ptr[0])
_handle_getters[driver.CUlinkState] = CUlinkState_getter
{{endif}}
{{if 'CUdevResourceDesc' in found_types}}
def CUdevResourceDesc_getter(driver.CUdevResourceDesc x): return <uintptr_t><void*><cydriver.CUdevResourceDesc>(x._pvt_ptr[0])
_handle_getters[driver.CUdevResourceDesc] = CUdevResourceDesc_getter
{{endif}}
{{if 'CUlogsCallbackHandle' in found_types}}
def CUlogsCallbackHandle_getter(driver.CUlogsCallbackHandle x): return <uintptr_t><void*><cydriver.CUlogsCallbackHandle>(x._pvt_ptr[0])
_handle_getters[driver.CUlogsCallbackHandle] = CUlogsCallbackHandle_getter
{{endif}}
{{if True}}
def CUeglStreamConnection_getter(driver.CUeglStreamConnection x): return <uintptr_t><void*><cydriver.CUeglStreamConnection>(x._pvt_ptr[0])
_handle_getters[driver.CUeglStreamConnection] = CUeglStreamConnection_getter
{{endif}}
{{if True}}
def EGLImageKHR_getter(runtime.EGLImageKHR x): return <uintptr_t><void*><cyruntime.EGLImageKHR>(x._pvt_ptr[0])
_handle_getters[runtime.EGLImageKHR] = EGLImageKHR_getter
{{endif}}
{{if True}}
def EGLStreamKHR_getter(runtime.EGLStreamKHR x): return <uintptr_t><void*><cyruntime.EGLStreamKHR>(x._pvt_ptr[0])
_handle_getters[runtime.EGLStreamKHR] = EGLStreamKHR_getter
{{endif}}
{{if True}}
def EGLSyncKHR_getter(runtime.EGLSyncKHR x): return <uintptr_t><void*><cyruntime.EGLSyncKHR>(x._pvt_ptr[0])
_handle_getters[runtime.EGLSyncKHR] = EGLSyncKHR_getter
{{endif}}
{{if 'cudaArray_t' in found_types}}
def cudaArray_t_getter(runtime.cudaArray_t x): return <uintptr_t><void*><cyruntime.cudaArray_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaArray_t] = cudaArray_t_getter
{{endif}}
{{if 'cudaArray_const_t' in found_types}}
def cudaArray_const_t_getter(runtime.cudaArray_const_t x): return <uintptr_t><void*><cyruntime.cudaArray_const_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaArray_const_t] = cudaArray_const_t_getter
{{endif}}
{{if 'cudaMipmappedArray_t' in found_types}}
def cudaMipmappedArray_t_getter(runtime.cudaMipmappedArray_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaMipmappedArray_t] = cudaMipmappedArray_t_getter
{{endif}}
{{if 'cudaMipmappedArray_const_t' in found_types}}
def cudaMipmappedArray_const_t_getter(runtime.cudaMipmappedArray_const_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_const_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaMipmappedArray_const_t] = cudaMipmappedArray_const_t_getter
{{endif}}
{{if 'cudaStream_t' in found_types}}
def cudaStream_t_getter(runtime.cudaStream_t x): return <uintptr_t><void*><cyruntime.cudaStream_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaStream_t] = cudaStream_t_getter
{{endif}}
{{if 'cudaEvent_t' in found_types}}
def cudaEvent_t_getter(runtime.cudaEvent_t x): return <uintptr_t><void*><cyruntime.cudaEvent_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaEvent_t] = cudaEvent_t_getter
{{endif}}
{{if 'cudaGraphicsResource_t' in found_types}}
def cudaGraphicsResource_t_getter(runtime.cudaGraphicsResource_t x): return <uintptr_t><void*><cyruntime.cudaGraphicsResource_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaGraphicsResource_t] = cudaGraphicsResource_t_getter
{{endif}}
{{if 'cudaExternalMemory_t' in found_types}}
def cudaExternalMemory_t_getter(runtime.cudaExternalMemory_t x): return <uintptr_t><void*><cyruntime.cudaExternalMemory_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaExternalMemory_t] = cudaExternalMemory_t_getter
{{endif}}
{{if 'cudaExternalSemaphore_t' in found_types}}
def cudaExternalSemaphore_t_getter(runtime.cudaExternalSemaphore_t x): return <uintptr_t><void*><cyruntime.cudaExternalSemaphore_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaExternalSemaphore_t] = cudaExternalSemaphore_t_getter
{{endif}}
{{if 'cudaGraph_t' in found_types}}
def cudaGraph_t_getter(runtime.cudaGraph_t x): return <uintptr_t><void*><cyruntime.cudaGraph_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaGraph_t] = cudaGraph_t_getter
{{endif}}
{{if 'cudaGraphNode_t' in found_types}}
def cudaGraphNode_t_getter(runtime.cudaGraphNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphNode_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaGraphNode_t] = cudaGraphNode_t_getter
{{endif}}
{{if 'cudaUserObject_t' in found_types}}
def cudaUserObject_t_getter(runtime.cudaUserObject_t x): return <uintptr_t><void*><cyruntime.cudaUserObject_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaUserObject_t] = cudaUserObject_t_getter
{{endif}}
{{if 'cudaFunction_t' in found_types}}
def cudaFunction_t_getter(runtime.cudaFunction_t x): return <uintptr_t><void*><cyruntime.cudaFunction_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaFunction_t] = cudaFunction_t_getter
{{endif}}
{{if 'cudaKernel_t' in found_types}}
def cudaKernel_t_getter(runtime.cudaKernel_t x): return <uintptr_t><void*><cyruntime.cudaKernel_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaKernel_t] = cudaKernel_t_getter
{{endif}}
{{if 'cudaLibrary_t' in found_types}}
def cudaLibrary_t_getter(runtime.cudaLibrary_t x): return <uintptr_t><void*><cyruntime.cudaLibrary_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaLibrary_t] = cudaLibrary_t_getter
{{endif}}
{{if 'cudaMemPool_t' in found_types}}
def cudaMemPool_t_getter(runtime.cudaMemPool_t x): return <uintptr_t><void*><cyruntime.cudaMemPool_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaMemPool_t] = cudaMemPool_t_getter
{{endif}}
{{if 'cudaGraphExec_t' in found_types}}
def cudaGraphExec_t_getter(runtime.cudaGraphExec_t x): return <uintptr_t><void*><cyruntime.cudaGraphExec_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaGraphExec_t] = cudaGraphExec_t_getter
{{endif}}
{{if 'cudaGraphDeviceNode_t' in found_types}}
def cudaGraphDeviceNode_t_getter(runtime.cudaGraphDeviceNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphDeviceNode_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaGraphDeviceNode_t] = cudaGraphDeviceNode_t_getter
{{endif}}
{{if 'cudaAsyncCallbackHandle_t' in found_types}}
def cudaAsyncCallbackHandle_t_getter(runtime.cudaAsyncCallbackHandle_t x): return <uintptr_t><void*><cyruntime.cudaAsyncCallbackHandle_t>(x._pvt_ptr[0])
_handle_getters[runtime.cudaAsyncCallbackHandle_t] = cudaAsyncCallbackHandle_t_getter
{{endif}}
{{if True}}
def cudaEglStreamConnection_getter(runtime.cudaEglStreamConnection x): return <uintptr_t><void*><cyruntime.cudaEglStreamConnection>(x._pvt_ptr[0])
_handle_getters[runtime.cudaEglStreamConnection] = cudaEglStreamConnection_getter
{{endif}}
try:
return _handle_getters[obj_type](obj)
except KeyError:
raise TypeError("Unknown type: " + str(obj_type)) from None
5 changes: 3 additions & 2 deletions cuda_bindings/docs/source/module/utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

.. module:: cuda.bindings.utils

Utils module
============
utils
=====

Functions
---------

.. autosummary::
:toctree: generated/

get_cuda_native_handle
get_minimal_required_cuda_ver_from_ptx_ver
get_ptx_ver
6 changes: 6 additions & 0 deletions cuda_bindings/docs/source/release/12.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ Released on MM DD, 2025
Highlights
----------

* A utility module :mod:`cuda.bindings.utils` is added

* Using ``int(cuda_obj)`` to retrieve the underlying address of a CUDA object is deprecated and
subject to future removal. Please switch to use :func:`~cuda.bindings.utils.get_cuda_native_handle`
instead.

* The ``cuda.bindings.cufile`` Python module was added, wrapping the
`cuFile C APIs <https://docs.nvidia.com/gpudirect-storage/api-reference-guide/index.html>`_.
Supported on Linux only.
Expand Down
6 changes: 6 additions & 0 deletions cuda_bindings/docs/source/tips_and_tricks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ All CUDA C types are exposed to Python as Python classes. For example, the :clas

There is an important distinction between the ``getPtr()`` method and the behaviour of ``__int__()``. Since a ``CUstream`` is itself just a pointer, calling ``instance_of_CUstream.getPtr()`` returns the pointer *to* the pointer, instead of the value of the ``CUstream`` C object that is the pointer to the underlying stream handle. ``int(instance_of_CUstream)`` returns the value of the ``CUstream`` converted to a Python int and is the actual address of the underlying handle.

.. warning::

Using ``int(cuda_obj)`` to retrieve the underlying address of a CUDA object is deprecated and
subject to future removal. Please switch to use :func:`~cuda.bindings.utils.get_cuda_native_handle`
instead.


Lifetime management of the CUDA objects
=======================================
Expand Down
3 changes: 3 additions & 0 deletions cuda_bindings/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def generate_output(infile, local):
os.path.join("cuda", "bindings", "_lib"),
os.path.join("cuda", "bindings", "_lib", "cyruntime"),
os.path.join("cuda", "bindings", "_internal"),
os.path.join("cuda", "bindings", "utils"),
]
input_files = []
for path in path_list:
Expand Down Expand Up @@ -287,6 +288,7 @@ def prep_extensions(sources, libraries):

# new path for the bindings from cybind
def rename_architecture_specific_files():
path = os.path.join("cuda", "bindings", "_internal")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: this is a bug fix exposed by the new line 224. Previously, the path variable happened to be right (leaking from the loop variable at line 227), but it's not longer correct after we append one more item to path_list.

if sys.platform == "linux":
src_files = glob.glob(os.path.join(path, "*_linux.pyx"))
elif sys.platform == "win32":
Expand Down Expand Up @@ -341,6 +343,7 @@ def do_cythonize(extensions):
(["cuda/bindings/_lib/utils.pyx", "cuda/bindings/_lib/param_packer.cpp"], None),
(["cuda/bindings/_lib/cyruntime/cyruntime.pyx"], None),
(["cuda/bindings/_lib/cyruntime/utils.pyx"], None),
(["cuda/bindings/utils/*.pyx"], None),
# public
*(([f], None) for f in cuda_bindings_files),
# public (deprecated, to be removed)
Expand Down
48 changes: 47 additions & 1 deletion cuda_bindings/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import random

import pytest

from cuda.bindings.utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver
from cuda.bindings import driver, runtime
from cuda.bindings.utils import get_cuda_native_handle, get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver

ptx_88_kernel = r"""
.version 8.8
Expand Down Expand Up @@ -41,3 +44,46 @@ def test_ptx_utils(kernel, actual_ptx_ver, min_cuda_ver):
assert ptx_ver == actual_ptx_ver
cuda_ver = get_minimal_required_cuda_ver_from_ptx_ver(ptx_ver)
assert cuda_ver == min_cuda_ver


@pytest.mark.parametrize(
"target",
(
driver.CUcontext,
driver.CUstream,
driver.CUevent,
driver.CUmodule,
driver.CUlibrary,
driver.CUfunction,
driver.CUkernel,
driver.CUgraph,
driver.CUgraphNode,
driver.CUgraphExec,
driver.CUmemoryPool,
runtime.cudaStream_t,
runtime.cudaEvent_t,
runtime.cudaGraph_t,
runtime.cudaGraphNode_t,
runtime.cudaGraphExec_t,
runtime.cudaMemPool_t,
),
)
def test_get_handle(target):
ptr = random.randint(1, 1024)
obj = target(ptr)
handle = get_cuda_native_handle(obj)
assert handle == ptr


@pytest.mark.parametrize(
"target",
(
(1, 2, 3, 4),
[5, 6],
{},
None,
),
)
def test_get_handle_error(target):
with pytest.raises(TypeError) as e:
handle = get_cuda_native_handle(target)
Loading