diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
index bb4b4cdb1..5cc8219c0 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
@@ -6,8 +6,10 @@
 # this software and related documentation outside the terms of the EULA
 # is strictly prohibited.
 {{if 'Windows' == platform.system()}}
-import win32api
+import os
+import site
 import struct
+import win32api
 from pywintypes import error
 {{else}}
 cimport cuda.bindings._lib.dlfcn as dlfcn
@@ -44,16 +46,51 @@ cdef int cuPythonInit() except -1 nogil:
 
     # Load library
     {{if 'Windows' == platform.system()}}
-    LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
     with gil:
+        # First check if the DLL has been loaded by 3rd parties
         try:
-            handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
+            handle = win32api.GetModuleHandle("nvrtc64_120_0.dll")
         except:
+            handle = None
+
+        # Else try default search
+        if not handle:
+            LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
+            try:
+                handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
+            except Exception:
+                pass
+
+        # Final check if DLLs can be found within pip installations
+        if not handle:
+            LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
+            LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
+            site_packages = [site.getusersitepackages()] + site.getsitepackages()
+            for sp in site_packages:
+                mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin")
+                if not os.path.isdir(mod_path):
+                    continue
+                os.add_dll_directory(mod_path)
+                try:
+                    handle = win32api.LoadLibraryEx(
+                        # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
+                        os.path.join(mod_path, "nvrtc64_120_0.dll"),
+                        0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
+
+                    # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is
+                    # located in the same mod_path.
+                    # Update PATH environ so that the two dlls can find each other
+                    os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
+                except Exception:
+                    pass
+                if handle:
+                    # Stop at the first site-packages entry that provides the DLL
+                    break
+
+        if not handle:
             raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll')
     {{else}}
-    handle = NULL
-    if handle == NULL:
-        handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
+    handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
     if handle == NULL:
         with gil:
             raise RuntimeError('Failed to dlopen libnvrtc.so.12')
diff --git a/cuda_bindings/docs/source/release/12.x.y-notes.md b/cuda_bindings/docs/source/release/12.x.y-notes.md
index 5cc603924..9eff5ac70 100644
--- a/cuda_bindings/docs/source/release/12.x.y-notes.md
+++ b/cuda_bindings/docs/source/release/12.x.y-notes.md
@@ -3,4 +3,22 @@
 Released on MM DD, 20YY.
 
 ## Highlights
-- Added bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above.
+- Add bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above.
+- Add optional dependencies to wheels for NVRTC and nvJitLink.
+- Enable discovery and loading of shared library dependencies from wheels.
+
+## Wheels support for optional dependencies
+
+Optional dependencies are added for packages:
+
+- nvidia-nvjitlink-cuXX
+- nvidia-cuda-nvrtc-cuXX
+
+Installing these dependencies with cuda-python can be done using:
+```{code-block} shell
+pip install cuda-python[all]
+```
+
+## Discovery and loading of shared library dependencies from wheels
+
+Shared library search paths for wheel builds are now extended to check site-packages. This allows users to seamlessly use their wheel installation of the CUDA Toolkit with cuda-python.
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
index 12de0ae80..7ea87f967 100644
--- a/cuda_bindings/pyproject.toml
+++ b/cuda_bindings/pyproject.toml
@@ -32,6 +32,12 @@ dependencies = [
     "pywin32; sys_platform == 'win32'",
 ]
 
+[project.optional-dependencies]
+all = [
+    "nvidia-cuda-nvrtc-cu12",
+    "nvidia-nvjitlink-cu12>=12.3",
+]
+
 [project.urls]
 Repository = "https://github.com/NVIDIA/cuda-python"
 Documentation = "https://nvidia.github.io/cuda-python/"
diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py
index 6217d88a8..7c8725ee8 100644
--- a/cuda_bindings/setup.py
+++ b/cuda_bindings/setup.py
@@ -20,15 +20,14 @@
 from Cython.Build import cythonize
 from pyclibrary import CParser
 from setuptools import find_packages, setup
+from setuptools.command.bdist_wheel import bdist_wheel
 from setuptools.command.build_ext import build_ext
 from setuptools.extension import Extension
 
 # ----------------------------------------------------------------------
 # Fetch configuration options
 
-CUDA_HOME = os.environ.get("CUDA_HOME")
-if not CUDA_HOME:
-    CUDA_HOME = os.environ.get("CUDA_PATH")
+CUDA_HOME = os.environ.get("CUDA_HOME") or os.environ.get("CUDA_PATH")
 if not CUDA_HOME:
     raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")
 
@@ -283,24 +282,55 @@ def do_cythonize(extensions):
     extensions += prep_extensions(sources)
 
 # ---------------------------------------------------------------------
-# Custom build_ext command
-# Files are build in two steps:
-# 1) Cythonized (in the do_cythonize() command)
-# 2) Compiled to .o files as part of build_ext
-# This class is solely for passing the value of nthreads to build_ext
+# Custom cmdclass extensions
+
+# Set to True by WheelsBuildExtensions.run() so build_extension() can tell a wheel build apart
+building_wheel = False
+
+
+class WheelsBuildExtensions(bdist_wheel):
+    """bdist_wheel command that flags a wheel build as in progress before running."""
+
+    def run(self):
+        global building_wheel
+        building_wheel = True
+        super().run()
 
 
 class ParallelBuildExtensions(build_ext):
     def initialize_options(self):
-        build_ext.initialize_options(self)
+        super().initialize_options()
         if nthreads > 0:
             self.parallel = nthreads
 
-    def finalize_options(self):
-        build_ext.finalize_options(self)
-
-
-cmdclass = {"build_ext": ParallelBuildExtensions}
+    def build_extension(self, ext):
+        """Append wheel-only linker flags (Linux wheel builds only), then build ``ext``."""
+        if building_wheel and sys.platform == "linux":
+            # Strip binaries to remove debug symbols
+            extra_linker_flags = ["-Wl,--strip-all"]
+
+            # Allow extensions to discover libraries at runtime
+            # relative to their wheel installation.
+            if ext.name == "cuda.bindings._bindings.cynvrtc":
+                ldflag = "-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib"
+            elif ext.name == "cuda.bindings._internal.nvjitlink":
+                ldflag = "-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/nvjitlink/lib"
+            else:
+                ldflag = None
+
+            if ldflag:
+                extra_linker_flags.append(ldflag)
+        else:
+            extra_linker_flags = []
+
+        ext.extra_link_args += extra_linker_flags
+        super().build_extension(ext)
+
+
+cmdclass = {
+    "bdist_wheel": WheelsBuildExtensions,
+    "build_ext": ParallelBuildExtensions,
+}
 
 # ----------------------------------------------------------------------
 # Setup