From d6a792e81a86895db668bb79785bcebbb15e904e Mon Sep 17 00:00:00 2001
From: totaam
Date: Tue, 12 Sep 2023 21:20:14 +0700
Subject: [PATCH] #3808 build CUDA kernels in advance

---
 packaging/rpm/xpra.spec |  43 +++++------
 setup.py                | 153 ++++++++--------------------------------
 2 files changed, 53 insertions(+), 143 deletions(-)

diff --git a/packaging/rpm/xpra.spec b/packaging/rpm/xpra.spec
index fcb3c58d90..6a886bc309 100644
--- a/packaging/rpm/xpra.spec
+++ b/packaging/rpm/xpra.spec
@@ -31,33 +31,17 @@ autoprov: no
 %endif
 %define CFLAGS -O2
 
+%if 0%{?fedora}>=39
+%global debug_package %{nil}
+%define DEFAULT_BUILD_ARGS --with-Xdummy --without-Xdummy_wrapper --without-csc_cython --without-evdi --without-cuda_rebuild --without-pandoc_lua
+%else
 %define DEFAULT_BUILD_ARGS --with-Xdummy --without-Xdummy_wrapper --without-csc_cython --without-evdi --without-cuda_rebuild
+%endif
 
 %{!?nthreads: %global nthreads %(nproc)}
 %{!?update_firewall: %define update_firewall 1}
 %{!?run_tests: %define run_tests 0}
 %{!?with_selinux: %define with_selinux 1}
-#we only enable CUDA / NVENC with 64-bit builds:
-%if 0%{?with_cuda}%{?nvidia_codecs}
-%define nvidia_codecs 1
-%else
-#detect:
-%if 0%{?fedora}>=38
-%define nvidia_codecs 0
-%else
-%{!?nvidia_codecs: %define nvidia_codecs %(pkg-config --exists cuda && echo 1)}
-%endif
-%endif
-%if 0%{?nvidia_codecs}
-%define build_args %{DEFAULT_BUILD_ARGS}
-%else
-%if 0%{?fedora}>=39
-%global debug_package %{nil}
-%define build_args %{DEFAULT_BUILD_ARGS} --without-nvidia --without-pandoc_lua
-%else
-%define build_args %{DEFAULT_BUILD_ARGS} --without-nvidia
-%endif
-%endif
 
 %global selinux_variants mls targeted
 %define selinux_modules cups_xpra xpra_socketactivation
@@ -76,6 +60,23 @@ Vendor: https://xpra.org/
 Source: https://xpra.org/src/xpra-%{version}.tar.xz
 #grab the full revision number from the source archive's src_info.py file:
 %define revision_no %(tar -OJxf %{SOURCE0} xpra-%{version}/xpra/src_info.py | grep -e "^REVISION=" | awk -F= '{print ".r"$2}' 2> /dev/null)
+%{!?nvidia_codecs: %define nvidia_codecs %(pkg-config --exists cuda && echo 1)}
+#Fedora 38+ cannot build the cuda kernels:
+%if 0%{?fedora}>=38
+%if %{nvidia_codecs}
+%define fatbin %(tar -Jtf %{SOURCE0} xpra-%{version}/fs/share/xpra/cuda | grep .fatbin | wc -l 2> /dev/null)
+#we can only include cuda if we have pre-built fatbin kernels:
+%if %{fatbin}==0
+%define nvidia_codecs 0
+%endif
+%endif
+%endif
+%if 0%{?nvidia_codecs}
+%define build_args %{DEFAULT_BUILD_ARGS}
+%else
+%define build_args %{DEFAULT_BUILD_ARGS} --without-nvidia
+%endif
+
 Release: 10%{revision_no}%{?dist}
 #rpm falls over itself if we try to make the top-level package noarch:
 #BuildArch: noarch
diff --git a/setup.py b/setup.py
index 6655328e76..2594a78e08 100755
--- a/setup.py
+++ b/setup.py
@@ -281,7 +281,7 @@ def has_header_file(name, isdir=False):
 nvdec_ENABLED = False
 nvfbc_ENABLED = nvidia_ENABLED and not ARM and pkg_config_ok("--exists", "nvfbc")
 cuda_kernels_ENABLED = nvidia_ENABLED and not OSX
-cuda_rebuild_ENABLED = cuda_kernels_ENABLED and not WIN32
+cuda_rebuild_ENABLED = None if nvidia_ENABLED else False
 csc_libyuv_ENABLED = DEFAULT and pkg_config_ok("--exists", "libyuv")
 gstreamer_ENABLED = DEFAULT
 example_ENABLED = DEFAULT
@@ -1218,23 +1218,11 @@ def clean():
             print(f"removing Cython/build generated file: {x}")
             os.unlink(filename)
 
-if 'clean' in sys.argv or 'sdist' in sys.argv:
-    clean()
-
 def add_build_info(*args):
     cmd = [sys.executable, "./fs/bin/add_build_info.py"]+list(args)
     r = subprocess.Popen(cmd).wait(30)
     assert r==0, "'%s' returned %s" % (" ".join(cmd), r)
 
-if "clean" not in sys.argv:
-    # Add build info to build_info.py file:
-    add_build_info("build")
-    if modules_ENABLED:
-        # ensure it is included in the module list if it didn't exist before
-        add_modules("xpra.build_info")
-
-if "sdist" in sys.argv:
-    add_build_info("src")
 
 if "install" in sys.argv or "build" in sys.argv:
     #if installing from source tree rather than
@@ -1246,12 +1234,21 @@ def add_build_info(*args):
         add_modules("xpra.src_info")
 
 
-if 'clean' in sys.argv or 'sdist' in sys.argv:
+if "clean" in sys.argv or "sdist" in sys.argv:
+    clean()
+    if "sdist" in sys.argv:
+        add_build_info("src")
     #take shortcut to skip cython/pkgconfig steps:
     setup(**setup_options)
     sys.exit(0)
 
+# Add build info to build_info.py file:
+add_build_info("build")
+if modules_ENABLED:
+    # ensure it is included in the module list if it didn't exist before
+    add_modules("xpra.build_info")
+
 
 def glob_recurse(srcdir):
     m = {}
@@ -2160,115 +2157,27 @@ def bundle_tests():
 toggle_packages(nvidia_ENABLED, "xpra.codecs.nvidia.cuda")
 CUDA_BIN = f"{share_xpra}/cuda"
 if nvidia_ENABLED:
-    #find nvcc:
-    from xpra.util import sorted_nicely # pylint: disable=import-outside-toplevel
-    path_options = os.environ.get("PATH", "").split(os.path.pathsep)
-    if WIN32:
-        external_includes.append("pycuda")
-        nvcc_exe = "nvcc.exe"
-        CUDA_DIR = os.environ.get("CUDA_DIR", "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA")
-        path_options += ["./cuda/bin/"]+list(reversed(sorted_nicely(glob.glob(f"{CUDA_DIR}\\*\\bin"))))
-    else:
-        nvcc_exe = "nvcc"
-        path_options += ["/usr/local/cuda/bin", "/opt/cuda/bin"]
-        path_options += list(reversed(sorted_nicely(glob.glob("/usr/local/cuda*/bin"))))
-        path_options += list(reversed(sorted_nicely(glob.glob("/opt/cuda*/bin"))))
-    options = [os.path.join(x, nvcc_exe) for x in path_options]
-    #prefer the one we find on the $PATH, if any:
-    v = shutil.which(nvcc_exe)
-    if v and (v not in options):
-        options.insert(0, v)
-    nvcc_versions = {}
-    def get_nvcc_version(command):
-        if not os.path.exists(command):
-            return None
-        code, out, _ = get_status_output([command, "--version"])
-        if code!=0:
-            return None
-        vpos = out.rfind(", V")
-        if vpos>0:
-            version = out[vpos+3:].split("\n")[0]
-            version_str = f" version {version}"
-        else:
-            version = "0"
-            version_str = " unknown version!"
-        print(f"found CUDA compiler: {filename}{version_str}")
-        return tuple(int(x) for x in version.split("."))
-    for filename in options:
-        vnum = get_nvcc_version(filename)
-        if vnum:
-            nvcc_versions[vnum] = filename
-    nvcc_version = nvcc = None
-    if nvcc_versions:
-        #choose the most recent one:
-        nvcc_version, nvcc = list(reversed(sorted(nvcc_versions.items())))[0]
-        if len(nvcc_versions)>1:
-            print(f" using version {nvcc_version} from {nvcc}")
-    if cuda_kernels_ENABLED and (nvenc_ENABLED or nvjpeg_encoder_ENABLED):
-        assert nvcc, "cannot find nvcc compiler!"
-        def get_nvcc_args():
-            if nvcc_version<(11, 6):
-                raise RuntimeError(f"nvcc version {nvcc_version} is too old, minimum is 11.6")
-            gcc_version = get_gcc_version()
-            if not CC_is_clang() and gcc_version<(9, ):
-                print("gcc versions older than 9 are not supported!")
-                for _ in range(5):
-                    sleep(1)
-                    print(".")
-            nvcc_cmd = [
-                nvcc,
-                "-fatbin",
-                "-std=c++11",
-                "-arch=all",
-                "-Wno-deprecated-gpu-targets",
-                "-Xnvlink",
-                "-ignore-host-info",
-            ]
-            if gcc_version>=(12, 0):
-                nvcc_cmd.append("--allow-unsupported-compiler")
-            return nvcc_cmd
-        nvcc_args = get_nvcc_args()
-        #first compile the cuda kernels
-        #(using the same cuda SDK for both nvenc modules for now..)
-        kernels = []
-        if nvenc_ENABLED:
-            kernels += ["XRGB_to_NV12", "XRGB_to_YUV444", "BGRX_to_NV12", "BGRX_to_YUV444"]
-        if nvjpeg_encoder_ENABLED:
-            kernels += ["BGRX_to_RGB", "RGBX_to_RGB", "RGBA_to_RGBAP", "BGRA_to_RGBAP"]
-        nvcc_commands = []
+    kernels = (
+        "XRGB_to_NV12", "XRGB_to_YUV444", "BGRX_to_NV12", "BGRX_to_YUV444",
+        "BGRX_to_RGB", "RGBX_to_RGB", "RGBA_to_RGBAP", "BGRA_to_RGBAP",
+    )
+    rebuild = []
+    if cuda_rebuild_ENABLED is True:
+        rebuild = list(kernels)
+    elif cuda_rebuild_ENABLED is None:
         for kernel in kernels:
-            cuda_src = f"fs/share/xpra/cuda/{kernel}.cu"
-            cuda_bin = f"fs/share/xpra/cuda/{kernel}.fatbin"
-            if os.path.exists(cuda_bin) and (cuda_rebuild_ENABLED is False):
-                continue
-            reason = should_rebuild(cuda_src, cuda_bin)
-            if not reason:
-                continue
-            print(f"rebuilding {kernel}: {reason}")
-            kbuild_cmd = nvcc_args + ["-c", cuda_src, "-o", cuda_bin]
-            print(f"CUDA compiling %s ({reason})" % kernel.ljust(16))
-            print(" "+" ".join(f"{x!r}" for x in kbuild_cmd))
-            nvcc_commands.append(kbuild_cmd)
-        #parallel build:
-        nvcc_errors = []
-        def nvcc_compile(nvcc_cmd):
-            c, stdout, stderr = get_status_output(nvcc_cmd)
-            if c!=0:
-                nvcc_errors.append(c)
-                print(f"Error: failed to compile CUDA kernel {kernel}")
-                print(f" using command: {nvcc_cmd}")
-                print(stdout or "")
-                print(stderr or "")
-        nvcc_threads = []
-        for cmd in nvcc_commands:
-            from threading import Thread
-            t = Thread(target=nvcc_compile, args=(cmd,))
-            t.start()
-            nvcc_threads.append(t)
-        for t in nvcc_threads:
-            if nvcc_errors:
-                sys.exit(1)
-            t.join()
+            cu_src = f"fs/share/xpra/cuda/{kernel}.cu"
+            fatbin = f"fs/share/xpra/cuda/{kernel}.fatbin"
+            assert os.path.exists(cu_src)
+            if reason := should_rebuild(cu_src, fatbin):
+                print(f"* rebuilding {kernel}: {reason}")
+                rebuild.append(kernel)
+    if rebuild:
+        r = subprocess.Popen(["./fs/bin/build_cuda_kernels.py"]+rebuild).wait()
+        if r!=0:
+            print(f"failed to rebuild the cuda kernels {rebuild}")
+            sys.exit(1)
+    if cuda_kernels_ENABLED:
         add_data_files(CUDA_BIN, [f"fs/share/xpra/cuda/{x}.fatbin" for x in kernels])
 if WIN32 and (nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED or nvenc_ENABLED or nvdec_ENABLED):
     assert nvcc_versions
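
Note: the rebuild step above delegates all compilation to ./fs/bin/build_cuda_kernels.py,
a helper script which is not part of this diff. As a rough sketch only: such a helper
presumably locates nvcc and compiles each named kernel from fs/share/xpra/cuda/<name>.cu
into a .fatbin, reusing the nvcc flags that this patch removes from setup.py; the actual
script in the xpra tree may differ.

#!/usr/bin/env python3
# Hypothetical sketch of fs/bin/build_cuda_kernels.py -- not the real script.
import os
import shutil
import subprocess
import sys

CUDA_SRC_DIR = "fs/share/xpra/cuda"

def find_nvcc() -> str:
    # prefer nvcc found on $PATH, fall back to common install prefixes:
    nvcc = shutil.which("nvcc")
    if nvcc:
        return nvcc
    for candidate in ("/usr/local/cuda/bin/nvcc", "/opt/cuda/bin/nvcc"):
        if os.path.exists(candidate):
            return candidate
    raise RuntimeError("cannot find nvcc compiler!")

def build_kernel(nvcc: str, kernel: str) -> int:
    cu_src = os.path.join(CUDA_SRC_DIR, f"{kernel}.cu")
    fatbin = os.path.join(CUDA_SRC_DIR, f"{kernel}.fatbin")
    # flags taken from the nvcc invocation removed from setup.py by this patch:
    cmd = [
        nvcc,
        "-fatbin",
        "-std=c++11",
        "-arch=all",
        "-Wno-deprecated-gpu-targets",
        "-c", cu_src,
        "-o", fatbin,
    ]
    print(f"building {kernel}: {' '.join(cmd)}")
    return subprocess.run(cmd).returncode

def main(kernels) -> int:
    nvcc = find_nvcc()
    for kernel in kernels:
        r = build_kernel(nvcc, kernel)
        if r != 0:
            print(f"failed to build {kernel}: nvcc returned {r}")
            return 1
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

It would be invoked as in setup.py above, with the kernel names as arguments, e.g.:
./fs/bin/build_cuda_kernels.py XRGB_to_NV12 BGRX_to_NV12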