From 3a2321fc71e00f36ca1d1fbd45294e76bae6b653 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Tue, 17 Dec 2024 00:40:31 +0000 Subject: [PATCH 1/2] Add support for AMD GPU sharing AMD GPU sharing is based on ROCm, for Tumbleweed the installation can be done via: zypper addrepo https://repo.radeon.com/rocm/zyp/latest/main rocm zypper in rocm For more information: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html Signed-off-by: Paulo Gomes --- internal/profiles/profiles.go | 8 +++----- internal/runners/docker/run.go | 16 +++++++--------- internal/runners/podman/run.go | 15 +++++++-------- internal/util/gpu/gpu.go | 21 ++++++++++++++++----- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/internal/profiles/profiles.go b/internal/profiles/profiles.go index 7309d14..72f63c1 100644 --- a/internal/profiles/profiles.go +++ b/internal/profiles/profiles.go @@ -214,7 +214,7 @@ func Start(runner string, profile *types.Profile, cfg *types.Config) (err error) go images.PreemptWorkloadImages(binary, cfg) if profile.Gpus != "" { - if !gpu.Supported() { + if _, ok := gpu.Supported(runner); !ok { profile.Gpus = "" dbus.NotifyOrLog("qubesome error", "GPU support was not detected, disabling it for qubesome") } @@ -503,10 +503,8 @@ func createNewDisplay(bin string, ca, cert, key []byte, profile *types.Profile, dockerArgs = append(dockerArgs, "-v="+xdgRuntimeDir+":/run/user/1000") } if profile.HostAccess.Gpus != "" { - if strings.HasSuffix(bin, "podman") { - dockerArgs = append(dockerArgs, "--device=nvidia.com/gpu=all") - } else { - dockerArgs = append(dockerArgs, "--gpus", profile.HostAccess.Gpus) + if gpus, ok := gpu.Supported(profile.Runner); ok { + dockerArgs = append(dockerArgs, gpus) } } diff --git a/internal/runners/docker/run.go b/internal/runners/docker/run.go index b120bc3..75985b0 100644 --- a/internal/runners/docker/run.go +++ b/internal/runners/docker/run.go @@ -41,13 +41,6 @@ func Run(ew types.EffectiveWorkload) error { 
return fmt.Errorf("failed to get named devices: %w", err) } - if wl.HostAccess.Gpus != "" { - if !gpu.Supported() { - wl.HostAccess.Gpus = "" - dbus.NotifyOrLog("qubesome error", "GPU support was not detected, disabling it for qubesome") - } - } - var paths []string // Mount localtime into container. This file may be a symlink, if so, // mount the underlying file as well. @@ -79,8 +72,13 @@ func Run(ew types.EffectiveWorkload) error { } if wl.HostAccess.Gpus != "" { - args = append(args, "--gpus", wl.HostAccess.Gpus) - args = append(args, "--runtime=nvidia") + gpu, ok := gpu.Supported("docker") + if !ok { + wl.HostAccess.Gpus = "" + dbus.NotifyOrLog("qubesome error", "GPU support was not detected, disabling it for qubesome") + } else { + args = append(args, gpu) + } } for _, cap := range wl.HostAccess.CapsAdd { diff --git a/internal/runners/podman/run.go b/internal/runners/podman/run.go index 5677a38..c4c3b25 100644 --- a/internal/runners/podman/run.go +++ b/internal/runners/podman/run.go @@ -41,13 +41,6 @@ func Run(ew types.EffectiveWorkload) error { return fmt.Errorf("failed to get named devices: %w", err) } - if wl.HostAccess.Gpus != "" { - if !gpu.Supported() { - wl.HostAccess.Gpus = "" - dbus.NotifyOrLog("qubesome error", "GPU support was not detected, disabling it for qubesome") - } - } - var paths []string // Mount localtime into container. This file may be a symlink, if so, // mount the underlying file as well. 
@@ -82,7 +75,13 @@ func Run(ew types.EffectiveWorkload) error { } if wl.HostAccess.Gpus != "" { - args = append(args, "--device=nvidia.com/gpu=all") + gpu, ok := gpu.Supported("podman") + if !ok { + wl.HostAccess.Gpus = "" + dbus.NotifyOrLog("qubesome error", "GPU support was not detected, disabling it for qubesome") + } else { + args = append(args, gpu) + } } for _, cap := range wl.HostAccess.CapsAdd { diff --git a/internal/util/gpu/gpu.go b/internal/util/gpu/gpu.go index 38fd5de..64a675c 100644 --- a/internal/util/gpu/gpu.go +++ b/internal/util/gpu/gpu.go @@ -2,12 +2,23 @@ package gpu import "os/exec" -// Supported checks whether nvidia gpu sharing is supported by the system. +// Supported checks whether GPU sharing is supported by the system, based +// on either NVidia or AMD toolkits being installed. // -// At present it only checks whether nvidia-container-toolkit is in the PATH. +// At present it only checks whether nvidia-container-toolkit or +// rocm-smi are in the PATH. // In the future, it should attempt to run a container to confirm it is // properly configured and useable. -func Supported() bool { - path, err := exec.LookPath("nvidia-container-toolkit") - return path != "" && err == nil +func Supported(runner string) (string, bool) { + if path, _ := exec.LookPath("nvidia-container-toolkit"); path != "" { + if runner == "podman" { + return "--device=nvidia.com/gpu=all", true + } + return "--gpus=all", true + } + // AMD GPU. + if path, _ := exec.LookPath("rocm-smi"); path != "" { + return "--device=/dev/kfd", true + } + return "", false } From b1b77a5311868209ff51a0a21779dfaafed8de89 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Tue, 17 Dec 2024 00:41:32 +0000 Subject: [PATCH 2/2] Add Leap to README Signed-off-by: Paulo Gomes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6199884..a70922e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ storage, etc). 
go install github.com/qubesome/cli/cmd/qubesome@latest ``` -##### For Tumbleweed users +##### For Leap and Tumbleweed users ``` zypper install -y qubesome ```