Skip to content

Fix compatibility with nvidia-container-cli 1.17.7 #11739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -2067,6 +2067,10 @@ func nvproxySetup(spec *specs.Spec, conf *config.Config, goferPid int) error {
fmt.Sprintf("--pid=%d", goferPid),
fmt.Sprintf("--device=%s", devices),
}
if nvidiaContainerCliNeedsCudaCompatModeFlag(cliPath) {
// "mount" is the flag's intended default value.
argv = append(argv, "--cuda-compat-mode=mount")
}
// Pass driver capabilities allowed by configuration as flags. See
// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/main.go:doPrestart().
driverCaps, err := specutils.NVProxyDriverCapsFromEnv(spec, conf)
Expand All @@ -2091,6 +2095,51 @@ func nvproxySetup(spec *specs.Spec, conf *config.Config, goferPid int) error {
return nil
}

// nvidiaContainerCliNeedsCudaCompatModeFlag reports whether the
// nvidia-container-cli binary at cliPath is version 1.17.7 or newer, in which
// case the caller should pass --cuda-compat-mode explicitly.
//
// The version is determined by running `cliPath --version` and parsing the
// leading "cli-version: X.Y.Z" line of its output. If the command fails or the
// version cannot be parsed, a warning is logged and false is returned.
func nvidiaContainerCliNeedsCudaCompatModeFlag(cliPath string) bool {
	versionCmd := exec.Cmd{
		Path: cliPath,
		Args: []string{cliPath, "--version"},
	}
	log.Debugf("Executing %q", versionCmd.Args)
	output, err := versionCmd.Output()
	if err != nil {
		log.Warningf("Failed to execute nvidia-container-cli --version: %v", err)
		return false
	}

	// The version triple is expected at the very start of the output.
	versionRE := regexp.MustCompile(`^cli-version: (\d+)\.(\d+)\.(\d+)`)
	groups := versionRE.FindSubmatch(output)
	if groups == nil {
		log.Warningf("Failed to find version number in nvidia-container-cli --version: %s", output)
		return false
	}

	// The regexp only admits digits, so Atoi can fail only when a component
	// does not fit in an int.
	major, err := strconv.Atoi(string(groups[1]))
	if err != nil {
		log.Warningf("Invalid major version number in nvidia-container-cli --version: %v", err)
		return false
	}
	minor, err := strconv.Atoi(string(groups[2]))
	if err != nil {
		log.Warningf("Invalid minor version number in nvidia-container-cli --version: %v", err)
		return false
	}
	release, err := strconv.Atoi(string(groups[3]))
	if err != nil {
		log.Warningf("Invalid release version number in nvidia-container-cli --version: %v", err)
		return false
	}

	// nvidia-container-cli gained the --cuda-compat-mode flag in 1.17.7.
	// Until https://github.com/NVIDIA/libnvidia-container/pull/310 (not yet
	// part of any nvidia-container-cli version as of this writing), omitting
	// the flag from `nvidia-container-cli configure` makes it ignore every
	// other flag, so the flag is needed for any version >= 1.17.7.
	switch {
	case major != 1:
		return major > 1
	case minor != 17:
		return minor > 17
	default:
		return release >= 7
	}
}

// CheckStopped checks if the container is stopped and updates its status.
func (c *Container) CheckStopped() {
if state, err := c.Sandbox.ContainerRuntimeState(c.ID); err != nil {
Expand Down