Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add config file watch and restart mig-parted #111

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 93 additions & 3 deletions cmd/nvidia-mig-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"strings"
"sync"

"github.com/fsnotify/fsnotify"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"

Expand Down Expand Up @@ -66,6 +67,7 @@ var (
hostMigManagerStateFileFlag string
hostKubeletSystemdServiceFlag string
defaultGPUClientsNamespaceFlag string
configWatchFlag bool

cdiEnabledFlag bool
driverRoot string
Expand Down Expand Up @@ -142,6 +144,14 @@ func main() {
Destination: &configFileFlag,
EnvVars: []string{"CONFIG_FILE"},
},
&cli.BoolFlag{
Name: "config-watch",
Aliases: []string{"w"},
Value: false,
Usage: "set config file watch and reload",
Destination: &configWatchFlag,
EnvVars: []string{"CONFIG_WATCH"},
},
&cli.StringFlag{
Name: "reconfigure-script",
Aliases: []string{"s"},
Expand Down Expand Up @@ -300,17 +310,49 @@ func start(c *cli.Context) error {
stop := ContinuouslySyncMigConfigChanges(clientset, migConfig)
defer close(stop)

labelChangeSignal := make(chan string)
configFileChangeSignal := make(chan struct{})
lastReadValue := ""
migConfigGet := func(migConfig *SyncableMigConfig, signal chan string) {
defer close(labelChangeSignal)
for {
log.Infof("Waiting for change to '%s' label", MigConfigLabel)
value := migConfig.Get()
lastReadValue = value
log.Infof("Updating to MIG config: %s", value)
signal <- value
}
}
go migConfigGet(migConfig, labelChangeSignal)
var labelChangeSignalClosed bool = true
var configFileChangeSignalClosed bool = false
if configWatchFlag {
configFileChangeSignalClosed = true
go ReloadSignalByWatchConfig(configFileFlag, configFileChangeSignal)
}
for {
log.Infof("Waiting for change to '%s' label", MigConfigLabel)
value := migConfig.Get()
log.Infof("Updating to MIG config: %s", value)
var value string
select {
case value, labelChangeSignalClosed = <-labelChangeSignal:
log.Infof("labelChangeSignalClosed: %v", labelChangeSignalClosed)
case _, configFileChangeSignalClosed = <-configFileChangeSignal:
log.Infof("configFileChangeSignalClosed: %v", configFileChangeSignalClosed)
if lastReadValue == "" {
continue
}
value = lastReadValue
}
if !labelChangeSignalClosed && !configFileChangeSignalClosed {
break
}
err := runScript(value, driverLibraryPath, nvidiaSMIPath)
if err != nil {
log.Errorf("Error: %s", err)
continue
}
log.Infof("Successfully updated to MIG config: %s", value)
}
return nil
}

// getPathsForCDI discovers the paths to libnvidia-ml.so.1 and nvidia-smi
Expand Down Expand Up @@ -431,3 +473,51 @@ func ContinuouslySyncMigConfigChanges(clientset *kubernetes.Clientset, migConfig
go controller.Run(stop)
return stop
}

// ReloadSignalByWatchConfig watches configFile with fsnotify and sends an
// empty struct on signal each time a Remove event for the file is followed by
// file content that differs from the most recently observed content.
//
// Only Remove events are acted upon; other operations on the file are ignored.
// NOTE(review): presumably the config is delivered by replacing the file
// rather than editing it in place — confirm against the deployment.
//
// The function blocks until the watcher's channels are closed or the initial
// read of configFile fails, and closes signal when it returns. Failing to
// create the watcher or to register configFile is logged with log.Fatalf.
func ReloadSignalByWatchConfig(configFile string, signal chan struct{}) {
	defer close(signal)

	// Baseline content that later reads are compared against.
	oldConfigYaml, err := os.ReadFile(configFile)
	if err != nil {
		log.Errorf("read config file %s error %+v", configFile, err)
		return
	}

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatalf("Error creating watcher: %v", err)
		return
	}
	defer watcher.Close()

	if err = watcher.Add(configFile); err != nil {
		log.Fatalf("Error adding file to watcher: %v", err)
		return
	}
	log.Infof("Watching config file %s", configFile)

	for {
		select {
		case event, ok := <-watcher.Events:
			if !ok {
				log.Infof("Watcher closed events")
				return
			}
			if event.Op&fsnotify.Remove != fsnotify.Remove {
				continue
			}
			log.Infof("File removed: %s", event.Name)

			// The watch is re-registered after every removal. Surface a
			// failure here instead of silently ending up with no active watch.
			if err := watcher.Add(event.Name); err != nil {
				log.Errorf("Error re-adding file %s to watcher: %v", event.Name, err)
			}

			newConfigYaml, err := os.ReadFile(configFile)
			if err != nil {
				log.Errorf("read config file %s error %+v", configFile, err)
				continue
			}
			if string(oldConfigYaml) != string(newConfigYaml) {
				// Remember the new content so that the next comparison is
				// made against what was last signalled, not the startup
				// content.
				oldConfigYaml = newConfigYaml
				signal <- struct{}{}
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				log.Infof("Watcher closed errors")
				return
			}
			log.Printf("Error: %v", err)
		}
	}
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ toolchain go1.22.5
require (
github.com/NVIDIA/go-nvlib v0.6.1
github.com/NVIDIA/go-nvml v0.12.4-0
github.com/fsnotify/fsnotify v1.7.0
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
Expand Down
13 changes: 13 additions & 0 deletions vendor/github.com/fsnotify/fsnotify/.cirrus.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions vendor/github.com/fsnotify/fsnotify/.editorconfig

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vendor/github.com/fsnotify/fsnotify/.gitattributes

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions vendor/github.com/fsnotify/fsnotify/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vendor/github.com/fsnotify/fsnotify/.mailmap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading