diff --git a/internal/pkg/config/filesystem.go b/internal/pkg/config/filesystem.go
index 39874e42..6bc94c7f 100644
--- a/internal/pkg/config/filesystem.go
+++ b/internal/pkg/config/filesystem.go
@@ -2,6 +2,7 @@ package config
 
 type FilesystemConfig struct {
 	MGSDevice  string
+	MGSHost    string
 	MaxMDTs    uint
 	HostGroup  string
 	AnsibleDir string
@@ -14,6 +15,7 @@ func GetFilesystemConfig() FilesystemConfig {
 	env := DefaultEnv
 	conf := FilesystemConfig{
 		MGSDevice:  getString(env, "DAC_MGS_DEV", "sdb"),
+		MGSHost:    getString(env, "DAC_MGS_HOST", "localhost"),
 		MaxMDTs:    getUint(env, "DAC_MAX_MDT_COUNT", 24),
 		HostGroup:  getString(env, "DAC_HOST_GROUP", "dac-prod"),
 		AnsibleDir: getString(env, "DAC_ANSIBLE_DIR", "/var/lib/data-acc/fs-ansible/"),
diff --git a/internal/pkg/filesystem_impl/ansible.go b/internal/pkg/filesystem_impl/ansible.go
index b7b3c06b..94eaae41 100644
--- a/internal/pkg/filesystem_impl/ansible.go
+++ b/internal/pkg/filesystem_impl/ansible.go
@@ -46,9 +46,7 @@ func (*ansibleImpl) CreateEnvironment(session datamodel.Session) (string, error)
 	return setupAnsible(Lustre, session.FilesystemStatus.InternalName, session.AllocatedBricks)
 }
 
-var conf = config.GetFilesystemConfig()
-
-func getFSInfo(fsType FSType, fsUuid string, allBricks []datamodel.Brick) FSInfo {
+func getFSInfo(fsType FSType, fsUuid string, allBricks []datamodel.Brick, conf config.FilesystemConfig) FSInfo {
 	// give all bricks an index, using the random ordering of allBricks
 	var allAllocations []datamodel.BrickAllocation
 	for i, brick := range allBricks {
@@ -101,7 +99,7 @@
 		hostInfo := HostInfo{hostName: string(host), OSTS: osts, MDTS: mdts}
 
 		isPrimaryBrick := allocations[0].AllocatedIndex == 0
-		if isPrimaryBrick {
+		if isPrimaryBrick && conf.MGSHost == "localhost" {
 			if fsType == Lustre {
 				hostInfo.MGS = conf.MGSDevice
 			} else {
@@ -111,6 +109,18 @@
 		}
 		hosts[string(host)] = hostInfo
 	}
+
+	// Add MGSHost override, which may or may not be an existing host
+	if conf.MGSHost != "localhost" && fsType == Lustre {
+		hostInfo, ok := hosts[conf.MGSHost]
+		if !ok {
+			hostInfo = HostInfo{hostName: conf.MGSHost}
+		}
+		hostInfo.MGS = conf.MGSDevice
+		hosts[conf.MGSHost] = hostInfo
+		mgsnode = conf.MGSHost
+	}
+
 	// TODO: add attachments?
 	fsinfo := FSInfo{
 		Vars: map[string]string{
@@ -126,7 +136,8 @@
 }
 
 func getInventory(fsType FSType, fsUuid string, allBricks []datamodel.Brick) string {
-	fsinfo := getFSInfo(fsType, fsUuid, allBricks)
+	conf := config.GetFilesystemConfig()
+	fsinfo := getFSInfo(fsType, fsUuid, allBricks, conf)
 	fsname := fmt.Sprintf("%s", fsUuid)
 	data := Wrapper{Dacs: FileSystems{Children: map[string]FSInfo{fsname: fsinfo}}}
 
@@ -155,6 +166,7 @@ func getPlaybook(fsType FSType, fsUuid string) string {
 }
 
 func getAnsibleDir(suffix string) string {
+	conf := config.GetFilesystemConfig()
 	return path.Join(conf.AnsibleDir, suffix)
 }
 
@@ -251,6 +263,7 @@ func executeAnsiblePlaybook(dir string, args string) error {
 	cmdStr := fmt.Sprintf(`cd %s; . .venv/bin/activate; ansible-playbook %s;`, dir, args)
 	log.Println("Requested ansible:", cmdStr)
 
+	conf := config.GetFilesystemConfig()
 	if conf.SkipAnsible {
 		log.Println("Skip as DAC_SKIP_ANSIBLE=True")
 		time.Sleep(time.Millisecond * 200)
diff --git a/internal/pkg/filesystem_impl/ansible_test.go b/internal/pkg/filesystem_impl/ansible_test.go
index 48858f97..f7715138 100644
--- a/internal/pkg/filesystem_impl/ansible_test.go
+++ b/internal/pkg/filesystem_impl/ansible_test.go
@@ -2,6 +2,7 @@ package filesystem_impl
 
 import (
 	"fmt"
+	"github.com/RSE-Cambridge/data-acc/internal/pkg/config"
 	"github.com/RSE-Cambridge/data-acc/internal/pkg/datamodel"
 	"github.com/stretchr/testify/assert"
 	"testing"
@@ -185,10 +186,13 @@ func TestPlugin_GetFSInfo_MaxMDT_lessHosts(t *testing.T) {
 	}
 
 	fsUuid := "abcdefgh"
-	result := getFSInfo(Lustre, fsUuid, brickAllocations)
+	conf := config.GetFilesystemConfig()
+	conf.MGSHost = "dac5"
+	conf.MGSDevice = "loop0"
+	result := getFSInfo(Lustre, fsUuid, brickAllocations, conf)
 	resultStr := fmt.Sprintf("%+v", result.Hosts)
 	expected := `map[` +
-		`dac1:{hostName:dac1 MGS:sdb MDTS:map[nvme1n1:0 nvme2n1:1 nvme3n1:2 nvme4n1:3] ` +
+		`dac1:{hostName:dac1 MGS: MDTS:map[nvme1n1:0 nvme2n1:1 nvme3n1:2 nvme4n1:3] ` +
 		`OSTS:map[nvme11n1:30 nvme1n1:0 nvme2n1:1 nvme3n1:2 nvme4n1:3 nvme5n1:4 nvme6n1:5]} ` +
 		`dac2:{hostName:dac2 MGS: MDTS:map[nvme1n1:4 nvme2n1:5 nvme3n1:6 nvme4n1:7] ` +
 		`OSTS:map[nvme11n1:31 nvme1n1:6 nvme2n1:7 nvme3n1:8 nvme4n1:9 nvme5n1:10 nvme6n1:11]} ` +
@@ -196,7 +200,24 @@
 		`OSTS:map[nvme1n1:12 nvme2n1:13 nvme3n1:14 nvme4n1:15 nvme5n1:16 nvme6n1:17]} ` +
 		`dac4:{hostName:dac4 MGS: MDTS:map[nvme1n1:12 nvme2n1:13 nvme3n1:14 nvme4n1:15] ` +
 		`OSTS:map[nvme1n1:18 nvme2n1:19 nvme3n1:20 nvme4n1:21 nvme5n1:22 nvme6n1:23]} ` +
-		`dac5:{hostName:dac5 MGS: MDTS:map[nvme1n1:16 nvme2n1:17 nvme3n1:18 nvme4n1:19] ` +
+		`dac5:{hostName:dac5 MGS:loop0 MDTS:map[nvme1n1:16 nvme2n1:17 nvme3n1:18 nvme4n1:19] ` +
 		`OSTS:map[nvme1n1:24 nvme2n1:25 nvme3n1:26 nvme4n1:27 nvme5n1:28 nvme6n1:29]}]`
 	assert.Equal(t, expected, resultStr)
+
+	conf.MGSHost = "slurmmaster1"
+	result2 := getFSInfo(Lustre, fsUuid, brickAllocations, conf)
+	resultStr2 := fmt.Sprintf("%+v", result2.Hosts)
+	expected2 := `map[` +
+		`dac1:{hostName:dac1 MGS: MDTS:map[nvme1n1:0 nvme2n1:1 nvme3n1:2 nvme4n1:3] ` +
+		`OSTS:map[nvme11n1:30 nvme1n1:0 nvme2n1:1 nvme3n1:2 nvme4n1:3 nvme5n1:4 nvme6n1:5]} ` +
+		`dac2:{hostName:dac2 MGS: MDTS:map[nvme1n1:4 nvme2n1:5 nvme3n1:6 nvme4n1:7] ` +
+		`OSTS:map[nvme11n1:31 nvme1n1:6 nvme2n1:7 nvme3n1:8 nvme4n1:9 nvme5n1:10 nvme6n1:11]} ` +
+		`dac3:{hostName:dac3 MGS: MDTS:map[nvme1n1:8 nvme2n1:9 nvme3n1:10 nvme4n1:11] ` +
+		`OSTS:map[nvme1n1:12 nvme2n1:13 nvme3n1:14 nvme4n1:15 nvme5n1:16 nvme6n1:17]} ` +
+		`dac4:{hostName:dac4 MGS: MDTS:map[nvme1n1:12 nvme2n1:13 nvme3n1:14 nvme4n1:15] ` +
+		`OSTS:map[nvme1n1:18 nvme2n1:19 nvme3n1:20 nvme4n1:21 nvme5n1:22 nvme6n1:23]} ` +
+		`dac5:{hostName:dac5 MGS: MDTS:map[nvme1n1:16 nvme2n1:17 nvme3n1:18 nvme4n1:19] ` +
+		`OSTS:map[nvme1n1:24 nvme2n1:25 nvme3n1:26 nvme4n1:27 nvme5n1:28 nvme6n1:29]} ` +
+		`slurmmaster1:{hostName:slurmmaster1 MGS:loop0 MDTS:map[] OSTS:map[]}]`
+	assert.Equal(t, expected2, resultStr2)
 }
diff --git a/internal/pkg/filesystem_impl/mount.go b/internal/pkg/filesystem_impl/mount.go
index 0b3ccc65..e4b4d485 100644
--- a/internal/pkg/filesystem_impl/mount.go
+++ b/internal/pkg/filesystem_impl/mount.go
@@ -2,6 +2,7 @@ package filesystem_impl
 
 import (
 	"fmt"
"github.com/RSE-Cambridge/data-acc/internal/pkg/config" "github.com/RSE-Cambridge/data-acc/internal/pkg/datamodel" "log" "os/exec" @@ -34,6 +35,7 @@ func mount(fsType FSType, sessionName datamodel.SessionName, isMultiJob bool, in // TODO: Move Lustre mount here that is done below //executeAnsibleMount(fsType, volume, brickAllocations) } + conf := config.GetFilesystemConfig() var mountDir = getMountDir(sessionName, isMultiJob, attachment.SessionName) for _, attachHost := range attachment.Hosts { @@ -224,6 +226,7 @@ func mountRemoteFilesystem(fsType FSType, hostname string, lnetSuffix string, mg func mountLustre(hostname string, lnetSuffix string, mgtHost string, fsname string, directory string) error { // We assume modprobe -v lustre is already done // First check if we are mounted already + conf := config.GetFilesystemConfig() if err := runner.Execute(hostname, true, fmt.Sprintf("grep %s /etc/mtab", directory)); err != nil || conf.SkipAnsible { if err := runner.Execute(hostname, true, fmt.Sprintf( "mount -t lustre -o flock,nodev,nosuid %s%s:/%s %s", @@ -258,6 +261,7 @@ type run struct { func (*run) Execute(hostname string, asRoot bool, cmdStr string) error { log.Println("SSH to:", hostname, "with command:", cmdStr) + conf := config.GetFilesystemConfig() if conf.SkipAnsible { log.Println("Skip as DAC_SKIP_ANSIBLE=True") time.Sleep(time.Millisecond * 200)