diff --git a/docs/gpu-feature-discovery/README.md b/docs/gpu-feature-discovery/README.md index 965740420..57e63acb7 100644 --- a/docs/gpu-feature-discovery/README.md +++ b/docs/gpu-feature-discovery/README.md @@ -194,21 +194,22 @@ Environment variables override the command line options if they conflict. This is the list of the labels generated by NVIDIA GPU Feature Discovery and their meaning: -| Label Name | Value Type | Meaning | Example | -| -------------------------------| ---------- | -------------------------------------------- | -------------- | -| nvidia.com/cuda.driver.major | Integer | Major of the version of NVIDIA driver | 418 | -| nvidia.com/cuda.driver.minor | Integer | Minor of the version of NVIDIA driver | 30 | -| nvidia.com/cuda.driver.rev | Integer | Revision of the version of NVIDIA driver | 40 | -| nvidia.com/cuda.runtime.major | Integer | Major of the version of CUDA | 10 | -| nvidia.com/cuda.runtime.minor | Integer | Minor of the version of CUDA | 1 | -| nvidia.com/gfd.timestamp | Integer | Timestamp of the generated labels (optional) | 1555019244 | -| nvidia.com/gpu.compute.major | Integer | Major of the compute capabilities | 3 | -| nvidia.com/gpu.compute.minor | Integer | Minor of the compute capabilities | 3 | -| nvidia.com/gpu.count | Integer | Number of GPUs | 2 | -| nvidia.com/gpu.family | String | Architecture family of the GPU | kepler | -| nvidia.com/gpu.machine | String | Machine type | DGX-1 | -| nvidia.com/gpu.memory | Integer | Memory of the GPU in Mb | 2048 | -| nvidia.com/gpu.product | String | Model of the GPU | GeForce-GT-710 | +| Label Name | Value Type | Meaning | Example | +| -------------------------------| ---------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------------- | +| nvidia.com/cuda.driver.major | Integer | Major of the version of NVIDIA driver | 418 | +| 
nvidia.com/cuda.driver.minor | Integer | Minor of the version of NVIDIA driver | 30 | +| nvidia.com/cuda.driver.rev | Integer | Revision of the version of NVIDIA driver | 40 | +| nvidia.com/cuda.runtime.major | Integer | Major of the version of CUDA | 10 | +| nvidia.com/cuda.runtime.minor | Integer | Minor of the version of CUDA | 1 | +| nvidia.com/gfd.timestamp | Integer | Timestamp of the generated labels (optional) | 1555019244 | +| nvidia.com/gpu.compute.major | Integer | Major of the compute capabilities | 3 | +| nvidia.com/gpu.compute.minor | Integer | Minor of the compute capabilities | 3 | +| nvidia.com/gpu.count | Integer | Number of GPUs | 2 | +| nvidia.com/gpu.family | String | Architecture family of the GPU | kepler | +| nvidia.com/gpu.machine | String | Machine type | DGX-1 | +| nvidia.com/gpu.memory | Integer | Memory of the GPU in MiB | 2048 | +| nvidia.com/gpu.product | String | Model of the GPU | GeForce-GT-710 | +| nvidia.com/gpu.mode | String | Mode of the GPU: `graphics`, `compute`, or `unknown` when it cannot be determined. 
Details of the GPU modes can be found [here](https://docs.nvidia.com/grid/13.0/grid-gpumodeswitch-user-guide/index.html#compute-and-graphics-mode) | compute | Depending on the MIG strategy used, the following set of labels may also be available (or override the default values for some of the labels listed above): diff --git a/internal/lm/nvml.go b/internal/lm/nvml.go index ed5004666..0b5ed6e9a 100644 --- a/internal/lm/nvml.go +++ b/internal/lm/nvml.go @@ -22,6 +22,10 @@ import ( "strconv" "strings" + "k8s.io/klog/v2" + + "github.com/NVIDIA/go-nvlib/pkg/nvpci" + spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1" "github.com/NVIDIA/k8s-device-plugin/internal/resource" ) @@ -71,12 +75,18 @@ func NewDeviceLabeler(manager resource.Manager, config *spec.Config) (Labeler, e return nil, fmt.Errorf("error creating resource labeler: %v", err) } + gpuModeLabeler, err := newGPUModeLabeler(devices) + if err != nil { + return nil, fmt.Errorf("error creating resource labeler: %v", err) + } + l := Merge( machineTypeLabeler, versionLabeler, migCapabilityLabeler, sharingLabeler, resourceLabeler, + gpuModeLabeler, ) return l, nil @@ -193,3 +203,54 @@ func isMPSCapable(manager resource.Manager) (bool, error) { } return true, nil } + +// newGPUModeLabeler creates a new labeler that reports the mode of GPUs on the node. +// GPUs can be in Graphics or Compute mode. 
+func newGPUModeLabeler(devices []resource.Device) (Labeler, error) { + classes, err := getDeviceClasses(devices) + if err != nil { + return nil, err + } + gpuMode := getModeForClasses(classes) + labels := Labels{ + "nvidia.com/gpu.mode": gpuMode, + } + return labels, nil +} + +func getModeForClasses(classes []uint32) string { + if len(classes) == 0 { + return "unknown" + } + for _, class := range classes { + if class != classes[0] { + klog.Infof("Not all GPU devices belong to the same class %#06x ", classes) + return "unknown" + } + } + switch classes[0] { + case nvpci.PCIVgaControllerClass: + return "graphics" + case nvpci.PCI3dControllerClass: + return "compute" + default: + return "unknown" + } +} + +func getDeviceClasses(devices []resource.Device) ([]uint32, error) { + seenClasses := make(map[uint32]bool) + for _, d := range devices { + class, err := d.GetPCIClass() + if err != nil { + return nil, err + } + seenClasses[class] = true + } + + var classes []uint32 + for class := range seenClasses { + classes = append(classes, class) + } + return classes, nil +} diff --git a/internal/lm/nvml_test.go b/internal/lm/nvml_test.go index f3ec71806..073721cd8 100644 --- a/internal/lm/nvml_test.go +++ b/internal/lm/nvml_test.go @@ -204,3 +204,89 @@ func TestSharingLabeler(t *testing.T) { }) } } + +func TestGPUModeLabeler(t *testing.T) { + testCases := []struct { + description string + devices []resource.Device + expectedError bool + expectedLabels map[string]string + }{ + { + description: "single device with compute PCI class", + devices: []resource.Device{ + rt.NewDeviceWithPCIClassMock(0x030000), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "graphics", + }, + }, + { + description: "single device with graphics PCI class", + devices: []resource.Device{ + rt.NewDeviceWithPCIClassMock(0x030200), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "compute", + }, + }, + { + description: "single device with switch PCI class", + devices: 
[]resource.Device{ + rt.NewDeviceWithPCIClassMock(0x068000), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "unknown", + }, + }, + { + description: "multiple device have same graphics PCI class", + devices: []resource.Device{ + rt.NewDeviceWithPCIClassMock(0x030200), + rt.NewDeviceWithPCIClassMock(0x030200), + rt.NewDeviceWithPCIClassMock(0x030200), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "compute", + }, + }, + { + description: "multiple device have same compute PCI class", + devices: []resource.Device{ + rt.NewDeviceWithPCIClassMock(0x030000), + rt.NewDeviceWithPCIClassMock(0x030000), + rt.NewDeviceWithPCIClassMock(0x030000), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "graphics", + }, + }, + { + description: "multiple device with some with graphics and others with compute PCI class", + devices: []resource.Device{ + rt.NewDeviceWithPCIClassMock(0x030000), + rt.NewDeviceWithPCIClassMock(0x030200), + rt.NewDeviceWithPCIClassMock(0x030000), + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.mode": "unknown", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + gpuModeLabeler, _ := newGPUModeLabeler(tc.devices) + + labels, err := gpuModeLabeler.Labels() + if tc.expectedError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + require.EqualValues(t, tc.expectedLabels, labels) + }) + } +} diff --git a/internal/resource/cuda-device.go b/internal/resource/cuda-device.go index b6bafc249..a4f4bc4a4 100644 --- a/internal/resource/cuda-device.go +++ b/internal/resource/cuda-device.go @@ -96,3 +96,7 @@ func (d *cudaDevice) IsMigCapable() (bool, error) { func (d *cudaDevice) IsMigEnabled() (bool, error) { return false, nil } + +func (d *cudaDevice) GetPCIClass() (uint32, error) { + return 0, nil +} diff --git a/internal/resource/device_mock.go b/internal/resource/device_mock.go index 2f48f89e0..1024ec96b 100644 --- 
a/internal/resource/device_mock.go +++ b/internal/resource/device_mock.go @@ -32,6 +32,9 @@ var _ Device = &DeviceMock{} // GetNameFunc: func() (string, error) { // panic("mock out the GetName method") // }, +// GetPCIClassFunc: func() (uint32, error) { +// panic("mock out the GetPCIClass method") +// }, // GetTotalMemoryMBFunc: func() (uint64, error) { // panic("mock out the GetTotalMemoryMB method") // }, @@ -63,6 +66,9 @@ type DeviceMock struct { // GetNameFunc mocks the GetName method. GetNameFunc func() (string, error) + // GetPCIClassFunc mocks the GetPCIClass method. + GetPCIClassFunc func() (uint32, error) + // GetTotalMemoryMBFunc mocks the GetTotalMemoryMB method. GetTotalMemoryMBFunc func() (uint64, error) @@ -89,6 +95,9 @@ type DeviceMock struct { // GetName holds details about calls to the GetName method. GetName []struct { } + // GetPCIClass holds details about calls to the GetPCIClass method. + GetPCIClass []struct { + } // GetTotalMemoryMB holds details about calls to the GetTotalMemoryMB method. GetTotalMemoryMB []struct { } @@ -104,6 +113,7 @@ type DeviceMock struct { lockGetDeviceHandleFromMigDeviceHandle sync.RWMutex lockGetMigDevices sync.RWMutex lockGetName sync.RWMutex + lockGetPCIClass sync.RWMutex lockGetTotalMemoryMB sync.RWMutex lockIsMigCapable sync.RWMutex lockIsMigEnabled sync.RWMutex @@ -244,6 +254,33 @@ func (mock *DeviceMock) GetNameCalls() []struct { return calls } +// GetPCIClass calls GetPCIClassFunc. +func (mock *DeviceMock) GetPCIClass() (uint32, error) { + if mock.GetPCIClassFunc == nil { + panic("DeviceMock.GetPCIClassFunc: method is nil but Device.GetPCIClass was just called") + } + callInfo := struct { + }{} + mock.lockGetPCIClass.Lock() + mock.calls.GetPCIClass = append(mock.calls.GetPCIClass, callInfo) + mock.lockGetPCIClass.Unlock() + return mock.GetPCIClassFunc() +} + +// GetPCIClassCalls gets all the calls that were made to GetPCIClass. 
+// Check the length with: +// +// len(mockedDevice.GetPCIClassCalls()) +func (mock *DeviceMock) GetPCIClassCalls() []struct { +} { + var calls []struct { + } + mock.lockGetPCIClass.RLock() + calls = mock.calls.GetPCIClass + mock.lockGetPCIClass.RUnlock() + return calls +} + // GetTotalMemoryMB calls GetTotalMemoryMBFunc. func (mock *DeviceMock) GetTotalMemoryMB() (uint64, error) { if mock.GetTotalMemoryMBFunc == nil { diff --git a/internal/resource/nvml-device.go b/internal/resource/nvml-device.go index 58501e09d..1184657d2 100644 --- a/internal/resource/nvml-device.go +++ b/internal/resource/nvml-device.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvlib/pkg/nvpci" "github.com/NVIDIA/go-nvml/pkg/nvml" ) @@ -86,3 +87,15 @@ func (d nvmlDevice) GetTotalMemoryMB() (uint64, error) { } return info.Total / (1024 * 1024), nil } + +func (d nvmlDevice) GetPCIClass() (uint32, error) { + pciBusID, err := d.GetPCIBusID() + if err != nil { + return 0, err + } + nvDevice, err := nvpci.New().GetGPUByPciBusID(pciBusID) + if err != nil { + return 0, err + } + return nvDevice.Class, nil +} diff --git a/internal/resource/nvml-mig-device.go b/internal/resource/nvml-mig-device.go index 599c27ce2..8ef933ff5 100644 --- a/internal/resource/nvml-mig-device.go +++ b/internal/resource/nvml-mig-device.go @@ -21,6 +21,7 @@ import ( "strings" "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvlib/pkg/nvpci" "github.com/NVIDIA/go-nvml/pkg/nvml" ) @@ -132,3 +133,8 @@ func totalMemory(attr map[string]interface{}) (uint64, error) { return 0, fmt.Errorf("unsupported attribute type %v", t) } } + +func (d nvmlMigDevice) GetPCIClass() (uint32, error) { + // GPU devices that support MIG do not support switching mode between graphics and compute, so they are always in compute mode. 
+ return nvpci.PCI3dControllerClass, nil +} diff --git a/internal/resource/sysfs-device.go b/internal/resource/sysfs-device.go index 9cce3b1fb..105229fe4 100644 --- a/internal/resource/sysfs-device.go +++ b/internal/resource/sysfs-device.go @@ -64,3 +64,7 @@ func (d vfioDevice) IsMigEnabled() (bool, error) { func (d vfioDevice) IsMigCapable() (bool, error) { return false, nil } + +func (d vfioDevice) GetPCIClass() (uint32, error) { + return d.nvidiaPCIDevice.Class, nil +} diff --git a/internal/resource/testing/resource-testing.go b/internal/resource/testing/resource-testing.go index a19916cb7..d4dc6216d 100644 --- a/internal/resource/testing/resource-testing.go +++ b/internal/resource/testing/resource-testing.go @@ -51,6 +51,14 @@ func NewDeviceMock(migEnabled bool) *DeviceMock { IsMigEnabledFunc: func() (bool, error) { return migEnabled, nil }, IsMigCapableFunc: func() (bool, error) { return migEnabled, nil }, GetMigDevicesFunc: func() ([]resource.Device, error) { return nil, nil }, + GetPCIClassFunc: func() (uint32, error) { return 0, nil }, + }} + return &d +} + +func NewDeviceWithPCIClassMock(pciClass uint32) *DeviceMock { + d := DeviceMock{resource.DeviceMock{ + GetPCIClassFunc: func() (uint32, error) { return pciClass, nil }, }} return &d } diff --git a/internal/resource/types.go b/internal/resource/types.go index 4ef05a957..ec89ec579 100644 --- a/internal/resource/types.go +++ b/internal/resource/types.go @@ -39,4 +39,5 @@ type Device interface { GetTotalMemoryMB() (uint64, error) GetDeviceHandleFromMigDeviceHandle() (Device, error) GetCudaComputeCapability() (int, int, error) + GetPCIClass() (uint32, error) } diff --git a/tests/expected-output-mig-mixed.txt b/tests/expected-output-mig-mixed.txt index a84b82968..f80392607 100644 --- a/tests/expected-output-mig-mixed.txt +++ b/tests/expected-output-mig-mixed.txt @@ -18,6 +18,7 @@ nvidia\.com\/gpu\.sharing-strategy=[none|mps|time-slicing] nvidia\.com\/gpu\.product=[A-Za-z_-]+ nvidia\.com\/gpu\.memory=[0-9]+ 
nvidia\.com\/gpu\.family=[a-z]+ +nvidia\.com\/gpu\.mode=[unknown|compute|graphics] nvidia\.com\/mig\.capable=[true|false] nvidia\.com\/gpu\.compute\.major=[0-9]+ nvidia\.com\/gpu\.compute\.minor=[0-9]+ diff --git a/tests/expected-output-mig-none.txt b/tests/expected-output-mig-none.txt index d388c02d2..0f2468d77 100644 --- a/tests/expected-output-mig-none.txt +++ b/tests/expected-output-mig-none.txt @@ -18,6 +18,7 @@ nvidia\.com\/gpu\.sharing-strategy=[none|mps|time-slicing] nvidia\.com\/gpu\.product=[A-Za-z_-]+ nvidia\.com\/gpu\.memory=[0-9]+ nvidia\.com\/gpu\.family=[a-z]+ +nvidia\.com\/gpu\.mode=[unknown|compute|graphics] nvidia\.com\/mig\.capable=[true|false] nvidia\.com\/gpu\.compute\.major=[0-9]+ nvidia\.com\/gpu\.compute\.minor=[0-9]+ diff --git a/tests/expected-output-mig-single.txt b/tests/expected-output-mig-single.txt index 055d7cc99..13691a5c6 100644 --- a/tests/expected-output-mig-single.txt +++ b/tests/expected-output-mig-single.txt @@ -30,4 +30,5 @@ nvidia\.com\/gpu\.engines\.jpeg=[0-9]+ nvidia\.com\/gpu\.engines\.ofa=[0-9]+ nvidia\.com\/gpu\.slices\.gi=[0-9]+ nvidia\.com\/gpu\.slices\.ci=[0-9]+ +nvidia\.com\/gpu\.mode=[compute] nvidia\.com\/mps\.capable=[true|false] diff --git a/tests/expected-output.txt b/tests/expected-output.txt index d388c02d2..0f2468d77 100644 --- a/tests/expected-output.txt +++ b/tests/expected-output.txt @@ -18,6 +18,7 @@ nvidia\.com\/gpu\.sharing-strategy=[none|mps|time-slicing] nvidia\.com\/gpu\.product=[A-Za-z_-]+ nvidia\.com\/gpu\.memory=[0-9]+ nvidia\.com\/gpu\.family=[a-z]+ +nvidia\.com\/gpu\.mode=[unknown|compute|graphics] nvidia\.com\/mig\.capable=[true|false] nvidia\.com\/gpu\.compute\.major=[0-9]+ nvidia\.com\/gpu\.compute\.minor=[0-9]+