This repository has been archived by the owner on Oct 24, 2023. It is now read-only.

feat: don't install nvidia drivers if nvidia-device-plugin is disabled #4358

Merged · 5 commits · Apr 12, 2021
44 changes: 43 additions & 1 deletion docs/topics/clusterdefinitions.md
@@ -111,7 +111,7 @@ $ aks-engine get-versions
| kubernetes-dashboard | false | 1 | Deprecated. We recommend installing dashboard manually, see: https://github.com/kubernetes/dashboard for more info. |
| rescheduler | false | 1 | Deprecated, no longer available after aks-engine v0.60.0. |
| [cluster-autoscaler](../../examples/addons/cluster-autoscaler/README.md) | false | 1 | Delivers the Kubernetes cluster autoscaler component. See https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/azure for more info; only supported for VMSS clusters on the first agent pool. |
| [nvidia-device-plugin](../../examples/addons/nvidia-device-plugin/README.md) | true if using a Kubernetes cluster with an N-series agent pool | 1 | Delivers the Kubernetes NVIDIA device plugin component. See https://github.com/NVIDIA/k8s-device-plugin for more info |
| [nvidia-device-plugin](../../examples/addons/nvidia-device-plugin/README.md) | Enabled by default if you're using the docker container runtime and have at least one N-series VM node pool. You may explicitly disable this addon if you want to manage your own GPU driver implementation, for example [using the nvidia gpu-operator solution](https://developer.nvidia.com/blog/announcing-containerd-support-for-the-nvidia-gpu-operator/). If you're using the containerd container runtime, this addon will be disabled, as this solution requires docker. | 1 | Delivers the Kubernetes NVIDIA device plugin component. See https://github.com/NVIDIA/k8s-device-plugin for more info. |
| container-monitoring | false | 1 | Delivers the Kubernetes container monitoring component |
| [blobfuse-flexvolume](https://github.com/Azure/kubernetes-volume-drivers/tree/master/flexvolume/blobfuse) | true | as many as linux agent nodes | Access virtual filesystem backed by the Azure Blob storage |
| [smb-flexvolume](https://github.com/Azure/kubernetes-volume-drivers/tree/master/flexvolume/smb) | false | as many as linux agent nodes | Access SMB server by using CIFS/SMB protocol |
@@ -261,6 +261,48 @@ The `calico` addon includes configurable verbosity via the `logSeverityScreen` c

Available options for `logSeverityScreen` are documented [here](https://docs.projectcalico.org/reference/resources/felixconfig).

#### nvidia-device-plugin

The `nvidia-device-plugin` addon installs the NVIDIA device plugin as a DaemonSet on clusters using the docker container runtime. The DaemonSet schedules a pod onto each node with GPU support (i.e., nodes backed by N-series VMs), where the plugin registers the `nvidia.com/gpu` resource with the kubelet. For example, the logs of a pod running on a GPU-enabled node:

```sh
$ kubectl logs nvidia-device-plugin-w8vnt -n kube-system
2021/04/08 18:38:05 Loading NVML
2021/04/08 18:38:05 Starting FS watcher.
2021/04/08 18:38:05 Starting OS watcher.
2021/04/08 18:38:05 Retreiving plugins.
2021/04/08 18:38:05 Starting GRPC server for 'nvidia.com/gpu'
2021/04/08 18:38:05 Starting to serve 'nvidia.com/gpu' on /var/lib/kubelet/device-plugins/nvidia.sock
2021/04/08 18:38:05 Registered device plugin for 'nvidia.com/gpu' with Kubelet
```

This addon is enabled automatically if the cluster is configured for docker and has at least one N-series VM node pool.
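
As a quick check, you can confirm that the addon's DaemonSet is running and that the `nvidia.com/gpu` resource (registered with the kubelet per the log above) shows up on your GPU nodes. This is a sketch only; the DaemonSet name below is assumed from the pod name in the example log:

```sh
# Verify the device plugin DaemonSet is present in kube-system (name assumed from the example pod above)
kubectl get daemonset nvidia-device-plugin -n kube-system

# Confirm GPU nodes advertise the registered 'nvidia.com/gpu' resource
kubectl describe node <n-series-node-name> | grep -i 'nvidia.com/gpu'
```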

If you're using one or more N-series node pools with containerd, this addon *will not* be installed, as containerd-backed clusters require you to bring your own GPU driver solution. The AKS Engine project recommends the official NVIDIA gpu-operator solution for containerd-backed clusters. See here:

- https://developer.nvidia.com/blog/announcing-containerd-support-for-the-nvidia-gpu-operator/

The gpu-operator solution is regularly tested against N-series node pool clusters backed by containerd.
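
As a sketch, the chart can be installed with helm; the invocation below mirrors the one added to this PR's e2e test, and the `operator.defaultRuntime=containerd` value is only needed for containerd-backed clusters:

```sh
# Install (or upgrade) the gpu-operator chart, as done by the e2e test in this PR
helm upgrade --install --wait gpu-operator \
  --repo https://nvidia.github.io/gpu-operator gpu-operator \
  --set operator.defaultRuntime=containerd
```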

If you're using docker and would like to provide your own GPU driver solution, you may disable the `nvidia-device-plugin` addon manually:

```
...
"kubernetesConfig": {
"addons": [
...
{
"name": "nvidia-device-plugin",
"enabled": false
}
...
]
}
...
```


#### components

`components` is an interface to allow for user-configurable core Kubernetes component implementations. Normally, you won't need to modify this configuration, as AKS Engine will use the best, known-working component implementations validated against Azure for all supported versions of Kubernetes. To support the rapid development of Azure + Kubernetes (e.g., Azure cloudprovider), this configuration vector may be useful for validating a custom build or configuration of the various core components on a running Azure Kubernetes cluster. Again, as with addons, this configurable vector is designed for *cluster creation only*. Using `aks-engine upgrade` on a cluster will override the original, user-configured settings during the upgrade operation, rendering an upgraded cluster with the AKS Engine defaults for `kube-controller-manager`, `cloud-controller-manager`, `kube-apiserver`, `kube-scheduler`, and `kube-addon-manager`.
2 changes: 1 addition & 1 deletion parts/k8s/cloud-init/artifacts/cse_config.sh
@@ -570,7 +570,7 @@ configAddons() {
mkdir -p $ADDONS_DIR/init && cp $POD_SECURITY_POLICY_SPEC $ADDONS_DIR/init/ || exit {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}}
{{- end}}
}
{{- if HasNSeriesSKU}}
{{- if and HasNSeriesSKU IsNvidiaDevicePluginAddonEnabled}}
{{- /* installNvidiaDrivers is idempotent, it will uninstall itself if it is already installed, and then install anew */}}
installNvidiaDrivers() {
local d="/var/lib/dkms/nvidia/${GPU_DV}" k log_file="/var/log/nvidia-installer-$(date +%s).log"
2 changes: 1 addition & 1 deletion parts/k8s/cloud-init/artifacts/cse_main.sh
@@ -139,7 +139,7 @@ fi
{{/* this will capture the amount of time to install of the network plugin during cse */}}
time_metric "InstallNetworkPlugin" installNetworkPlugin

{{- if HasNSeriesSKU}}
{{- if and HasNSeriesSKU IsNvidiaDevicePluginAddonEnabled}}
if [[ ${GPU_NODE} == true ]]; then
if $FULL_INSTALL_REQUIRED; then
time_metric "DownloadGPUDrivers" downloadGPUDrivers
2 changes: 1 addition & 1 deletion pkg/api/addons.go
@@ -209,7 +209,7 @@ func (cs *ContainerService) setAddonsConfig(isUpgrade bool) {

defaultNVIDIADevicePluginAddonsConfig := KubernetesAddon{
Name: common.NVIDIADevicePluginAddonName,
Enabled: to.BoolPtr(cs.Properties.IsNvidiaDevicePluginCapable() && !cs.Properties.IsAzureStackCloud()),
Enabled: to.BoolPtr(cs.Properties.IsNvidiaDevicePluginCapable() && !cs.Properties.IsAzureStackCloud() && !cs.Properties.OrchestratorProfile.KubernetesConfig.NeedsContainerd()),
Containers: []KubernetesContainerSpec{
{
Name: common.NVIDIADevicePluginAddonName,
28 changes: 28 additions & 0 deletions pkg/api/addons_test.go
@@ -1439,6 +1439,34 @@ func TestSetAddonsConfig(t *testing.T) {
},
}, "1.15.4"),
},
{
name: "containerd w/ N series SKU",
cs: &ContainerService{
Properties: &Properties{
OrchestratorProfile: &OrchestratorProfile{
OrchestratorVersion: "1.20.5",
KubernetesConfig: &KubernetesConfig{
KubernetesImageBaseType: common.KubernetesImageBaseTypeMCR,
DNSServiceIP: DefaultKubernetesDNSServiceIP,
KubeletConfig: map[string]string{
"--cluster-domain": "cluster.local",
},
ClusterSubnet: DefaultKubernetesSubnet,
ProxyMode: KubeProxyModeIPTables,
NetworkPlugin: NetworkPluginAzure,
ContainerRuntime: Containerd,
},
},
AgentPoolProfiles: []*AgentPoolProfile{
{
VMSize: "Standard_NC6",
},
},
},
},
isUpgrade: false,
expectedAddons: getDefaultAddons("1.20.5", "", common.KubernetesImageBaseTypeMCR),
},
{
name: "container-monitoring addon enabled",
cs: &ContainerService{
3 changes: 3 additions & 0 deletions pkg/engine/template_generator.go
@@ -650,6 +650,9 @@ func getContainerServiceFuncMap(cs *api.ContainerService) template.FuncMap {
"IsPodSecurityPolicyAddonEnabled": func() bool {
return cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.PodSecurityPolicyAddonName)
},
"IsNvidiaDevicePluginAddonEnabled": func() bool {
return cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.NVIDIADevicePluginAddonName)
},
"GetAADPodIdentityTaintKey": func() string {
return common.AADPodIdentityTaintKey
},
4 changes: 2 additions & 2 deletions pkg/engine/templates_generated.go

Some generated files are not rendered by default.

14 changes: 14 additions & 0 deletions test/e2e/kubernetes/kubernetes_test.go
@@ -1750,6 +1750,20 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
Describe("with a GPU-enabled agent pool", func() {
It("should be able to run a nvidia-gpu job", func() {
if eng.ExpandedDefinition.Properties.HasNSeriesSKU() {
if hasAddon, _ := eng.HasAddon("nvidia-device-plugin"); !hasAddon {
By("Installing nvidia gpu-operator helm chart")
commandArgsSlice := []string{"upgrade", "--install", "--wait", "gpu-operator", "--repo", "https://nvidia.github.io/gpu-operator", "gpu-operator"}
if eng.ExpandedDefinition.Properties.OrchestratorProfile.KubernetesConfig.NeedsContainerd() {
commandArgsSlice = append(commandArgsSlice, []string{"--set", "operator.defaultRuntime=containerd"}...)
}
ctx, cancel := context.WithTimeout(context.Background(), cfg.Timeout)
defer cancel()
cmd := exec.CommandContext(ctx, "helm", commandArgsSlice...)
out, err := cmd.CombinedOutput()
log.Printf("%s\n", out)
Expect(err).NotTo(HaveOccurred())
}
By("Running a CUDA vector job")
j, err := job.CreateJobFromFileWithRetry(filepath.Join(WorkloadDir, "cuda-vector-add.yaml"), "cuda-vector-add", "default", 3*time.Second, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
ready, err := j.WaitOnSucceeded(30*time.Second, cfg.Timeout)
7 changes: 0 additions & 7 deletions test/e2e/test_cluster_configs/everything.json
@@ -112,13 +112,6 @@
"2"
]
},
{
"name": "poolgpu",
"count": 1,
"availabilityProfile": "VirtualMachineScaleSets",
"vmSize": "Standard_NC6",
"vnetSubnetId": "/subscriptions/SUB_ID/resourceGroups/RG_NAME/providers/Microsoft.Network/virtualNetworks/VNET_NAME/subnets/SUBNET_NAME"
},
{
"name": "poolbigsku",
"count": 1,
34 changes: 34 additions & 0 deletions test/e2e/test_cluster_configs/gpu.json
@@ -0,0 +1,34 @@
{
"apiModel": {
"apiVersion": "vlabs",
"properties": {
"masterProfile": {
"count": 1,
"dnsPrefix": "",
"vmSize": "Standard_D2_v3"
},
"agentPoolProfiles": [
{
"name": "pool1",
"count": 1,
"vmSize": "Standard_D2_v3"
},
{
"name": "poolgpu",
"count": 1,
"vmSize": "Standard_NC6"
}
],
"linuxProfile": {
"adminUsername": "azureuser",
"ssh": {
"publicKeys": [
{
"keyData": ""
}
]
}
}
}
}
}