diff --git a/pkg/api/addons.go b/pkg/api/addons.go index e058fdf2f1..d3444de3bd 100644 --- a/pkg/api/addons.go +++ b/pkg/api/addons.go @@ -209,7 +209,7 @@ func (cs *ContainerService) setAddonsConfig(isUpgrade bool) { defaultNVIDIADevicePluginAddonsConfig := KubernetesAddon{ Name: common.NVIDIADevicePluginAddonName, - Enabled: to.BoolPtr(cs.Properties.IsNvidiaDevicePluginCapable() && !cs.Properties.IsAzureStackCloud()), + Enabled: to.BoolPtr(cs.Properties.IsNvidiaDevicePluginCapable() && !cs.Properties.IsAzureStackCloud() && !cs.Properties.OrchestratorProfile.KubernetesConfig.NeedsContainerd()), Containers: []KubernetesContainerSpec{ { Name: common.NVIDIADevicePluginAddonName, diff --git a/pkg/api/addons_test.go b/pkg/api/addons_test.go index d5ee49f634..07109f8eaa 100644 --- a/pkg/api/addons_test.go +++ b/pkg/api/addons_test.go @@ -1439,6 +1439,34 @@ func TestSetAddonsConfig(t *testing.T) { }, }, "1.15.4"), }, + { + name: "containerd w/ N series SKU", + cs: &ContainerService{ + Properties: &Properties{ + OrchestratorProfile: &OrchestratorProfile{ + OrchestratorVersion: "1.20.5", + KubernetesConfig: &KubernetesConfig{ + KubernetesImageBaseType: common.KubernetesImageBaseTypeMCR, + DNSServiceIP: DefaultKubernetesDNSServiceIP, + KubeletConfig: map[string]string{ + "--cluster-domain": "cluster.local", + }, + ClusterSubnet: DefaultKubernetesSubnet, + ProxyMode: KubeProxyModeIPTables, + NetworkPlugin: NetworkPluginAzure, + ContainerRuntime: Containerd, + }, + }, + AgentPoolProfiles: []*AgentPoolProfile{ + { + VMSize: "Standard_NC6", + }, + }, + }, + }, + isUpgrade: false, + expectedAddons: getDefaultAddons("1.20.5", "", common.KubernetesImageBaseTypeMCR), + }, { name: "container-monitoring addon enabled", cs: &ContainerService{ diff --git a/test/e2e/kubernetes/kubernetes_test.go b/test/e2e/kubernetes/kubernetes_test.go index 1b90ce87ee..005f802d63 100644 --- a/test/e2e/kubernetes/kubernetes_test.go +++ b/test/e2e/kubernetes/kubernetes_test.go @@ -1750,6 +1750,18 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu Describe("with a GPU-enabled agent pool", func() { It("should be able to run a nvidia-gpu job", func() { if eng.ExpandedDefinition.Properties.HasNSeriesSKU() { + if hasAddon, _ := eng.HasAddon("nvidia-device-plugin"); !hasAddon { + By("Installing nvidia gpu-operator helm chart") + commandArgsSlice := []string{"upgrade", "--install", "--wait", "gpu-operator", "--repo", "https://nvidia.github.io/gpu-operator", "gpu-operator"} + if eng.ExpandedDefinition.Properties.OrchestratorProfile.KubernetesConfig.NeedsContainerd() { + commandArgsSlice = append(commandArgsSlice, []string{"--set", "operator.defaultRuntime=containerd"}...) + } + cmd := exec.Command("helm", commandArgsSlice...) + out, err := cmd.CombinedOutput() + log.Printf("%s\n", out) + Expect(err).NotTo(HaveOccurred()) + } + By("Running a CUDA vector job") j, err := job.CreateJobFromFileWithRetry(filepath.Join(WorkloadDir, "cuda-vector-add.yaml"), "cuda-vector-add", "default", 3*time.Second, cfg.Timeout) Expect(err).NotTo(HaveOccurred()) ready, err := j.WaitOnSucceeded(30*time.Second, cfg.Timeout)