AI models support (#1750)

* nodepool config * added gpu driver configuration * documentation update * regenerated docs * review comments * review comments * blocks structure * documentation update * test fix and doc update * review comments * doc * Extend inventory * Update README --------- Co-authored-by: Ludovico Magnocavallo <ludomagno@google.com> Co-authored-by: Julio Castillo <jccb@google.com>
GoogleCloudPlatform · Oct 25, 2023 · 33ce0e1 · 33ce0e1
1 parent e28f7c3
commit 33ce0e1
Show file tree

Hide file tree

Showing 7 changed files with 152 additions and 36 deletions.
diff --git a/modules/gke-cluster-standard/README.md b/modules/gke-cluster-standard/README.md
diff --git a/modules/gke-cluster-standard/main.tf b/modules/gke-cluster-standard/main.tf
@@ -83,6 +83,9 @@ resource "google_container_cluster" "cluster" {
     gcp_filestore_csi_driver_config {
       enabled = var.enable_addons.gcp_filestore_csi_driver
     }
+    gcs_fuse_csi_driver_config {
+      enabled = var.enable_addons.gcs_fuse_csi_driver
+    }
     kalm_config {
       enabled = var.enable_addons.kalm
     }

diff --git a/modules/gke-cluster-standard/variables.tf b/modules/gke-cluster-standard/variables.tf
@@ -77,6 +77,7 @@ variable "enable_addons" {
     dns_cache                      = optional(bool, false)
     gce_persistent_disk_csi_driver = optional(bool, false)
     gcp_filestore_csi_driver       = optional(bool, false)
+    gcs_fuse_csi_driver            = optional(bool, false)
     horizontal_pod_autoscaling     = optional(bool, false)
     http_load_balancing            = optional(bool, false)
     istio = optional(object({

diff --git a/modules/gke-nodepool/README.md b/modules/gke-nodepool/README.md
@@ -103,29 +103,62 @@ module "cluster-1-nodepool-1" {
 }
 # tftest modules=1 resources=2 inventory=config.yaml
 ```
+### GPU Node & node pool configuration
+
+```hcl
+# Example: a spot GPU node pool with one A100 accelerator per node and the
+# GPU driver installation configured through guest_accelerator.gpu_driver.
+module "cluster-1-nodepool-gpu-1" {
+  source       = "./fabric/modules/gke-nodepool"
+  project_id   = "myproject"
+  cluster_name = "cluster-1"
+  location     = "europe-west4-a"
+  name         = "nodepool-gpu-1"
+  labels       = { environment = "dev" }
+  service_account = {
+    create       = true
+    email        = "nodepool-gpu-1" # optional
+    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+  }
+  node_config = {
+    machine_type        = "a2-highgpu-1g"
+    disk_size_gb        = 50
+    disk_type           = "pd-ssd"
+    ephemeral_ssd_count = 1
+    gvnic               = true
+    spot                = true
+    guest_accelerator = {
+      type  = "nvidia-tesla-a100"
+      count = 1
+      # version must be one of GPU_DRIVER_VERSION_UNSPECIFIED,
+      # INSTALLATION_DISABLED, DEFAULT or LATEST (enforced by the
+      # node_config variable validation).
+      gpu_driver = {
+        version = "LATEST"
+      }
+    }
+  }
+}
+# tftest modules=1 resources=2 inventory=guest-accelerator.yaml
+```
 <!-- BEGIN TFDOC -->
 ## Variables
 
 | name | description | type | required | default |
 |---|---|:---:|:---:|:---:|
 | [cluster_name](variables.tf#L23) | Cluster name. | <code>string</code> | ✓ |  |
 | [location](variables.tf#L41) | Cluster location. | <code>string</code> | ✓ |  |
-| [project_id](variables.tf#L150) | Cluster project id. | <code>string</code> | ✓ |  |
+| [project_id](variables.tf#L166) | Cluster project id. | <code>string</code> | ✓ |  |
 | [cluster_id](variables.tf#L17) | Cluster id. Optional, but providing cluster_id is recommended to prevent cluster misconfiguration in some of the edge cases. | <code>string</code> |  | <code>null</code> |
 | [gke_version](variables.tf#L28) | Kubernetes nodes version. Ignored if auto_upgrade is set in management_config. | <code>string</code> |  | <code>null</code> |
 | [labels](variables.tf#L34) | Kubernetes labels applied to each node. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
 | [max_pods_per_node](variables.tf#L46) | Maximum number of pods per node. | <code>number</code> |  | <code>null</code> |
 | [name](variables.tf#L52) | Optional nodepool name. | <code>string</code> |  | <code>null</code> |
-| [node_config](variables.tf#L58) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key   &#61; optional&#40;string&#41;&#10;  disk_size_gb        &#61; optional&#40;number&#41;&#10;  disk_type           &#61; optional&#40;string&#41;&#10;  ephemeral_ssd_count &#61; optional&#40;number&#41;&#10;  gcfs                &#61; optional&#40;bool, false&#41;&#10;  guest_accelerator &#61; optional&#40;object&#40;&#123;&#10;    count              &#61; number&#10;    type               &#61; string&#10;    gpu_partition_size &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  gvnic      &#61; optional&#40;bool, false&#41;&#10;  image_type &#61; optional&#40;string&#41;&#10;  kubelet_config &#61; optional&#40;object&#40;&#123;&#10;    cpu_manager_policy   &#61; string&#10;    cpu_cfs_quota        &#61; optional&#40;bool&#41;&#10;    cpu_cfs_quota_period &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  linux_node_config_sysctls &#61; optional&#40;map&#40;string&#41;&#41;&#10;  local_ssd_count           &#61; optional&#40;number&#41;&#10;  machine_type              &#61; optional&#40;string&#41;&#10;  metadata                  &#61; optional&#40;map&#40;string&#41;&#41;&#10;  min_cpu_platform          &#61; optional&#40;string&#41;&#10;  preemptible               &#61; optional&#40;bool&#41;&#10;  sandbox_config_gvisor     &#61; optional&#40;bool&#41;&#10;  shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;    enable_integrity_monitoring &#61; optional&#40;bool&#41;&#10;    enable_secure_boot          &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  spot                          &#61; optional&#40;bool&#41;&#10;  workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  disk_type &#61; &#34;pd-balanced&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [node_count](variables.tf#L97) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object&#40;&#123;&#10;  current &#61; optional&#40;number&#41;&#10;  initial &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  initial &#61; 1&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [node_locations](variables.tf#L109) | Node locations. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
-| [nodepool_config](variables.tf#L115) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [pod_range](variables.tf#L137) | Pod secondary range configuration. | <code title="object&#40;&#123;&#10;  secondary_pod_range &#61; object&#40;&#123;&#10;    name                 &#61; string&#10;    cidr                 &#61; optional&#40;string&#41;&#10;    create               &#61; optional&#40;bool&#41;&#10;    enable_private_nodes &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [reservation_affinity](variables.tf#L155) | Configuration of the desired reservation which instances could take capacity from. | <code title="object&#40;&#123;&#10;  consume_reservation_type &#61; string&#10;  key                      &#61; optional&#40;string&#41;&#10;  values                   &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [service_account](variables.tf#L165) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object&#40;&#123;&#10;  create       &#61; optional&#40;bool, false&#41;&#10;  email        &#61; optional&#40;string&#41;&#10;  oauth_scopes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [sole_tenant_nodegroup](variables.tf#L176) | Sole tenant node group. | <code>string</code> |  | <code>null</code> |
-| [tags](variables.tf#L182) | Network tags applied to nodes. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
-| [taints](variables.tf#L188) | Kubernetes taints applied to all nodes. | <code title="map&#40;object&#40;&#123;&#10;  value  &#61; string&#10;  effect &#61; string&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [node_config](variables.tf#L58) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key   &#61; optional&#40;string&#41;&#10;  disk_size_gb        &#61; optional&#40;number&#41;&#10;  disk_type           &#61; optional&#40;string&#41;&#10;  ephemeral_ssd_count &#61; optional&#40;number&#41;&#10;  gcfs                &#61; optional&#40;bool, false&#41;&#10;  guest_accelerator &#61; optional&#40;object&#40;&#123;&#10;    count &#61; number&#10;    type  &#61; string&#10;    gpu_driver &#61; optional&#40;object&#40;&#123;&#10;      version                    &#61; string&#10;      partition_size             &#61; optional&#40;string&#41;&#10;      max_shared_clients_per_gpu &#61; optional&#40;number&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  local_nvme_ssd_count &#61; optional&#40;number&#41;&#10;  gvnic                &#61; optional&#40;bool, false&#41;&#10;  image_type           &#61; optional&#40;string&#41;&#10;  kubelet_config &#61; optional&#40;object&#40;&#123;&#10;    cpu_manager_policy   &#61; string&#10;    cpu_cfs_quota        &#61; optional&#40;bool&#41;&#10;    cpu_cfs_quota_period &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  linux_node_config_sysctls &#61; optional&#40;map&#40;string&#41;&#41;&#10;  local_ssd_count           &#61; optional&#40;number&#41;&#10;  machine_type              &#61; optional&#40;string&#41;&#10;  metadata                  &#61; optional&#40;map&#40;string&#41;&#41;&#10;  min_cpu_platform          &#61; optional&#40;string&#41;&#10;  preemptible               &#61; optional&#40;bool&#41;&#10;  sandbox_config_gvisor     &#61; optional&#40;bool&#41;&#10;  shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;    enable_integrity_monitoring &#61; optional&#40;bool&#41;&#10;    enable_secure_boot          &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  spot                          &#61; optional&#40;bool&#41;&#10;  workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  disk_type &#61; &#34;pd-balanced&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [node_count](variables.tf#L113) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object&#40;&#123;&#10;  current &#61; optional&#40;number&#41;&#10;  initial &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  initial &#61; 1&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [node_locations](variables.tf#L125) | Node locations. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
+| [nodepool_config](variables.tf#L131) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [pod_range](variables.tf#L153) | Pod secondary range configuration. | <code title="object&#40;&#123;&#10;  secondary_pod_range &#61; object&#40;&#123;&#10;    name                 &#61; string&#10;    cidr                 &#61; optional&#40;string&#41;&#10;    create               &#61; optional&#40;bool&#41;&#10;    enable_private_nodes &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [reservation_affinity](variables.tf#L171) | Configuration of the desired reservation which instances could take capacity from. | <code title="object&#40;&#123;&#10;  consume_reservation_type &#61; string&#10;  key                      &#61; optional&#40;string&#41;&#10;  values                   &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [service_account](variables.tf#L181) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object&#40;&#123;&#10;  create       &#61; optional&#40;bool, false&#41;&#10;  email        &#61; optional&#40;string&#41;&#10;  oauth_scopes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [sole_tenant_nodegroup](variables.tf#L192) | Sole tenant node group. | <code>string</code> |  | <code>null</code> |
+| [tags](variables.tf#L198) | Network tags applied to nodes. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
+| [taints](variables.tf#L204) | Kubernetes taints applied to all nodes. | <code title="map&#40;object&#40;&#123;&#10;  value  &#61; string&#10;  effect &#61; string&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> |  | <code>&#123;&#125;</code> |
 
 ## Outputs
 

diff --git a/modules/gke-nodepool/main.tf b/modules/gke-nodepool/main.tf
@@ -1,5 +1,5 @@
 /**
- * Copyright 2022 Google LLC
+ * Copyright 2023 Google LLC
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -165,7 +165,28 @@ resource "google_container_node_pool" "nodepool" {
       content {
         count              = var.node_config.guest_accelerator.count
         type               = var.node_config.guest_accelerator.type
-        gpu_partition_size = var.node_config.guest_accelerator.gpu_partition_size
+        gpu_partition_size = var.node_config.guest_accelerator.gpu_driver == null ? null : var.node_config.guest_accelerator.gpu_driver.partition_size
+
+        # Emitted whenever gpu_driver is set, even if max_shared_clients_per_gpu
+        # is null; in that case both attributes below evaluate to null.
+        # NOTE(review): confirm the provider accepts a gpu_sharing_config block
+        # with null values, or gate for_each on max_shared_clients_per_gpu.
+        dynamic "gpu_sharing_config" {
+          for_each = var.node_config.guest_accelerator.gpu_driver != null ? [""] : []
+          content {
+            # TIME_SHARING is selected only when a client limit is provided.
+            gpu_sharing_strategy       = var.node_config.guest_accelerator.gpu_driver.max_shared_clients_per_gpu != null ? "TIME_SHARING" : null
+            max_shared_clients_per_gpu = var.node_config.guest_accelerator.gpu_driver.max_shared_clients_per_gpu
+          }
+        }
+
+        # Adds driver installation settings only when gpu_driver is set; the
+        # configured version string is passed through unchanged.
+        dynamic "gpu_driver_installation_config" {
+          for_each = var.node_config.guest_accelerator.gpu_driver != null ? [""] : []
+          content {
+            gpu_driver_version = var.node_config.guest_accelerator.gpu_driver.version
+          }
+        }
+      }
+    }
+    # Created only when local_nvme_ssd_count is a positive number; coalesce
+    # turns a null (unset) count into 0 so the comparison stays valid.
+    dynamic "local_nvme_ssd_block_config" {
+      for_each = coalesce(var.node_config.local_nvme_ssd_count, 0) > 0 ? [""] : []
+      content {
+        local_ssd_count = var.node_config.local_nvme_ssd_count
+      }
+    }
     dynamic "gvnic" {

diff --git a/modules/gke-nodepool/variables.tf b/modules/gke-nodepool/variables.tf
@@ -64,12 +64,17 @@ variable "node_config" {
     ephemeral_ssd_count = optional(number)
     gcfs                = optional(bool, false)
     guest_accelerator = optional(object({
-      count              = number
-      type               = string
-      gpu_partition_size = optional(string)
+      count = number
+      type  = string
+      gpu_driver = optional(object({
+        version                    = string
+        partition_size             = optional(string)
+        max_shared_clients_per_gpu = optional(number)
+      }))
     }))
-    gvnic      = optional(bool, false)
-    image_type = optional(string)
+    local_nvme_ssd_count = optional(number)
+    gvnic                = optional(bool, false)
+    image_type           = optional(string)
     kubelet_config = optional(object({
       cpu_manager_policy   = string
       cpu_cfs_quota        = optional(bool)
@@ -92,6 +97,17 @@ variable "node_config" {
   default = {
     disk_type = "pd-balanced"
   }
+  validation {
+    # Check gpu_driver.version only when a gpu_driver object is actually set.
+    # The splat yields [] when guest_accelerator is null, but [null] when
+    # guest_accelerator is set without gpu_driver; the "if v != null" filter
+    # drops that element so v.version is never read from a null object.
+    condition = alltrue([
+      for v in var.node_config.guest_accelerator[*].gpu_driver : contains([
+        "GPU_DRIVER_VERSION_UNSPECIFIED", "INSTALLATION_DISABLED",
+        "DEFAULT", "LATEST"
+      ], v.version)
+      if v != null
+    ])
+    error_message = "Invalid GPU driver version."
+  }
 }
 
 variable "node_count" {

diff --git a/tests/modules/gke_nodepool/examples/guest-accelerator.yaml b/tests/modules/gke_nodepool/examples/guest-accelerator.yaml
@@ -0,0 +1,42 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Expected plan values and resource counts for the GPU node pool example in
+# the gke-nodepool README (referenced there as inventory=guest-accelerator.yaml).
+values:
+  module.cluster-1-nodepool-gpu-1.google_container_node_pool.nodepool:
+    cluster: cluster-1
+    location: europe-west4-a
+    name: nodepool-gpu-1
+    node_config:
+    - boot_disk_kms_key: null
+      disk_size_gb: 50
+      disk_type: pd-ssd
+      ephemeral_storage_config:
+      - local_ssd_count: 1
+      ephemeral_storage_local_ssd_config: []
+      guest_accelerator:
+      - count: 1
+        gpu_driver_installation_config:
+        - gpu_driver_version: LATEST
+        gpu_partition_size: null
+        # The sharing block is present with null values because the example
+        # sets gpu_driver without max_shared_clients_per_gpu.
+        gpu_sharing_config:
+        - gpu_sharing_strategy: null
+          max_shared_clients_per_gpu: null
+        type: nvidia-tesla-a100
+      gvnic: []
+      machine_type: a2-highgpu-1g
+      spot: true
+    project: myproject
+
+counts:
+  google_container_node_pool: 1