diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24c0039d7e..9e5cbd835d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
+## \[6.3.1\]
+
+- Add reserved property for nodeset_tpu
+- Update Lustre client repository URL from el8.8 to el8.9
+
## \[6.3.0\]
- Upgrade installed Slurm to 23.02.7
diff --git a/ansible/roles/lustre/vars/redhat-8.yml b/ansible/roles/lustre/vars/redhat-8.yml
index 5f9518283b..1d81d51616 100644
--- a/ansible/roles/lustre/vars/redhat-8.yml
+++ b/ansible/roles/lustre/vars/redhat-8.yml
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-lustre_repo_url: https://downloads.whamcloud.com/public/lustre/latest-release/el8.8/client
+lustre_repo_url: https://downloads.whamcloud.com/public/lustre/latest-release/el8.9/client
lustre_packages:
- lustre-client
diff --git a/scripts/util.py b/scripts/util.py
index fe8fb7428b..0fe9fef6be 100755
--- a/scripts/util.py
+++ b/scripts/util.py
@@ -1148,6 +1148,10 @@ def enable_public_ip(self):
def preemptible(self):
return self._nodeset.preemptible
+ @property
+ def reserved(self):
+ return self._nodeset.reserved
+
@property
def service_account(self):
return self._nodeset.service_account
@@ -1277,6 +1281,7 @@ def create_node(self, nodename):
node.service_account.email = self.nodeset.service_account.email
node.service_account.scope = self.nodeset.service_account.scopes
node.scheduling_config.preemptible = self.preemptible
+ node.scheduling_config.reserved = self.reserved
if self.nodeset.network:
node.network_config.network = self.nodeset.network
if self.nodeset.subnetwork:
diff --git a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/README_TF.md b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/README_TF.md
index 1d9157573f..bad238161a 100644
--- a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/README_TF.md
+++ b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/README_TF.md
@@ -59,6 +59,7 @@ No modules.
| [preemptible](#input\_preemptible) | Specify whether TPU-vms in this nodeset are preemtible, see https://cloud.google.com/tpu/docs/preemptible for details. | `bool` | `false` | no |
| [preserve\_tpu](#input\_preserve\_tpu) | Specify whether TPU-vms will get preserve on suspend, if set to true, on suspend vm is stopped, on false it gets deleted | `bool` | `true` | no |
| [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes |
+| [reserved](#input\_reserved) | Specify whether TPU-vms in this nodeset are created under a reservation. | `bool` | `false` | no |
| [service\_account](#input\_service\_account) | Service account to attach to the TPU-vm.
If none is given, the default service account and scopes will be used. |
object({| `null` | no | | [subnetwork](#input\_subnetwork) | The name of the subnetwork to attach the TPU-vm of this nodeset to. | `string` | `null` | no | | [tf\_version](#input\_tf\_version) | Nodeset Tensorflow version, see https://cloud.google.com/tpu/docs/supported-tpu-configurations#tpu_vm for details. | `string` | n/a | yes | diff --git a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/main.tf b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/main.tf index fbb8988577..8ac7344536 100644 --- a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/main.tf +++ b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/main.tf @@ -111,6 +111,10 @@ resource "null_resource" "nodeset_tpu" { condition = sum([var.node_count_dynamic_max, var.node_count_static]) > 0 error_message = "Sum of node_count_dynamic_max and node_count_static must be > 0." } + precondition { + condition = !(var.preemptible && var.reserved) + error_message = "Nodeset cannot be preemptible and reserved at the same time." + } precondition { condition = !(var.subnetwork == null && !var.enable_public_ip) error_message = "Using the default subnetwork for the TPU nodeset requires enable_public_ip set to true." diff --git a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/variables.tf b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/variables.tf index 1b6c270cd8..de51f7a970 100644 --- a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/variables.tf +++ b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/variables.tf @@ -77,6 +77,12 @@ variable "preemptible" { default = false } +variable "reserved" { + description = "Specify whether TPU-vms in this nodeset are created under a reservation." 
+ type = bool + default = false +} + variable "preserve_tpu" { description = "Specify whether TPU-vms will get preserve on suspend, if set to true, on suspend vm is stopped, on false it gets deleted" type = bool diff --git a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/versions.tf b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/versions.tf index a4f0cbde02..cc7222cae2 100644 --- a/terraform/slurm_cluster/modules/slurm_nodeset_tpu/versions.tf +++ b/terraform/slurm_cluster/modules/slurm_nodeset_tpu/versions.tf @@ -18,7 +18,13 @@ terraform { required_version = "~> 1.2" required_providers { - google = ">= 3.53, < 5.0" - null = "~> 3.0" + google = { + source = "hashicorp/google" + version = ">= 3.53, < 5.0" + } + null = { + source = "hashicorp/null" + version = "~> 3.0" + } } }
email = string
scopes = set(string)
})