From ba5d7649017298019fdd3d404a69fb850fa9bfd1 Mon Sep 17 00:00:00 2001 From: cblmemo Date: Wed, 31 Jan 2024 17:57:56 -0800 Subject: [PATCH 1/7] fix --- sky/clouds/azure.py | 67 +++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index 3cabdf65a40..44ce7caee13 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -59,7 +59,7 @@ class Azure(clouds.Cloud): # Reference: https://azure.github.io/PSRule.Rules.Azure/en/rules/Azure.ResourceGroup.Name/ # pylint: disable=line-too-long _MAX_CLUSTER_NAME_LEN_LIMIT = 42 _BEST_DISK_TIER = resources_utils.DiskTier.MEDIUM - _DEFAULT_DISK_TIER = resources_utils.DiskTier.BEST + _DEFAULT_DISK_TIER = resources_utils.DiskTier.MEDIUM # Azure does not support high disk tier. _SUPPORTED_DISK_TIERS = (set(resources_utils.DiskTier) - {resources_utils.DiskTier.HIGH}) @@ -295,6 +295,20 @@ def make_deploy_resources_variables( content: | APT::Periodic::Enable "0"; """).encode('utf-8')).decode('utf-8') + + def _failover_disk_tier() -> Optional[resources_utils.DiskTier]: + # Failover disk tier. + all_tiers = list(reversed(resources_utils.DiskTier)) + start_index = all_tiers.index( + Azure._translate_disk_tier(r.disk_tier)) + while start_index < len(all_tiers): + disk_tier = all_tiers[start_index] + ok, _ = Azure.check_disk_tier(r.instance_type, disk_tier) + if ok: + return disk_tier + start_index += 1 + return None + return { 'instance_type': r.instance_type, 'custom_resources': custom_resources, @@ -303,75 +317,38 @@ def make_deploy_resources_variables( # Azure does not support specific zones. 'zones': None, **image_config, - 'disk_tier': Azure._get_disk_type(r.disk_tier), + 'disk_tier': Azure._get_disk_type(_failover_disk_tier()), 'cloud_init_setup_commands': cloud_init_setup_commands } def _get_feasible_launchable_resources( self, resources: 'resources.Resources' ) -> Tuple[List['resources.Resources'], List[str]]: - - def failover_disk_tier( - instance_type: str, disk_tier: Optional[resources_utils.DiskTier] - ) -> Tuple[bool, Optional[resources_utils.DiskTier]]: - """Figure out the actual disk tier to be used. - - Check the disk_tier specified by the user with the instance type to - be used. If not valid, return False. - - When the disk_tier is not specified, failover through the possible - disk tiers. - - Returns: - A tuple of a boolean value and an optional string to represent - the instance_type to use. If the boolean value is False, the - specified configuration is not a valid combination, and should - not be used for launching a VM. - """ - if disk_tier is None: - # Translate first since azure's default disk tier is BEST. - disk_tier = Azure._translate_disk_tier(disk_tier) - if disk_tier != resources_utils.DiskTier.BEST: - ok, _ = Azure.check_disk_tier(instance_type, disk_tier) - return (True, disk_tier) if ok else (False, None) - # All tiers in reversed order, i.e. from best to worst. - # We will failover along this order. - all_tiers = list(reversed(resources_utils.DiskTier)) - # Translate to real disk tier. - start_index = all_tiers.index(Azure._translate_disk_tier(disk_tier)) - while start_index < len(all_tiers): - disk_tier = all_tiers[start_index] - ok, _ = Azure.check_disk_tier(instance_type, disk_tier) - if ok: - return (True, disk_tier) - start_index += 1 - return False, None - if resources.instance_type is not None: assert resources.is_launchable(), resources - ok, disk_tier = failover_disk_tier(resources.instance_type, - resources.disk_tier) + ok, _ = Azure.check_disk_tier(resources.instance_type, + resources.disk_tier) if not ok: return ([], []) # Treat Resources(Azure, Standard_NC4as_T4_v3, T4) as # Resources(Azure, Standard_NC4as_T4_v3). resources = resources.copy( accelerators=None, - disk_tier=disk_tier, + disk_tier=resources.disk_tier, ) return ([resources], []) def _make(instance_list): resource_list = [] for instance_type in instance_list: - ok, disk_tier = failover_disk_tier(instance_type, - resources.disk_tier) + ok, _ = Azure.check_disk_tier(instance_type, + resources.disk_tier) if not ok: continue r = resources.copy( cloud=Azure(), instance_type=instance_type, - disk_tier=disk_tier, + disk_tier=resources.disk_tier, # Setting this to None as Azure doesn't separately bill / # attach the accelerators. Billed as part of the VM type. accelerators=None, From ffaa8bfcdf15fef302534df74b79df284cacb55d Mon Sep 17 00:00:00 2001 From: cblmemo Date: Thu, 1 Feb 2024 03:07:08 -0800 Subject: [PATCH 2/7] only failover for none and best --- sky/clouds/azure.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index 44ce7caee13..7d3ac772908 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -297,6 +297,9 @@ def make_deploy_resources_variables( """).encode('utf-8')).decode('utf-8') def _failover_disk_tier() -> Optional[resources_utils.DiskTier]: + if (r.disk_tier is not None and + r.disk_tier != resources_utils.DiskTier.BEST): + return r.disk_tier # Failover disk tier. all_tiers = list(reversed(resources_utils.DiskTier)) start_index = all_tiers.index( From 4bcc0c96fa7ed1ebbf83a4e0f4ab8d8fd78a791d Mon Sep 17 00:00:00 2001 From: Tian Xia Date: Fri, 2 Feb 2024 16:25:18 +0800 Subject: [PATCH 3/7] Update sky/clouds/azure.py Co-authored-by: Zhanghao Wu --- sky/clouds/azure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index 7d3ac772908..db1a75efef6 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -337,7 +337,6 @@ def _get_feasible_launchable_resources( # Resources(Azure, Standard_NC4as_T4_v3). resources = resources.copy( accelerators=None, - disk_tier=resources.disk_tier, ) return ([resources], []) From 493f76cb5306592520eb72085759450feb1aaaaf Mon Sep 17 00:00:00 2001 From: Tian Xia Date: Fri, 2 Feb 2024 16:25:24 +0800 Subject: [PATCH 4/7] Update sky/clouds/azure.py Co-authored-by: Zhanghao Wu --- sky/clouds/azure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index db1a75efef6..88f5d933461 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -350,7 +350,6 @@ def _make(instance_list): r = resources.copy( cloud=Azure(), instance_type=instance_type, - disk_tier=resources.disk_tier, # Setting this to None as Azure doesn't separately bill / # attach the accelerators. Billed as part of the VM type. accelerators=None, From 03088af2cba66ee73d7d21b934adf72bc06ce1bb Mon Sep 17 00:00:00 2001 From: cblmemo Date: Fri, 2 Feb 2024 00:29:11 -0800 Subject: [PATCH 5/7] assert --- sky/clouds/azure.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index 88f5d933461..30a0359125d 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -310,7 +310,7 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]: if ok: return disk_tier start_index += 1 - return None + assert False, 'Low disk tier should always be supported on Azure.' return { 'instance_type': r.instance_type, @@ -335,9 +335,7 @@ def _get_feasible_launchable_resources( return ([], []) # Treat Resources(Azure, Standard_NC4as_T4_v3, T4) as # Resources(Azure, Standard_NC4as_T4_v3). - resources = resources.copy( - accelerators=None, - ) + resources = resources.copy(accelerators=None,) return ([resources], []) def _make(instance_list): From 8af4852e58e851f810b05fae1a3137d53ab328f0 Mon Sep 17 00:00:00 2001 From: cblmemo Date: Fri, 2 Feb 2024 00:30:28 -0800 Subject: [PATCH 6/7] nit --- sky/clouds/azure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index 30a0359125d..bd2e91a45cc 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -335,7 +335,7 @@ def _get_feasible_launchable_resources( return ([], []) # Treat Resources(Azure, Standard_NC4as_T4_v3, T4) as # Resources(Azure, Standard_NC4as_T4_v3). - resources = resources.copy(accelerators=None,) + resources = resources.copy(accelerators=None) return ([resources], []) def _make(instance_list): From c6e06a483ce357462377a71a1107d919f2b729c6 Mon Sep 17 00:00:00 2001 From: cblmemo Date: Fri, 2 Feb 2024 00:38:38 -0800 Subject: [PATCH 7/7] comments --- sky/clouds/azure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/clouds/azure.py b/sky/clouds/azure.py index bd2e91a45cc..3389c648cd5 100644 --- a/sky/clouds/azure.py +++ b/sky/clouds/azure.py @@ -300,7 +300,9 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]: if (r.disk_tier is not None and r.disk_tier != resources_utils.DiskTier.BEST): return r.disk_tier - # Failover disk tier. + # Failover disk tier from high to low. Default disk tier + # (Premium_LRS, medium) only support s-series instance types, + # so we failover to lower tiers for non-s-series. all_tiers = list(reversed(resources_utils.DiskTier)) start_index = all_tiers.index( Azure._translate_disk_tier(r.disk_tier))