From 2a940981f48974f33ff1e442185322694a79c0de Mon Sep 17 00:00:00 2001 From: Xu Xue <55420084+xuexu6666@users.noreply.github.com> Date: Tue, 24 Sep 2024 01:33:51 -0500 Subject: [PATCH] [AKS] `az aks nodepool delete-machines`: Add support to delete specific machines in an agent pool (#29921) --- .../azure/cli/command_modules/acs/_help.py | 12 +++ .../azure/cli/command_modules/acs/_params.py | 8 ++ .../azure/cli/command_modules/acs/commands.py | 1 + .../azure/cli/command_modules/acs/custom.py | 44 ++++++++++ .../acs/tests/latest/test_aks_commands.py | 85 +++++++++++++++++++ 5 files changed, 150 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 8b319c5f020..55428ffff8e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -1791,6 +1791,18 @@ text: az aks operation-abort -g myResourceGroup -n myAKSCluster """ +helps['aks nodepool delete-machines'] = """ + type: command + short-summary: Delete specific machines in an agentpool for a managed cluster. + parameters: + - name: --machine-names + type: string array + short-summary: Space-separated list of machine names from the agent pool to be deleted. + examples: + - name: Delete specific machines in an agent pool + text: az aks nodepool delete-machines -g myResourceGroup --nodepool-name nodepool1 --cluster-name myAKSCluster --machine-names machine1 +""" + helps['aks remove-dev-spaces'] = """ type: command short-summary: Remove Azure Dev Spaces from a managed Kubernetes cluster. 
diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index b29b3dcd2ca..d93a547f0f4 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -913,6 +913,14 @@ def load_arguments(self, _): c.argument('dns_zone_resource_ids', options_list=['--ids'], required=True) c.argument('attach_zones') + with self.argument_context("aks nodepool delete-machines") as c: + c.argument( + "machine_names", + nargs="+", + required=True, + help="Space-separated machine names to delete.", + ) + def _get_default_install_location(exe_name): system = platform.system() diff --git a/src/azure-cli/azure/cli/command_modules/acs/commands.py b/src/azure-cli/azure/cli/command_modules/acs/commands.py index 1e14722da9b..18634ac0d0b 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/commands.py @@ -134,6 +134,7 @@ def load_command_table(self, _): g.custom_command('start', 'aks_agentpool_start', supports_no_wait=True) g.wait_command('wait') g.custom_command('operation-abort', 'aks_agentpool_operation_abort', supports_no_wait=True) + g.custom_command('delete-machines', 'aks_agentpool_delete_machines', supports_no_wait=True) with self.command_group('aks command', managed_clusters_sdk, client_factory=cf_managed_clusters) as g: g.custom_command('invoke', 'aks_runcommand', supports_no_wait=True, diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index 7d33d2ab2cd..4934f45368f 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -90,6 +90,7 @@ ResourceNotFoundError, UnknownError, ValidationError, + RequiredArgumentMissingError, ) from azure.cli.core.commands import LongRunningOperation from azure.cli.core.commands.client_factory import 
def aks_agentpool_delete_machines(cmd,
                                  client,
                                  resource_group_name,
                                  cluster_name,
                                  nodepool_name,
                                  machine_names,
                                  no_wait=False):
    """Delete specific machines from one agent pool of a managed cluster.

    :param cmd: CLI command context; used to resolve the
        ``AgentPoolDeleteMachinesParameter`` model via ``cmd.get_models``.
    :param client: client object providing ``list`` and ``begin_delete_machines``.
    :param resource_group_name: name of the resource group of the cluster.
    :param cluster_name: name of the managed cluster.
    :param nodepool_name: name of the agent pool; matched case-insensitively
        against the names returned by ``client.list``.
    :param machine_names: non-empty list of machine names to delete.
    :param no_wait: forwarded to ``sdk_no_wait`` as its first argument.
    :return: whatever ``sdk_no_wait`` returns for ``client.begin_delete_machines``.
    :raises RequiredArgumentMissingError: if ``machine_names`` is empty or None.
    :raises ResourceNotFoundError: if no agent pool named ``nodepool_name`` is listed.
    """
    # Check the purely local argument first so a bad call fails before any
    # request is sent. Truthiness also covers None, on which len() would
    # raise a TypeError instead of the intended CLI error.
    if not machine_names:
        raise RequiredArgumentMissingError(
            "--machine-names doesn't provide, "
            "use 'az aks machine list' to get current machine list"
        )

    # Agent pool names are compared case-insensitively.
    wanted_pool = nodepool_name.lower()
    existing_pools = client.list(resource_group_name, cluster_name)
    if not any(pool.name.lower() == wanted_pool for pool in existing_pools):
        raise ResourceNotFoundError(
            f"Node pool {nodepool_name} doesn't exist, "
            "use 'az aks nodepool list' to get current node pool list"
        )

    AgentPoolDeleteMachinesParameter = cmd.get_models(
        "AgentPoolDeleteMachinesParameter",
        resource_type=ResourceType.MGMT_CONTAINERSERVICE,
        operation_group="agent_pools",
    )

    machines = AgentPoolDeleteMachinesParameter(machine_names=machine_names)
    return sdk_no_wait(
        no_wait,
        client.begin_delete_machines,
        resource_group_name,
        cluster_name,
        nodepool_name,
        machines,
    )
# live only, otherwise the current recording mechanism will also record the binary files of
# kubectl and kubelogin, which would then end up in the cassette file
@live_only()
@AllowLargeResponse()
@AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix="clitest", location="westus2")
def test_aks_nodepool_delete_machines(self, resource_group, resource_group_location):
    """Delete one machine from a 4-node pool and expect the pool count to drop to 3.

    Steps: create a cluster, add a node pool with 4 nodes, look up one node of
    that pool through kubectl, delete it with `aks nodepool delete-machines`,
    then check the pool count reported by `aks show`. The cluster is deleted
    at the end regardless of the outcome.
    """
    aks_name = self.create_random_name("cliakstest", 16)
    nodepool_name = self.create_random_name("c", 6)
    self.kwargs.update(
        {
            "resource_group": resource_group,
            "location": resource_group_location,
            "name": aks_name,
            "nodepool_name": nodepool_name,
            "ssh_key_value": self.generate_ssh_keys(),
        }
    )

    # create aks cluster
    create_cmd = "aks create --resource-group={resource_group} --name={name} --ssh-key-value={ssh_key_value}"
    self.cmd(
        create_cmd,
        checks=[
            self.check("provisioningState", "Succeeded"),
        ],
    )
    # add nodepool
    self.cmd(
        "aks nodepool add --resource-group={resource_group} --cluster-name={name} --name={nodepool_name} --node-count=4",
        checks=[self.check("provisioningState", "Succeeded")],
    )

    try:
        # install kubectl; check_call (unlike call) raises CalledProcessError on a
        # non-zero exit status, so a failed install is reported instead of ignored
        try:
            subprocess.check_call(["az", "aks", "install-cli"])
        except subprocess.CalledProcessError as err:
            raise CLIInternalError("Failed to install kubectl with error: '{}'!".format(err)) from err

        # mkstemp leaves deletion to the caller; only the path is needed, so the
        # descriptor is closed right away and the file is removed in `finally`
        fd, browse_path = tempfile.mkstemp()
        os.close(fd)
        try:
            # get credential
            self.kwargs.update(
                {
                    "browse_path": browse_path,
                }
            )
            get_credential_cmd = "aks get-credentials -n {name} -g {resource_group} -f {browse_path}"
            self.cmd(get_credential_cmd)

            # get machine name
            label = "kubernetes.azure.com/agentpool=" + nodepool_name
            k_get_node_cmd = ["kubectl", "get", "node", "-l", label, "-o", "name", "--kubeconfig", browse_path]
            k_get_node_output = subprocess.check_output(
                k_get_node_cmd,
                universal_newlines=True,
                stderr=subprocess.STDOUT,
            )
            node_lines = k_get_node_output.splitlines()

            # `-o name` prints "node/<name>". Drop that exact prefix: str.strip("node/")
            # would remove any of the characters n, o, d, e, / from BOTH ends of the name.
            node_prefix = "node/"
            machine_name = node_lines[0].strip() if node_lines else ""
            if machine_name.startswith(node_prefix):
                machine_name = machine_name[len(node_prefix):]
            if not machine_name:
                raise CLIInternalError(
                    "kubectl returned no node for label '{}'".format(label)
                )
            print(f"machine_name: {machine_name}")
            self.kwargs.update(
                {
                    "machine_name": machine_name,
                }
            )

            # delete machines
            self.cmd(
                "aks nodepool delete-machines --resource-group={resource_group} --cluster-name={name} --nodepool-name={nodepool_name} --machine-names={machine_name}"
            )

            # check count
            # NOTE(review): index 1 assumes the added pool is the second profile - confirm
            self.cmd('aks show -g {resource_group} -n {name}', checks=[
                self.check('agentPoolProfiles[1].count', 3)
            ])
        finally:
            # remove the temporary kubeconfig
            os.remove(browse_path)

    finally:
        # delete cluster
        self.cmd(
            'aks delete -g {resource_group} -n {name} --yes --no-wait', checks=[self.is_empty()])