Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AKS] az aks nodepool delete-machines: Add support to delete specific machines in an agent pool #29921

Merged
merged 7 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,28 @@ def _aks_table_format(result):
# use ordered dicts so headers are predictable
return parsed.search(result, Options(dict_cls=OrderedDict))

def aks_machine_list_table_format(results):
    """Format a list of machines for display with "-o table".

    Each element of ``results`` is formatted independently by
    ``aks_machine_show_table_format``; the rows are returned in input order.
    """
    return [aks_machine_show_table_format(machine) for machine in results]

def aks_machine_show_table_format(result):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some unit tests for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I finally decided to use kubectl to get the machine names instead of using the machine API, because the machine API is not GA yet.

def parser(entry):
    """Project one machine entry onto the table columns name / ipv4 / ipv6.

    The addresses found under ``properties.network.ipAddresses`` are grouped
    by their ``family`` (case-insensitive) and rendered as a string in which
    every address is followed by ``;``.

    :param entry: mapping describing a single machine.
    :return: ``OrderedDict`` with the keys ``name``, ``ipv4`` and ``ipv6``.
    """
    # Tolerate absent or null "properties" / "network" / "ipAddresses" instead
    # of raising KeyError/TypeError in the middle of table rendering.
    # NOTE(review): presumably a machine can be listed before its network
    # profile is populated -- confirm against the service response.
    network = (entry.get("properties") or {}).get("network") or {}
    ips_by_family = {"ipv4": [], "ipv6": []}
    for address in network.get("ipAddresses") or []:
        family = (address.get("family") or "").lower()
        ip = address.get("ip")
        if ip and family in ips_by_family:
            ips_by_family[family].append(ip)

    # Work on a shallow copy so the caller's entry is not modified just to
    # feed the JMESPath projection below.
    row = dict(entry)
    for family, ips in ips_by_family.items():
        # Each address keeps its trailing ";" so the rendered cell is unchanged.
        row[family] = "".join(ip + ";" for ip in ips)

    parsed = compile_jmes("""{
        name: name,
        ipv4: ipv4,
        ipv6: ipv6
    }""")
    # use ordered dicts so headers are predictable
    return parsed.search(row, Options(dict_cls=OrderedDict))
return parser(result)


def aks_upgrades_table_format(result):
"""Format get-upgrades results as a summary for display with "-o table"."""
Expand Down
12 changes: 12 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,6 +1791,18 @@
text: az aks operation-abort -g myResourceGroup -n myAKSCluster
"""

# Help entry for `az aks nodepool delete-machines`: command summary, the
# --machine-names parameter description, and one usage example.
helps['aks nodepool delete-machines'] = """
type: command
short-summary: Delete specific machines in an agentpool for a managed cluster.
parameters:
- name: --machine-names
type: string array
short-summary: Space-separated list of machine names from the agent pool to be deleted.
examples:
- name: Delete specific machines in an agent pool
text: az aks nodepool delete-machines -g myResourceGroup --nodepool-name nodepool1 --cluster-name myAKSCluster --machine-names machine1
"""

helps['aks remove-dev-spaces'] = """
type: command
short-summary: Remove Azure Dev Spaces from a managed Kubernetes cluster.
Expand Down
7 changes: 7 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,13 @@ def load_arguments(self, _):
c.argument('dns_zone_resource_ids', options_list=['--ids'], required=True)
c.argument('attach_zones')

with self.argument_context("aks nodepool delete-machines") as c:
c.argument(
"machine_names",
nargs="+",
required=True,
help="Space-separated machine names to delete.",
)

def _get_default_install_location(exe_name):
system = platform.system()
Expand Down
1 change: 1 addition & 0 deletions src/azure-cli/azure/cli/command_modules/acs/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def load_command_table(self, _):
g.custom_command('start', 'aks_agentpool_start', supports_no_wait=True)
g.wait_command('wait')
g.custom_command('operation-abort', 'aks_agentpool_operation_abort', supports_no_wait=True)
g.custom_command("delete-machines", "aks_agentpool_delete_machines", supports_no_wait=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • FAIL - HIGH severity: missing_command_test_coverage
    Repo: ./, Src Branch: HEAD, Target Branch: origin/dev - Failed.
    Missing command test coverage: aks nodepool "delete-machines"
    Please add some scenario tests for the new command
    Or add the command with missing_command_test_coverage rule in linter_exclusions.yml

The rules in CI might be a bit rigid; I guess changing "delete-machines" to 'delete-machines' would fix the issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


with self.command_group('aks command', managed_clusters_sdk, client_factory=cf_managed_clusters) as g:
g.custom_command('invoke', 'aks_runcommand', supports_no_wait=True,
Expand Down
43 changes: 43 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
ResourceNotFoundError,
UnknownError,
ValidationError,
RequiredArgumentMissingError,
)
from azure.cli.core.commands import LongRunningOperation
from azure.cli.core.commands.client_factory import get_subscription_id
Expand Down Expand Up @@ -2647,6 +2648,48 @@ def aks_operation_abort(cmd, # pylint: disable=unused-argument
instance.power_state = power_state
return sdk_no_wait(no_wait, client.begin_abort_latest_operation, resource_group_name, name)

def aks_agentpool_delete_machines(cmd,
                                  client,
                                  resource_group_name,
                                  cluster_name,
                                  nodepool_name,
                                  machine_names,
                                  no_wait=False):
    """Delete specific machines from an agent pool of a managed cluster.

    :param cmd: CLI command context; used to resolve the
        ``AgentPoolDeleteMachinesParameter`` model.
    :param client: agent pools client; must provide ``list`` and
        ``begin_delete_machines``.
    :param resource_group_name: name of the resource group of the cluster.
    :param cluster_name: name of the managed cluster.
    :param nodepool_name: name of the agent pool (matched case-insensitively).
    :param machine_names: names of the machines to delete.
    :param no_wait: passed through to ``sdk_no_wait``.
    :return: whatever ``sdk_no_wait`` returns for ``begin_delete_machines``.
    :raises ResourceNotFoundError: if no agent pool named ``nodepool_name``
        is returned by ``client.list``.
    :raises RequiredArgumentMissingError: if ``machine_names`` is empty or
        ``None``.
    """
    requested_pool = nodepool_name.lower()
    agentpool_exists = any(
        agentpool_profile.name.lower() == requested_pool
        for agentpool_profile in client.list(resource_group_name, cluster_name)
    )
    if not agentpool_exists:
        raise ResourceNotFoundError(
            f"Node pool {nodepool_name} doesn't exist, "
            "use 'az aks nodepool list' to get current node pool list"
        )

    # Truthiness check also covers None, which previously escaped as a
    # TypeError from len() instead of the intended CLI error.
    if not machine_names:
        raise RequiredArgumentMissingError(
            "--machine-names is not provided, "
            "use 'az aks machine list' to get current machine list"
        )

    AgentPoolDeleteMachinesParameter = cmd.get_models(
        "AgentPoolDeleteMachinesParameter",
        resource_type=ResourceType.MGMT_CONTAINERSERVICE,
        operation_group="agent_pools",
    )

    machines = AgentPoolDeleteMachinesParameter(machine_names=machine_names)
    return sdk_no_wait(
        no_wait,
        client.begin_delete_machines,
        resource_group_name,
        cluster_name,
        nodepool_name,
        machines,
    )


def aks_agentpool_show(cmd, client, resource_group_name, cluster_name, nodepool_name):
instance = client.get(resource_group_name, cluster_name, nodepool_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import unittest

from azure.cli.command_modules.acs._format import version_to_tuple
from azure.cli.command_modules.acs._format import aks_machine_list_table_format
from azure.cli.command_modules.acs._helpers import (
_get_test_sp_object_id,
get_shared_control_plane_identity,
Expand Down Expand Up @@ -7923,6 +7924,69 @@ def test_aks_nodepool_snapshot(self, resource_group, resource_group_location):
self.is_empty()
])

@AllowLargeResponse()
@AKSCustomResourceGroupPreparer(
random_name_length=17, name_prefix="clitest", location="westus2"
)
def test_aks_nodepool_delete_machines(self, resource_group, resource_group_location):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Queued live test for this, you'll need to commit the recording file (would be generated by running the test case in live mode, find it from pipeline artifact) to pass CI.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://dev.azure.com/msazure/CloudNativeCompute/_build/results?buildId=103633419&view=results

This is the passing test run. Which recording file do I need? By the way, I have already uploaded test_aks_nodepool_delete_machines.yaml.

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took another look: it seems that the test case performs some data plane operations, which cannot be properly replayed. Please mark the case as @live_only() (leave a comment explaining why, and remove the corresponding recording file) to bypass the check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

aks_name = self.create_random_name("cliakstest", 16)
nodepool_name = self.create_random_name("c", 6)
self.kwargs.update(
{
"resource_group": resource_group,
"location": resource_group_location,
"name": aks_name,
"nodepool_name": nodepool_name,
"ssh_key_value": self.generate_ssh_keys(),
}
)

# create aks cluster
create_cmd = "aks create --resource-group={resource_group} --name={name} --ssh-key-value={ssh_key_value}"
self.cmd(
create_cmd,
checks=[
self.check("provisioningState", "Succeeded"),
],
)
# add nodepool
self.cmd(
"aks nodepool add --resource-group={resource_group} --cluster-name={name} --name={nodepool_name} --node-count=4",
checks=[self.check("provisioningState", "Succeeded")],
)
# list machines
list_cmd = 'aks machine list ' \
' --resource-group={resource_group} ' \
' --cluster-name={name} --nodepool-name={nodepool_name} -o json'
machine_list = self.cmd(list_cmd).get_output_in_json()
assert len(machine_list) == 4
aks_machine_list_table_format(machine_list)
# delete machines
machine_name1 = machine_list[0]["name"]
machine_name2 = machine_list[2]["name"]
self.kwargs.update(
{
"resource_group": resource_group,
"location": resource_group_location,
"name": aks_name,
"nodepool_name": nodepool_name,
"ssh_key_value": self.generate_ssh_keys(),
"machine_name1": machine_name1,
"machine_name2": machine_name2,
}
)
self.cmd(
"aks nodepool delete-machines --resource-group={resource_group} --cluster-name={name} --nodepool-name={nodepool_name} --machine-names {machine_name1} {machine_name2}"
)
# list machines after deletion
machine_list_after = self.cmd(list_cmd).get_output_in_json()
assert len(machine_list_after) == 2
# delete AKS cluster
self.cmd(
"aks delete -g {resource_group} -n {name} --yes --no-wait",
checks=[self.is_empty()],
)

@AllowLargeResponse()
@AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='centraluseuap')
def test_aks_create_with_windows_gmsa(self, resource_group, resource_group_location):
Expand Down
Loading