From 6b4249f17756f1e1a9854921be47fd0a6dd11058 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Thu, 11 May 2023 19:02:26 -0700 Subject: [PATCH] [AKS] feat: Azure Monitor Metrics addon (v2) (Managed Prometheus) GA (#26201) --- .../azure/cli/command_modules/acs/_help.py | 41 +++++ .../azure/cli/command_modules/acs/_params.py | 19 +- .../cli/command_modules/acs/_validators.py | 29 +++ .../acs/azuremonitormetrics/__init__.py | 0 .../acs/azuremonitormetrics/addonput.py | 36 ++++ .../acs/azuremonitormetrics/amg/__init__.py | 0 .../acs/azuremonitormetrics/amg/link.py | 89 +++++++++ .../acs/azuremonitormetrics/amw/__init__.py | 0 .../acs/azuremonitormetrics/amw/create.py | 48 +++++ .../acs/azuremonitormetrics/amw/defaults.py | 45 +++++ .../acs/azuremonitormetrics/amw/helper.py | 36 ++++ .../azuremonitorprofile.py | 104 +++++++++++ .../acs/azuremonitormetrics/constants.py | 87 +++++++++ .../acs/azuremonitormetrics/dc/__init__.py | 0 .../acs/azuremonitormetrics/dc/dce_api.py | 28 +++ .../acs/azuremonitormetrics/dc/dcr_api.py | 49 +++++ .../acs/azuremonitormetrics/dc/dcra_api.py | 43 +++++ .../acs/azuremonitormetrics/dc/defaults.py | 41 +++++ .../acs/azuremonitormetrics/dc/delete.py | 79 ++++++++ .../acs/azuremonitormetrics/deaults.py | 14 ++ .../acs/azuremonitormetrics/helper.py | 100 ++++++++++ .../recordingrules/__init__.py | 0 .../recordingrules/create.py | 111 +++++++++++ .../recordingrules/delete.py | 49 +++++ .../responseparsers/__init__.py | 0 .../amwlocationresponseparser.py | 29 +++ .../azure/cli/command_modules/acs/custom.py | 15 ++ .../command_modules/acs/linter_exclusions.yml | 42 ++++- .../acs/managed_cluster_decorator.py | 173 +++++++++++++++++- .../acs/tests/latest/test_aks_commands.py | 99 +++++++--- .../latest/test_managed_cluster_decorator.py | 20 +- 31 files changed, 1392 insertions(+), 34 deletions(-) create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/__init__.py create mode 100644 
src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/addonput.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/__init__.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/link.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/__init__.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/create.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/defaults.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/helper.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/constants.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/__init__.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dce_api.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcr_api.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcra_api.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/defaults.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/delete.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/deaults.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/helper.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/__init__.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/create.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/delete.py create mode 100644 
src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/__init__.py create mode 100644 src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/amwlocationresponseparser.py diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index e9254b89818..3729e82b00d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -479,6 +479,24 @@ - name: --enable-keda type: bool short-summary: Enable KEDA workload auto-scaler. + - name: --enable-azure-monitor-metrics + type: bool + short-summary: Enable Azure Monitor Metrics Profile + - name: --azure-monitor-workspace-resource-id + type: string + short-summary: Resource ID of the Azure Monitor Workspace + - name: --ksm-metric-labels-allow-list + type: string + short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g. '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. '=pods=[*]'). + - name: --ksm-metric-annotations-allow-list + type: string + short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g.'=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. 
'=pods=[*]'). + - name: --grafana-resource-id + type: string + short-summary: Resource ID of the Azure Managed Grafana Workspace + - name: --enable-windows-recording-rules + type: bool + short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon examples: - name: Create a Kubernetes cluster with an existing SSH public key. @@ -549,6 +567,8 @@ text: az aks create -g MyResourceGroup -n MyManagedCluster --network-plugin none - name: Create a kubernetes cluster with KEDA workload autoscaler enabled. text: az aks create -g MyResourceGroup -n MyManagedCluster --enable-keda + - name: Create a kubernetes cluster with Azure Monitor Metrics enabled. + text: az aks create -g MyResourceGroup -n MyManagedCluster --enable-azure-monitor-metrics """ helps['aks update'] = """ @@ -791,6 +811,27 @@ - name: --disable-keda type: bool short-summary: Disable KEDA workload auto-scaler. + - name: --enable-azure-monitor-metrics + type: bool + short-summary: Enable Azure Monitor Metrics Profile + - name: --azure-monitor-workspace-resource-id + type: string + short-summary: Resource ID of the Azure Monitor Workspace + - name: --ksm-metric-labels-allow-list + type: string + short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g. '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. '=pods=[*]'). + - name: --ksm-metric-annotations-allow-list + type: string + short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. 
To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g.'=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. '=pods=[*]'). + - name: --grafana-resource-id + type: string + short-summary: Resource ID of the Azure Managed Grafana Workspace + - name: --enable-windows-recording-rules + type: bool + short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon + - name: --disable-azure-monitor-metrics + type: bool + short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster. examples: - name: Reconcile the cluster back to its current state. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 607d7dd0c44..a04959cb591 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -56,7 +56,9 @@ validate_ppg, validate_priority, validate_registry_name, validate_snapshot_id, validate_snapshot_name, validate_spot_max_price, validate_ssh_key, validate_taints, validate_vm_set_type, - validate_vnet_subnet_id) + validate_vnet_subnet_id, + validate_azuremonitorworkspaceresourceid, + validate_grafanaresourceid) from azure.cli.core.commands.parameters import ( edge_zone_type, file_type, get_enum_type, get_resource_name_completion_list, get_three_state_flag, name_type, @@ -281,6 +283,13 @@ def load_arguments(self, _): c.argument('linux_os_config') c.argument('host_group_id', validator=validate_host_group_id) c.argument('gpu_instance_profile', arg_type=get_enum_type(gpu_instance_profiles)) + # azure monitor profile 
+ c.argument('enable_azure_monitor_metrics', action='store_true') + c.argument('azure_monitor_workspace_resource_id', validator=validate_azuremonitorworkspaceresourceid) + c.argument('ksm_metric_labels_allow_list') + c.argument('ksm_metric_annotations_allow_list') + c.argument('grafana_resource_id', validator=validate_grafanaresourceid) + c.argument('enable_windows_recording_rules', action='store_true') # misc c.argument('yes', options_list=['--yes', '-y'], help='Do not prompt for confirmation.', action='store_true') @@ -361,6 +370,14 @@ def load_arguments(self, _): c.argument('max_count', type=int, validator=validate_nodes_count) c.argument('nodepool_labels', nargs='*', validator=validate_nodepool_labels, help='space-separated labels: key[=value] [key[=value] ...]. See https://aka.ms/node-labels for syntax of labels.') + # azure monitor profile + c.argument('enable_azure_monitor_metrics', action='store_true') + c.argument('azure_monitor_workspace_resource_id', validator=validate_azuremonitorworkspaceresourceid) + c.argument('ksm_metric_labels_allow_list') + c.argument('ksm_metric_annotations_allow_list') + c.argument('grafana_resource_id', validator=validate_grafanaresourceid) + c.argument('enable_windows_recording_rules', action='store_true') + c.argument('disable_azure_monitor_metrics', action='store_true') # misc c.argument('yes', options_list=['--yes', '-y'], help='Do not prompt for confirmation.', action='store_true') diff --git a/src/azure-cli/azure/cli/command_modules/acs/_validators.py b/src/azure-cli/azure/cli/command_modules/acs/_validators.py index 762aaf87426..195acd68461 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_validators.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_validators.py @@ -614,3 +614,32 @@ def validate_registry_name(cmd, namespace): if pos == -1: logger.warning("The login server endpoint suffix '%s' is automatically appended.", acr_suffix) namespace.acr = registry + acr_suffix + + +def 
sanitize_resource_id(resource_id): + resource_id = resource_id.strip() + if not resource_id.startswith("/"): + resource_id = "/" + resource_id + if resource_id.endswith("/"): + resource_id = resource_id.rstrip("/") + return resource_id.lower() + + +# pylint:disable=line-too-long +def validate_azuremonitorworkspaceresourceid(namespace): + resource_id = namespace.azure_monitor_workspace_resource_id + if resource_id is None: + return + resource_id = sanitize_resource_id(resource_id) + if (bool(re.match(r'/subscriptions/.*/resourcegroups/.*/providers/microsoft.monitor/accounts/.*', resource_id))) is False: + raise InvalidArgumentValueError("--azure-monitor-workspace-resource-id not in the correct format. It should match `/subscriptions//resourceGroups//providers/microsoft.monitor/accounts/`") + + +# pylint:disable=line-too-long +def validate_grafanaresourceid(namespace): + resource_id = namespace.grafana_resource_id + if resource_id is None: + return + resource_id = sanitize_resource_id(resource_id) + if (bool(re.match(r'/subscriptions/.*/resourcegroups/.*/providers/microsoft.dashboard/grafana/.*', resource_id))) is False: + raise InvalidArgumentValueError("--grafana-resource-id not in the correct format. It should match `/subscriptions//resourceGroups//providers/microsoft.dashboard/grafana/`") diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/addonput.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/addonput.py new file mode 100644 index 00000000000..95b92ce4b27 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/addonput.py @@ -0,0 +1,36 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. 
All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import AKS_CLUSTER_API +from azure.cli.core.azclierror import ( + UnknownError, + CLIError +) + + +# pylint: disable=line-too-long +def addon_put(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + from azure.cli.core.util import send_raw_request + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + feature_check_url = f"{armendpoint}/subscriptions/{cluster_subscription}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.ContainerService/managedClusters/{cluster_name}?api-version={AKS_CLUSTER_API}" + try: + headers = ['User-Agent=azuremonitormetrics.addon_get'] + r = send_raw_request(cmd.cli_ctx, "GET", feature_check_url, + body={}, headers=headers) + except CLIError as e: + raise UnknownError(e) + json_response = json.loads(r.text) + if "azureMonitorProfile" in json_response["properties"]: + if "metrics" in json_response["properties"]["azureMonitorProfile"]: + if json_response["properties"]["azureMonitorProfile"]["metrics"]["enabled"] is False: + # What if enabled doesn't exist + json_response["properties"]["azureMonitorProfile"]["metrics"]["enabled"] = True + try: + headers = ['User-Agent=azuremonitormetrics.addon_put'] + body = json.dumps(json_response) + r = send_raw_request(cmd.cli_ctx, "PUT", feature_check_url, + body=body, headers=headers) + except CLIError as e: + raise UnknownError(e) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/link.py 
b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/link.py new file mode 100644 index 00000000000..3a6127b70b6 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amg/link.py @@ -0,0 +1,89 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import json +import uuid +from knack.util import CLIError +from azure.cli.command_modules.acs.azuremonitormetrics.constants import ( + GRAFANA_API, + GRAFANA_ROLE_ASSIGNMENT_API, + GrafanaLink +) +from azure.cli.command_modules.acs.azuremonitormetrics.helper import sanitize_resource_id + + +def link_grafana_instance(cmd, raw_parameters, azure_monitor_workspace_resource_id): + from azure.cli.core.util import send_raw_request + # GET grafana principal ID + try: + grafana_resource_id = raw_parameters.get("grafana_resource_id") + if grafana_resource_id is None or grafana_resource_id == "": + return GrafanaLink.NOPARAMPROVIDED + grafana_resource_id = sanitize_resource_id(grafana_resource_id) + grafanaURI = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + grafana_resource_id, + GRAFANA_API + ) + headers = ['User-Agent=azuremonitormetrics.link_grafana_instance'] + grafanaArmResponse = send_raw_request(cmd.cli_ctx, "GET", grafanaURI, body={}, headers=headers) + servicePrincipalId = grafanaArmResponse.json()["identity"]["principalId"] + except CLIError as e: + raise CLIError(e) + # Add Role Assignment + try: + MonitoringDataReader = "b0d8363b-8ddd-447d-831f-62ca05bff136" + roleDefinitionURI = "{0}{1}/providers/Microsoft.Authorization/roleAssignments/{2}?api-version={3}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + 
azure_monitor_workspace_resource_id, + uuid.uuid4(), + GRAFANA_ROLE_ASSIGNMENT_API + ) + roleDefinitionId = "{0}/providers/Microsoft.Authorization/roleDefinitions/{1}".format( + azure_monitor_workspace_resource_id, + MonitoringDataReader + ) + association_body = json.dumps({ + "properties": { + "roleDefinitionId": roleDefinitionId, + "principalId": servicePrincipalId + } + }) + headers = ['User-Agent=azuremonitormetrics.add_role_assignment'] + send_raw_request(cmd.cli_ctx, "PUT", roleDefinitionURI, body=association_body, headers=headers) + except CLIError as e: + if e.response.status_code != 409: + erroString = "Role Assingment failed. Please manually assign the `Monitoring Data Reader` role\ + to the Azure Monitor Workspace ({0}) for the Azure Managed Grafana\ + System Assigned Managed Identity ({1})".format( + azure_monitor_workspace_resource_id, + servicePrincipalId + ) + print(erroString) + # Setting up AMW Integration + targetGrafanaArmPayload = grafanaArmResponse.json() + if targetGrafanaArmPayload["properties"] is None: + raise CLIError("Invalid grafana payload to add AMW integration") + if "grafanaIntegrations" not in json.dumps(targetGrafanaArmPayload): + targetGrafanaArmPayload["properties"]["grafanaIntegrations"] = {} + if "azureMonitorWorkspaceIntegrations" not in json.dumps(targetGrafanaArmPayload): + targetGrafanaArmPayload["properties"]["grafanaIntegrations"]["azureMonitorWorkspaceIntegrations"] = [] + amwIntegrations = targetGrafanaArmPayload["properties"]["grafanaIntegrations"]["azureMonitorWorkspaceIntegrations"] + if amwIntegrations != [] and azure_monitor_workspace_resource_id in json.dumps(amwIntegrations).lower(): + return GrafanaLink.ALREADYPRESENT + try: + grafanaURI = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + grafana_resource_id, + GRAFANA_API + ) + targetGrafanaArmPayload["properties"]["grafanaIntegrations"]["azureMonitorWorkspaceIntegrations"].append({ + "azureMonitorWorkspaceResourceId": 
azure_monitor_workspace_resource_id + }) + targetGrafanaArmPayload = json.dumps(targetGrafanaArmPayload) + headers = ['User-Agent=azuremonitormetrics.setup_amw_grafana_integration', 'Content-Type=application/json'] + send_raw_request(cmd.cli_ctx, "PUT", grafanaURI, body=targetGrafanaArmPayload, headers=headers) + except CLIError as e: + raise CLIError(e) + return GrafanaLink.SUCCESS diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/create.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/create.py new file mode 100644 index 00000000000..c8bac89251c --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/create.py @@ -0,0 +1,48 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +import json + +from azure.cli.command_modules.acs.azuremonitormetrics.constants import MAC_API +from azure.cli.command_modules.acs.azuremonitormetrics.amw.defaults import get_default_mac_name_and_region +from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client +from azure.core.exceptions import HttpResponseError +from knack.util import CLIError + + +def create_default_mac(cmd, cluster_subscription, cluster_region): + from azure.cli.core.util import send_raw_request + default_mac_name, default_mac_region = get_default_mac_name_and_region(cmd, cluster_region) + default_resource_group_name = "DefaultResourceGroup-{0}".format(default_mac_region) + azure_monitor_workspace_resource_id = \ + "/subscriptions/{0}/resourceGroups/{1}/providers/microsoft.monitor/accounts/{2}"\ + .format( + cluster_subscription, + default_resource_group_name, + default_mac_name + ) + # Check if default resource group exists or not, if it does not then create it + resource_groups = get_resource_groups_client(cmd.cli_ctx, cluster_subscription) + resources = get_resources_client(cmd.cli_ctx, cluster_subscription) + + if resource_groups.check_existence(default_resource_group_name): + try: + resource = resources.get_by_id(azure_monitor_workspace_resource_id, MAC_API) + # If MAC already exists then return from here + return azure_monitor_workspace_resource_id, resource.location + except HttpResponseError as ex: + if ex.status_code != 404: + raise ex + else: + resource_groups.create_or_update(default_resource_group_name, {"location": default_mac_region}) + association_body = json.dumps({"location": default_mac_region, "properties": {}}) + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}{azure_monitor_workspace_resource_id}?api-version={MAC_API}" + try: + headers = 
['User-Agent=azuremonitormetrics.create_default_mac'] + send_raw_request(cmd.cli_ctx, "PUT", association_url, + body=association_body, headers=headers) + return azure_monitor_workspace_resource_id, default_mac_region + except CLIError as e: + raise e diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/defaults.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/defaults.py new file mode 100644 index 00000000000..88832f87dbd --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/defaults.py @@ -0,0 +1,45 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.deaults import get_default_region +from azure.cli.command_modules.acs.azuremonitormetrics.responseparsers.amwlocationresponseparser import ( + parseResourceProviderResponseForLocations +) +from azure.cli.command_modules.acs.azuremonitormetrics.constants import RP_LOCATION_API +from knack.util import CLIError + + +def get_supported_rp_locations(cmd, rp_name): + from azure.cli.core.util import send_raw_request + supported_locations = [] + headers = ['User-Agent=azuremonitormetrics.get_supported_rp_locations'] + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}/providers/{rp_name}?api-version={RP_LOCATION_API}" + r = send_raw_request(cmd.cli_ctx, "GET", association_url, headers=headers) + data = json.loads(r.text) + supported_locations = parseResourceProviderResponseForLocations(data) + return supported_locations + + +def get_default_mac_region(cmd, cluster_region): + supported_locations = get_supported_rp_locations(cmd, 
'Microsoft.Monitor') + if cluster_region in supported_locations: + return cluster_region + if len(supported_locations) > 0: + return supported_locations[0] + cloud_name = cmd.cli_ctx.cloud.name + if cloud_name.lower() == 'azurechinacloud': + raise CLIError("Azure China Cloud is not supported for the Azure Monitor Metrics addon") + if cloud_name.lower() == 'azureusgovernment': + return "usgovvirginia" + # default to public cloud + return get_default_region(cmd) + + +def get_default_mac_name_and_region(cmd, cluster_region): + default_mac_region = get_default_mac_region(cmd, cluster_region) + default_mac_name = "DefaultAzureMonitorWorkspace-" + default_mac_region + default_mac_name = default_mac_name[0:43] + return default_mac_name, default_mac_region diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/helper.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/helper.py new file mode 100644 index 00000000000..b88439e1694 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/amw/helper.py @@ -0,0 +1,36 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +from azure.cli.command_modules.acs.azuremonitormetrics.amw.create import create_default_mac +from azure.cli.command_modules.acs.azuremonitormetrics.constants import MAC_API +from azure.cli.command_modules.acs.azuremonitormetrics.helper import sanitize_resource_id +from azure.cli.command_modules.acs._client_factory import get_resources_client +from azure.core.exceptions import HttpResponseError + + +def get_amw_region(cmd, azure_monitor_workspace_resource_id): + # region of MAC can be different from region of RG so find the location of the azure_monitor_workspace_resource_id + amw_subscription_id = azure_monitor_workspace_resource_id.split("/")[2] + resources = get_resources_client(cmd.cli_ctx, amw_subscription_id) + try: + resource = resources.get_by_id( + azure_monitor_workspace_resource_id, MAC_API) + return resource.location.lower() + except HttpResponseError as ex: + raise ex + + +def get_azure_monitor_workspace_resource(cmd, cluster_subscription, cluster_region, raw_parameters): + azure_monitor_workspace_resource_id = raw_parameters.get("azure_monitor_workspace_resource_id") + if azure_monitor_workspace_resource_id is None or azure_monitor_workspace_resource_id == "": + azure_monitor_workspace_resource_id, azure_monitor_workspace_location = create_default_mac( + cmd, + cluster_subscription, + cluster_region + ) + else: + azure_monitor_workspace_resource_id = sanitize_resource_id(azure_monitor_workspace_resource_id) + azure_monitor_workspace_location = get_amw_region(cmd, azure_monitor_workspace_resource_id) + print(f"Using Azure Monitor Workspace (stores prometheus metrics) : {azure_monitor_workspace_resource_id}") + return azure_monitor_workspace_resource_id, azure_monitor_workspace_location.lower() diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py 
b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py new file mode 100644 index 00000000000..fa52b903267 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py @@ -0,0 +1,104 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +from azure.cli.command_modules.acs.azuremonitormetrics.addonput import addon_put +from azure.cli.command_modules.acs.azuremonitormetrics.amg.link import link_grafana_instance +from azure.cli.command_modules.acs.azuremonitormetrics.amw.helper import get_azure_monitor_workspace_resource +from azure.cli.command_modules.acs.azuremonitormetrics.dc.dce_api import create_dce +from azure.cli.command_modules.acs.azuremonitormetrics.dc.dcr_api import create_dcr +from azure.cli.command_modules.acs.azuremonitormetrics.dc.dcra_api import create_dcra +from azure.cli.command_modules.acs.azuremonitormetrics.dc.delete import ( + delete_dc_objects_if_prometheus_enabled, + get_dc_objects_list +) +from azure.cli.command_modules.acs.azuremonitormetrics.helper import ( + check_azuremonitormetrics_profile, + rp_registrations +) +from azure.cli.command_modules.acs.azuremonitormetrics.recordingrules.create import create_rules +from azure.cli.command_modules.acs.azuremonitormetrics.recordingrules.delete import delete_rules +from azure.cli.core.azclierror import InvalidArgumentValueError +from knack.util import CLIError + + +# pylint: disable=line-too-long +def link_azure_monitor_profile_artifacts( + cmd, + cluster_subscription, + cluster_resource_group_name, + cluster_name, + cluster_region, + raw_parameters, + create_flow +): + # MAC creation if required + 
azure_monitor_workspace_resource_id, azure_monitor_workspace_location = get_azure_monitor_workspace_resource(cmd, cluster_subscription, cluster_region, raw_parameters) + # DCE creation + dce_resource_id = create_dce(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_location) + # DCR creation + dcr_resource_id = create_dcr(cmd, azure_monitor_workspace_location, azure_monitor_workspace_resource_id, cluster_subscription, cluster_resource_group_name, cluster_name, dce_resource_id) + # DCRA creation + create_dcra(cmd, cluster_region, cluster_subscription, cluster_resource_group_name, cluster_name, dcr_resource_id) + # Link grafana + link_grafana_instance(cmd, raw_parameters, azure_monitor_workspace_resource_id) + # create recording rules and alerts + create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, azure_monitor_workspace_location, raw_parameters) + # if aks cluster create flow -> do a PUT on the AKS cluster to enable the addon + if create_flow: + addon_put(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + + +# pylint: disable=line-too-long +def unlink_azure_monitor_profile_artifacts(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + # Remove DC* if prometheus is enabled + dc_objects_list = get_dc_objects_list(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + delete_dc_objects_if_prometheus_enabled(cmd, dc_objects_list, cluster_subscription, cluster_resource_group_name, cluster_name) + # Delete rules (Conflict({"error":{"code":"InvalidResourceLocation","message":"The resource 'NodeRecordingRulesRuleGroup-' already exists in location 'eastus2' in resource group ''. + # A resource with the same name cannot be created in location 'eastus'. 
Please select a new resource name."}}) + delete_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + + +# pylint: disable=too-many-locals,too-many-branches,too-many-statements,line-too-long +def ensure_azure_monitor_profile_prerequisites( + cmd, + cluster_subscription, + cluster_resource_group_name, + cluster_name, + cluster_region, + raw_parameters, + remove_azuremonitormetrics, + create_flow=False +): + cloud_name = cmd.cli_ctx.cloud.name + if cloud_name.lower() == 'azurechinacloud': + raise CLIError("Azure China Cloud is not supported for the Azure Monitor Metrics addon") + + if cloud_name.lower() == "azureusgovernment": + grafana_resource_id = raw_parameters.get("grafana_resource_id") + if grafana_resource_id is not None: + if grafana_resource_id != "": + raise InvalidArgumentValueError("Azure US Government cloud does not support Azure Managed Grafana yet. Please follow this documentation for enabling it via the public cloud : aka.ms/ama-grafana-link-ff") + + if remove_azuremonitormetrics: + unlink_azure_monitor_profile_artifacts( + cmd, + cluster_subscription, + cluster_resource_group_name, + cluster_name + ) + else: + # Check if already onboarded + if create_flow is False: + check_azuremonitormetrics_profile(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + # Do RP registrations if required + rp_registrations(cmd, cluster_subscription) + link_azure_monitor_profile_artifacts( + cmd, + cluster_subscription, + cluster_resource_group_name, + cluster_name, + cluster_region, + raw_parameters, + create_flow + ) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/constants.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/constants.py new file mode 100644 index 00000000000..9c39a8d2aa4 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/constants.py @@ -0,0 +1,87 @@ +# 
-------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +from enum import Enum + +AKS_CLUSTER_API = "2023-01-01" +MAC_API = "2023-04-03" +DC_API = "2022-06-01" +GRAFANA_API = "2022-08-01" +GRAFANA_ROLE_ASSIGNMENT_API = "2022-04-01" +RULES_API = "2023-03-01" +FEATURE_API = "2020-09-01" +RP_API = "2021-04-01" +ALERTS_API = "2023-01-01-preview" +RP_LOCATION_API = "2022-01-01" + + +MapToClosestMACRegion = { + "australiacentral": "eastus", + "australiacentral2": "eastus", + "australiaeast": "eastus", + "australiasoutheast": "eastus", + "brazilsouth": "eastus", + "canadacentral": "eastus", + "canadaeast": "eastus", + "centralus": "centralus", + "centralindia": "centralindia", + "eastasia": "westeurope", + "eastus": "eastus", + "eastus2": "eastus2", + "francecentral": "westeurope", + "francesouth": "westeurope", + "japaneast": "eastus", + "japanwest": "eastus", + "koreacentral": "westeurope", + "koreasouth": "westeurope", + "northcentralus": "eastus", + "northeurope": "westeurope", + "southafricanorth": "westeurope", + "southafricawest": "westeurope", + "southcentralus": "eastus", + "southeastasia": "westeurope", + "southindia": "centralindia", + "uksouth": "westeurope", + "ukwest": "westeurope", + "westcentralus": "eastus", + "westeurope": "westeurope", + "westindia": "centralindia", + "westus": "westus", + "westus2": "westus2", + "westus3": "westus", + "norwayeast": "westeurope", + "norwaywest": "westeurope", + "switzerlandnorth": "westeurope", + "switzerlandwest": "westeurope", + "uaenorth": "westeurope", + "germanywestcentral": "westeurope", + "germanynorth": "westeurope", + "uaecentral": "westeurope", + "eastus2euap": "eastus2euap", + "centraluseuap": "westeurope", + 
"brazilsoutheast": "eastus", + "jioindiacentral": "centralindia", + "swedencentral": "westeurope", + "swedensouth": "westeurope", + "qatarcentral": "westeurope" +} + + +class GrafanaLink(Enum): + """ + Status of Grafana link to the Prometheus Addon + """ + SUCCESS = "SUCCESS" + FAILURE = "FAILURE" + ALREADYPRESENT = "ALREADYPRESENT" + NOPARAMPROVIDED = "NOPARAMPROVIDED" + + +class DC_TYPE(Enum): + """ + Types of DC* objects + """ + DCE = "DCE" + DCR = "DCR" + DCRA = "DCRA" diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dce_api.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dce_api.py new file mode 100644 index 00000000000..59f7d0b2be0 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dce_api.py @@ -0,0 +1,28 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import DC_API +from azure.cli.command_modules.acs.azuremonitormetrics.dc.defaults import get_default_dce_name +from knack.util import CLIError + + +def create_dce(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, mac_region): + from azure.cli.core.util import send_raw_request + dce_name = get_default_dce_name(cmd, mac_region, cluster_name) + dce_resource_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionEndpoints/{2}"\ + .format(cluster_subscription, cluster_resource_group_name, dce_name) + try: + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + dce_url = f"{armendpoint}{dce_resource_id}?api-version={DC_API}" + dce_creation_body = json.dumps({"name": dce_name, + "location": mac_region, + "kind": "Linux", + "properties": {}}) + headers = ['User-Agent=azuremonitormetrics.create_dce'] + send_raw_request(cmd.cli_ctx, "PUT", + dce_url, body=dce_creation_body, headers=headers) + return dce_resource_id + except CLIError as error: + raise error diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcr_api.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcr_api.py new file mode 100644 index 00000000000..c6f1975a1f4 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcr_api.py @@ -0,0 +1,49 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import MapToClosestMACRegion +from azure.cli.command_modules.acs.azuremonitormetrics.dc.defaults import get_default_region, sanitize_name +from azure.cli.command_modules.acs.azuremonitormetrics.constants import ( + DC_TYPE, + DC_API +) +from knack.util import CLIError + + +def get_default_dcr_name(cmd, mac_region, cluster_name): + region = get_default_region(cmd) + if dict.get(MapToClosestMACRegion, mac_region): + region = MapToClosestMACRegion[mac_region] + default_dcr_name = "MSProm-" + region + "-" + cluster_name + return sanitize_name(default_dcr_name, DC_TYPE.DCR, 64) + + +# pylint: disable=too-many-locals,too-many-branches,too-many-statements,line-too-long +def create_dcr(cmd, mac_region, azure_monitor_workspace_resource_id, cluster_subscription, cluster_resource_group_name, cluster_name, dce_resource_id): + from azure.cli.core.util import send_raw_request + dcr_name = get_default_dcr_name(cmd, mac_region, cluster_name) + dcr_resource_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionRules/{2}".format( + cluster_subscription, + cluster_resource_group_name, + dcr_name + ) + dcr_creation_body = json.dumps({"location": mac_region, + "kind": "Linux", + "properties": { + "dataCollectionEndpointId": dce_resource_id, + "dataSources": {"prometheusForwarder": [{"name": "PrometheusDataSource", "streams": ["Microsoft-PrometheusMetrics"], "labelIncludeFilter": {}}]}, + "dataFlows": [{"destinations": ["MonitoringAccount1"], "streams": ["Microsoft-PrometheusMetrics"]}], + "description": "DCR description", + "destinations": { + "monitoringAccounts": [{"accountResourceId": azure_monitor_workspace_resource_id, "name": "MonitoringAccount1"}]}}}) + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + dcr_url = 
f"{armendpoint}{dcr_resource_id}?api-version={DC_API}" + try: + headers = ['User-Agent=azuremonitormetrics.create_dcr'] + send_raw_request(cmd.cli_ctx, "PUT", + dcr_url, body=dcr_creation_body, headers=headers) + return dcr_resource_id + except CLIError as error: + raise error diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcra_api.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcra_api.py new file mode 100644 index 00000000000..daddb2cc589 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/dcra_api.py @@ -0,0 +1,43 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import DC_API +from azure.cli.command_modules.acs.azuremonitormetrics.dc.defaults import get_default_dcra_name +from knack.util import CLIError + + +# pylint: disable=line-too-long +def create_dcra(cmd, cluster_region, cluster_subscription, cluster_resource_group_name, cluster_name, dcr_resource_id): + from azure.cli.core.util import send_raw_request + cluster_resource_id = \ + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.ContainerService/managedClusters/{2}".format( + cluster_subscription, + cluster_resource_group_name, + cluster_name + ) + dcra_name = get_default_dcra_name(cmd, cluster_region, cluster_name) + dcra_resource_id = \ + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionRuleAssociations/{2}"\ + .format( + cluster_subscription, + cluster_resource_group_name, + dcra_name + ) + description_str = "Promtheus data collection association between DCR, DCE and target AKS resource" + # only 
create or delete the association between the DCR and cluster + association_body = json.dumps({"location": cluster_region, + "properties": { + "dataCollectionRuleId": dcr_resource_id, + "description": description_str + }}) + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/{dcra_name}?api-version={DC_API}" + try: + headers = ['User-Agent=azuremonitormetrics.create_dcra'] + send_raw_request(cmd.cli_ctx, "PUT", association_url, + body=association_body, headers=headers) + return dcra_resource_id + except CLIError as error: + raise error diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/defaults.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/defaults.py new file mode 100644 index 00000000000..4a2007fd185 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/defaults.py @@ -0,0 +1,41 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +from azure.cli.command_modules.acs.azuremonitormetrics.constants import ( + DC_TYPE, + MapToClosestMACRegion +) +from azure.cli.command_modules.acs.azuremonitormetrics.deaults import get_default_region + + +# DCR = 64, DCE = 44, DCRA = 64 +# All DC* object names should end only in alpha numeric (after `length` trim) +# DCE remove underscore from cluster name +def sanitize_name(name, objtype, length): + length = length - 1 + if objtype == DC_TYPE.DCE: + name = name.replace("_", "") + name = name[0:length] + lastIndexAlphaNumeric = len(name) - 1 + while lastIndexAlphaNumeric > -1 and name[lastIndexAlphaNumeric].isalnum() is False: + lastIndexAlphaNumeric = lastIndexAlphaNumeric - 1 + if lastIndexAlphaNumeric < 0: + return "" + return name[0:lastIndexAlphaNumeric + 1] + + +def get_default_dce_name(cmd, mac_region, cluster_name): + region = get_default_region(cmd) + if dict.get(MapToClosestMACRegion, mac_region): + region = MapToClosestMACRegion[mac_region] + default_dce_name = "MSProm-" + region + "-" + cluster_name + return sanitize_name(default_dce_name, DC_TYPE.DCE, 44) + + +def get_default_dcra_name(cmd, cluster_region, cluster_name): + region = get_default_region(cmd) + if dict.get(MapToClosestMACRegion, cluster_region): + region = MapToClosestMACRegion[cluster_region] + default_dcra_name = "ContainerInsightsMetricsExtension-" + region + "-" + cluster_name + return sanitize_name(default_dcra_name, DC_TYPE.DCRA, 64) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/delete.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/delete.py new file mode 100644 index 00000000000..48566bae071 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/dc/delete.py @@ -0,0 +1,79 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft 
Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import DC_API +from knack.util import CLIError + + +def get_dce_from_dcr(cmd, dcrId): + from azure.cli.core.util import send_raw_request + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}{dcrId}?api-version={DC_API}" + headers = ['User-Agent=azuremonitormetrics.get_dce_from_dcr'] + r = send_raw_request(cmd.cli_ctx, "GET", association_url, headers=headers) + data = json.loads(r.text) + if 'dataCollectionEndpointId' in data['properties']: + return str(data['properties']['dataCollectionEndpointId']) + return "" + + +# pylint: disable=line-too-long +def get_dc_objects_list(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + try: + from azure.cli.core.util import send_raw_request + cluster_resource_id = \ + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.ContainerService/managedClusters/{2}".format( + cluster_subscription, + cluster_resource_group_name, + cluster_name + ) + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations?api-version={DC_API}" + headers = ['User-Agent=azuremonitormetrics.get_dcra'] + r = send_raw_request(cmd.cli_ctx, "GET", association_url, headers=headers) + data = json.loads(r.text) + dc_object_array = [] + for item in data['value']: + if 'properties' in item and 'dataCollectionRuleId' in item['properties']: + dce_id = get_dce_from_dcr(cmd, item['properties']['dataCollectionRuleId']) + dc_object_array.append({'name': item['name'], 'dataCollectionRuleId': item['properties']['dataCollectionRuleId'], 'dceId': dce_id}) + return dc_object_array + except 
CLIError as e: + error = e + raise CLIError(error) + + +# pylint: disable=line-too-long +def delete_dc_objects_if_prometheus_enabled(cmd, dc_objects_list, cluster_subscription, cluster_resource_group_name, cluster_name): + from azure.cli.core.util import send_raw_request + cluster_resource_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.ContainerService/managedClusters/{2}".format( + cluster_subscription, + cluster_resource_group_name, + cluster_name + ) + for item in dc_objects_list: + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + association_url = f"{armendpoint}{item['dataCollectionRuleId']}?api-version={DC_API}" + try: + headers = ['User-Agent=azuremonitormetrics.get_dcr_if_prometheus_enabled'] + r = send_raw_request(cmd.cli_ctx, "GET", association_url, headers=headers) + data = json.loads(r.text) + if 'microsoft-prometheusmetrics' in [stream.lower() for stream in data['properties']['dataFlows'][0]['streams']]: + # delete DCRA + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + url = f"{armendpoint}{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/{item['name']}?api-version={DC_API}" + headers = ['User-Agent=azuremonitormetrics.delete_dcra'] + send_raw_request(cmd.cli_ctx, "DELETE", url, headers=headers) + # delete DCR + url = f"{armendpoint}{item['dataCollectionRuleId']}?api-version={DC_API}" + headers = ['User-Agent=azuremonitormetrics.delete_dcr'] + send_raw_request(cmd.cli_ctx, "DELETE", url, headers=headers) + # delete DCE + url = f"{armendpoint}{item['dceId']}?api-version={DC_API}" + headers = ['User-Agent=azuremonitormetrics.delete_dce'] + send_raw_request(cmd.cli_ctx, "DELETE", url, headers=headers) + except CLIError as e: + error = e + raise CLIError(error) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/deaults.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/deaults.py new file mode 100644 index 00000000000..2e338217c2a --- 
/dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/deaults.py @@ -0,0 +1,14 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +from knack.util import CLIError + + +def get_default_region(cmd): + cloud_name = cmd.cli_ctx.cloud.name + if cloud_name.lower() == 'azurechinacloud': + raise CLIError("Azure China Cloud is not supported for the Azure Monitor Metrics addon") + if cloud_name.lower() == 'azureusgovernment': + return "usgovvirginia" + return "eastus" diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/helper.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/helper.py new file mode 100644 index 00000000000..07a913facf0 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/helper.py @@ -0,0 +1,100 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +import json +from knack.util import CLIError +from azure.cli.core.azclierror import ( + UnknownError +) +from azure.cli.command_modules.acs.azuremonitormetrics.constants import ( + RP_API, + AKS_CLUSTER_API +) + + +def sanitize_resource_id(resource_id): + resource_id = resource_id.strip() + if not resource_id.startswith("/"): + resource_id = "/" + resource_id + if resource_id.endswith("/"): + resource_id = resource_id.rstrip("/") + return resource_id.lower() + + +def post_request(cmd, subscription_id, rp_name, headers): + from azure.cli.core.util import send_raw_request + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + customUrl = "{0}/subscriptions/{1}/providers/{2}/register?api-version={3}".format( + armendpoint, + subscription_id, + rp_name, + RP_API, + ) + try: + send_raw_request(cmd.cli_ctx, "POST", customUrl, headers=headers) + except CLIError as e: + raise CLIError(e) + + +# pylint: disable=line-too-long +def rp_registrations(cmd, subscription_id): + from azure.cli.core.util import send_raw_request + # Get list of RP's for RP's subscription + try: + headers = ['User-Agent=azuremonitormetrics.get_mac_sub_list'] + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + customUrl = "{0}/subscriptions/{1}/providers?api-version={2}&$select=namespace,registrationstate".format( + armendpoint, + subscription_id, + RP_API + ) + r = send_raw_request(cmd.cli_ctx, "GET", customUrl, headers=headers) + except CLIError as e: + raise CLIError(e) + isInsightsRpRegistered = False + isAlertsManagementRpRegistered = False + isMoniotrRpRegistered = False + isDashboardRpRegistered = False + json_response = json.loads(r.text) + values_array = json_response["value"] + for value in values_array: + if value["namespace"].lower() == "microsoft.insights" and value["registrationState"].lower() == "registered": + isInsightsRpRegistered = True + if value["namespace"].lower() 
== "microsoft.alertsmanagement" and value["registrationState"].lower() == "registered": + isAlertsManagementRpRegistered = True + if value["namespace"].lower() == "microsoft.monitor" and value["registrationState"].lower() == "registered": + isMoniotrRpRegistered = True + if value["namespace"].lower() == "microsoft.dashboard" and value["registrationState"].lower() == "registered": + isDashboardRpRegistered = True + if isInsightsRpRegistered is False: + headers = ['User-Agent=azuremonitormetrics.register_insights_rp'] + post_request(cmd, subscription_id, "microsoft.insights", headers) + if isAlertsManagementRpRegistered is False: + headers = ['User-Agent=azuremonitormetrics.register_alertsmanagement_rp'] + post_request(cmd, subscription_id, "microsoft.alertsmanagement", headers) + if isMoniotrRpRegistered is False: + headers = ['User-Agent=azuremonitormetrics.register_monitor_rp'] + post_request(cmd, subscription_id, "microsoft.monitor", headers) + if isDashboardRpRegistered is False: + headers = ['User-Agent=azuremonitormetrics.register_dashboard_rp'] + post_request(cmd, subscription_id, "microsoft.dashboard", headers) + + +# pylint: disable=line-too-long +def check_azuremonitormetrics_profile(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + from azure.cli.core.util import send_raw_request + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + feature_check_url = f"{armendpoint}/subscriptions/{cluster_subscription}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.ContainerService/managedClusters/{cluster_name}?api-version={AKS_CLUSTER_API}" + try: + headers = ['User-Agent=azuremonitormetrics.check_azuremonitormetrics_profile'] + r = send_raw_request(cmd.cli_ctx, "GET", feature_check_url, + body={}, headers=headers) + except CLIError as e: + raise UnknownError(e) + json_response = json.loads(r.text) + values_array = json_response["properties"] + if "azureMonitorProfile" in values_array: + if "metrics" in 
values_array["azureMonitorProfile"]: + if values_array["azureMonitorProfile"]["metrics"]["enabled"] is True: + raise CLIError(f"Azure Monitor Metrics is already enabled for this cluster. Please use `az aks update --disable-azure-monitor-metrics -g {cluster_resource_group_name} -n {cluster_name}` and then try enabling.") diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/create.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/create.py new file mode 100644 index 00000000000..8471a65d19c --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/create.py @@ -0,0 +1,111 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +import json +from azure.cli.command_modules.acs.azuremonitormetrics.constants import ALERTS_API, RULES_API +from knack.util import CLIError + + +# pylint: disable=line-too-long +def get_recording_rules_template(cmd, azure_monitor_workspace_resource_id): + from azure.cli.core.util import send_raw_request + headers = ['User-Agent=azuremonitormetrics.get_recording_rules_template'] + armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager + url = f"{armendpoint}{azure_monitor_workspace_resource_id}/providers/microsoft.alertsManagement/alertRuleRecommendations?api-version={ALERTS_API}" + r = send_raw_request(cmd.cli_ctx, "GET", url, headers=headers) + data = json.loads(r.text) + return data['value'] + + +# pylint: disable=line-too-long +def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, enable_rules, i): + from azure.cli.core.util import send_raw_request + body = json.dumps({ + "id": default_rule_group_id, + "name": default_rule_group_name, + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "location": mac_region, + "properties": { + "scopes": [ + azure_monitor_workspace_resource_id + ], + "enabled": enable_rules, + "clusterName": cluster_name, + "interval": "PT1M", + "rules": default_rules_template[i]["properties"]["rulesArmTemplate"]["resources"][0]["properties"]["rules"] + } + }) + for _ in range(3): + try: + headers = ['User-Agent=azuremonitormetrics.put_rules.' 
+ default_rule_group_name] + send_raw_request(cmd.cli_ctx, "PUT", url, + body=body, headers=headers) + break + except CLIError as e: + error = e + else: + raise error + + +# pylint: disable=line-too-long +def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region, raw_parameters): + # with urllib.request.urlopen("https://defaultrulessc.blob.core.windows.net/defaultrules/ManagedPrometheusDefaultRecordingRules.json") as url: + # default_rules_template = json.loads(url.read().decode()) + default_rules_template = get_recording_rules_template(cmd, azure_monitor_workspace_resource_id) + default_rule_group_name = "NodeRecordingRulesRuleGroup-{0}".format(cluster_name) + default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( + cluster_subscription, + cluster_resource_group_name, + default_rule_group_name + ) + url = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + default_rule_group_id, + RULES_API + ) + put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 0) + + default_rule_group_name = "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name) + default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( + cluster_subscription, + cluster_resource_group_name, + default_rule_group_name + ) + url = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + default_rule_group_id, + RULES_API + ) + put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 1) + + enable_windows_recording_rules = raw_parameters.get("enable_windows_recording_rules") + + if enable_windows_recording_rules 
is not True: + enable_windows_recording_rules = False + + default_rule_group_name = "NodeRecordingRulesRuleGroup-Win-{0}".format(cluster_name) + default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( + cluster_subscription, + cluster_resource_group_name, + default_rule_group_name + ) + url = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + default_rule_group_id, + RULES_API + ) + put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, enable_windows_recording_rules, 2) + + default_rule_group_name = "NodeAndKubernetesRecordingRulesRuleGroup-Win-{0}".format(cluster_name) + default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( + cluster_subscription, + cluster_resource_group_name, + default_rule_group_name + ) + url = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + default_rule_group_id, + RULES_API + ) + put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, enable_windows_recording_rules, 3) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/delete.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/delete.py new file mode 100644 index 00000000000..e0552e867e2 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/recordingrules/delete.py @@ -0,0 +1,49 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- +from azure.cli.command_modules.acs.azuremonitormetrics.constants import RULES_API + + +def delete_rule(cmd, cluster_subscription, cluster_resource_group_name, default_rule_group_name): + from azure.cli.core.util import send_raw_request + default_rule_group_id = \ + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( + cluster_subscription, + cluster_resource_group_name, + default_rule_group_name + ) + headers = ['User-Agent=azuremonitormetrics.delete_rule.' + default_rule_group_name] + url = "{0}{1}?api-version={2}".format( + cmd.cli_ctx.cloud.endpoints.resource_manager, + default_rule_group_id, + RULES_API + ) + send_raw_request(cmd.cli_ctx, "DELETE", url, headers=headers) + + +def delete_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + delete_rule( + cmd, + cluster_subscription, + cluster_resource_group_name, + "NodeRecordingRulesRuleGroup-{0}".format(cluster_name) + ) + delete_rule( + cmd, + cluster_subscription, + cluster_resource_group_name, + "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name) + ) + delete_rule( + cmd, + cluster_subscription, + cluster_resource_group_name, + "NodeRecordingRulesRuleGroup-Win-{0}".format(cluster_name) + ) + delete_rule( + cmd, + cluster_subscription, + cluster_resource_group_name, + "NodeAndKubernetesRecordingRulesRuleGroup-Win-{0}".format(cluster_name) + ) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/__init__.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/amwlocationresponseparser.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/amwlocationresponseparser.py new 
file mode 100644 index 00000000000..ab0fbe9df8e --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/responseparsers/amwlocationresponseparser.py @@ -0,0 +1,29 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +from typing import List + + +def parseResourceProviderResponseForLocations(resourceProviderResponse): + supportedLocationMap = {} + if not resourceProviderResponse.get('resourceTypes'): + return supportedLocationMap + resourceTypesRawArr = resourceProviderResponse['resourceTypes'] + for resourceTypeResponse in resourceTypesRawArr: + if resourceTypeResponse['resourceType'] == 'accounts': + supportedLocationMap = parseLocations(resourceTypeResponse['locations']) + return supportedLocationMap + + +def parseLocations(locations: List[str]) -> List[str]: + if not locations or len(locations) == 0: + return [] + return [reduceLocation(location) for location in locations] + + +def reduceLocation(location: str) -> str: + if not location: + return location + location = location.replace(' ', '').lower() + return location diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index dc290287b19..b527beb8cff 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -493,6 +493,13 @@ def aks_create( linux_os_config=None, host_group_id=None, gpu_instance_profile=None, + # azure monitor profile + enable_azure_monitor_metrics=False, + azure_monitor_workspace_resource_id=None, + ksm_metric_labels_allow_list=None, + ksm_metric_annotations_allow_list=None, + grafana_resource_id=None, + 
enable_windows_recording_rules=False, # misc yes=False, no_wait=False, @@ -611,6 +618,14 @@ def aks_update( min_count=None, max_count=None, nodepool_labels=None, + # azure monitor profile + enable_azure_monitor_metrics=False, + azure_monitor_workspace_resource_id=None, + ksm_metric_labels_allow_list=None, + ksm_metric_annotations_allow_list=None, + grafana_resource_id=None, + enable_windows_recording_rules=False, + disable_azure_monitor_metrics=False, # misc yes=False, no_wait=False, diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index 12b1549e918..f92f123db6e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -24,7 +24,25 @@ aks create: - option_length_too_long image_cleaner_interval_hours: rule_exclusions: - - option_length_too_long + - option_length_too_long + azure_monitor_workspace_resource_id: + rule_exclusions: + - option_length_too_long + disable_azure_monitor_metrics: + rule_exclusions: + - option_length_too_long + enable_azure_monitor_metrics: + rule_exclusions: + - option_length_too_long + ksm_metric_annotations_allow_list: + rule_exclusions: + - option_length_too_long + ksm_metric_labels_allow_list: + rule_exclusions: + - option_length_too_long + enable_windows_recording_rules: + rule_exclusions: + - option_length_too_long aks enable-addons: parameters: @@ -59,7 +77,7 @@ aks update: - option_length_too_long assign_kubelet_identity: rule_exclusions: - - option_length_too_long + - option_length_too_long enable_snapshot_controller: rule_exclusions: - option_length_too_long @@ -68,5 +86,23 @@ aks update: - option_length_too_long image_cleaner_interval_hours: rule_exclusions: - - option_length_too_long + - option_length_too_long + azure_monitor_workspace_resource_id: + rule_exclusions: + - option_length_too_long + disable_azure_monitor_metrics: + 
rule_exclusions: + - option_length_too_long + enable_azure_monitor_metrics: + rule_exclusions: + - option_length_too_long + ksm_metric_annotations_allow_list: + rule_exclusions: + - option_length_too_long + ksm_metric_labels_allow_list: + rule_exclusions: + - option_length_too_long + enable_windows_recording_rules: + rule_exclusions: + - option_length_too_long ... diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index a366e41aa2f..e9121878ed2 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -61,6 +61,9 @@ AKSAgentPoolModels, AKSAgentPoolUpdateDecorator, ) +from azure.cli.command_modules.acs.azuremonitormetrics.azuremonitorprofile import ( + ensure_azure_monitor_profile_prerequisites +) from azure.cli.command_modules.acs.base_decorator import ( BaseAKSContext, BaseAKSManagedClusterDecorator, @@ -251,6 +254,9 @@ def external_functions(self) -> SimpleNamespace: external_functions["add_monitoring_role_assignment"] = add_monitoring_role_assignment external_functions["add_virtual_node_role_assignment"] = add_virtual_node_role_assignment external_functions["ensure_container_insights_for_monitoring"] = ensure_container_insights_for_monitoring + external_functions[ + "ensure_azure_monitor_profile_prerequisites" + ] = ensure_azure_monitor_profile_prerequisites external_functions[ "ensure_default_log_analytics_workspace_for_monitoring" ] = ensure_default_log_analytics_workspace_for_monitoring @@ -4741,6 +4747,72 @@ def get_aks_custom_headers(self) -> Dict[str, str]: # this parameter does not need validation return aks_custom_headers + def _get_enable_azure_monitor_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of enable_azure_monitor_metrics. 
+ This function supports the option of enable_validation. + When enabled, if both enable_azure_monitor_metrics and disable_azure_monitor_metrics are + specified, raise a MutuallyExclusiveArgumentError. + + :return: bool + """ + # print("_get_enable_azure_monitor_metrics being called...") + # Read the original value passed by the command. + enable_azure_monitor_metrics = self.raw_param.get("enable_azure_monitor_metrics") + # In create mode, try to read the property value corresponding to the parameter from the `mc` object. + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + hasattr(self.mc, "azure_monitor_profile") and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics + ): + enable_azure_monitor_metrics = self.mc.azure_monitor_profile.metrics.enabled + # This parameter does not need dynamic completion. + if enable_validation: + if enable_azure_monitor_metrics and self._get_disable_azure_monitor_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-azure-monitor-metrics and --disable-azure-monitor-metrics at the same time" + ) + if enable_azure_monitor_metrics and not check_is_msi_cluster(self.mc): + raise RequiredArgumentMissingError( + "--enable-azure-monitor-metrics can only be specified for clusters with managed identity enabled" + ) + return enable_azure_monitor_metrics + + def get_enable_azure_monitor_metrics(self) -> bool: + """Obtain the value of enable_azure_monitor_metrics. + This function will verify the parameter by default. + If both enable_azure_monitor_metrics and disable_azure_monitor_metrics are specified, + raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_enable_azure_monitor_metrics(enable_validation=True) + + def _get_disable_azure_monitor_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of disable_azure_monitor_metrics. + This function supports the option of enable_validation.
+ When enabled, if both enable_azure_monitor_metrics and disable_azure_monitor_metrics are + specified, raise a MutuallyExclusiveArgumentError. + :return: bool + """ + # Read the original value passed by the command. + disable_azure_monitor_metrics = self.raw_param.get("disable_azure_monitor_metrics") + if enable_validation: + if disable_azure_monitor_metrics and self._get_enable_azure_monitor_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-azure-monitor-metrics and --disable-azure-monitor-metrics at the same time" + ) + return disable_azure_monitor_metrics + + def get_disable_azure_monitor_metrics(self) -> bool: + """Obtain the value of disable_azure_monitor_metrics. + This function will verify the parameter by default. + If both enable_azure_monitor_metrics and disable_azure_monitor_metrics are specified, + raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_disable_azure_monitor_metrics(enable_validation=True) + class AKSManagedClusterCreateDecorator(BaseAKSManagedClusterDecorator): def __init__( @@ -5770,6 +5842,29 @@ def set_up_node_resource_group(self, mc: ManagedCluster) -> ManagedCluster: mc.node_resource_group = self.context.get_node_resource_group() return mc + def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: + """Set up azure monitor profile for the ManagedCluster object. 
+ :return: the ManagedCluster object + """ + self._ensure_mc(mc) + # read the original value passed by the command + ksm_metric_labels_allow_list = self.context.raw_param.get("ksm_metric_labels_allow_list") + ksm_metric_annotations_allow_list = self.context.raw_param.get("ksm_metric_annotations_allow_list") + if ksm_metric_labels_allow_list is None: + ksm_metric_labels_allow_list = "" + if ksm_metric_annotations_allow_list is None: + ksm_metric_annotations_allow_list = "" + if self.context.get_enable_azure_monitor_metrics(): + if mc.azure_monitor_profile is None: + mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() + mc.azure_monitor_profile.metrics = self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=False) + mc.azure_monitor_profile.metrics.kube_state_metrics = self.models.ManagedClusterAzureMonitorProfileKubeStateMetrics( # pylint:disable=line-too-long + metric_labels_allowlist=str(ksm_metric_labels_allow_list), + metric_annotations_allow_list=str(ksm_metric_annotations_allow_list)) + # set intermediate + self.context.set_intermediate("azuremonitormetrics_addon_enabled", True, overwrite_exists=True) + return mc + def construct_mc_profile_default(self, bypass_restore_defaults: bool = False) -> ManagedCluster: """The overall controller used to construct the default ManagedCluster profile. 
@@ -5835,6 +5930,8 @@ def construct_mc_profile_default(self, bypass_restore_defaults: bool = False) -> mc = self.set_up_http_proxy_config(mc) # set up workload autoscaler profile mc = self.set_up_workload_auto_scaler_profile(mc) + # set up azure monitor metrics profile + mc = self.set_up_azure_monitor_profile(mc) # DO NOT MOVE: keep this at the bottom, restore defaults if not bypass_restore_defaults: @@ -5851,6 +5948,10 @@ def check_is_postprocessing_required(self, mc: ManagedCluster) -> bool: monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) ingress_appgw_addon_enabled = self.context.get_intermediate("ingress_appgw_addon_enabled", default_value=False) virtual_node_addon_enabled = self.context.get_intermediate("virtual_node_addon_enabled", default_value=False) + azuremonitormetrics_addon_enabled = self.context.get_intermediate( + "azuremonitormetrics_addon_enabled", + default_value=False + ) enable_managed_identity = self.context.get_enable_managed_identity() attach_acr = self.context.get_attach_acr() need_grant_vnet_permission_to_cluster_identity = self.context.get_intermediate( @@ -5861,6 +5962,7 @@ def check_is_postprocessing_required(self, mc: ManagedCluster) -> bool: monitoring_addon_enabled or ingress_appgw_addon_enabled or virtual_node_addon_enabled or + azuremonitormetrics_addon_enabled or (enable_managed_identity and attach_acr) or need_grant_vnet_permission_to_cluster_identity ): @@ -5919,7 +6021,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.add_monitoring_role_assignment( cluster, cluster_resource_id, self.cmd ) - else: + elif self.context.raw_param.get("enable_addons") is not None: # Create the DCR Association here addon_consts = self.context.get_addon_consts() CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") @@ -5971,6 +6073,24 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> 
None: is_service_principal=False, ) + # azure monitor metrics addon (v2) + azuremonitormetrics_addon_enabled = self.context.get_intermediate( + "azuremonitormetrics_addon_enabled", + default_value=False + ) + if azuremonitormetrics_addon_enabled: + # Create the DC* objects, AMW, recording rules and grafana link here + self.context.external_functions.ensure_azure_monitor_profile_prerequisites( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + self.__raw_parameters, + self.context.get_disable_azure_monitor_metrics(), + True + ) + def put_mc(self, mc: ManagedCluster) -> ManagedCluster: if self.check_is_postprocessing_required(mc): # send request @@ -6837,6 +6957,50 @@ def update_workload_auto_scaler_profile(self, mc: ManagedCluster) -> ManagedClus return mc + def update_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: + """Update azure monitor profile for the ManagedCluster object. 
+ :return: the ManagedCluster object + """ + self._ensure_mc(mc) + + # read the original value passed by the command + ksm_metric_labels_allow_list = self.context.raw_param.get("ksm_metric_labels_allow_list") + ksm_metric_annotations_allow_list = self.context.raw_param.get("ksm_metric_annotations_allow_list") + + if ksm_metric_labels_allow_list is None: + ksm_metric_labels_allow_list = "" + if ksm_metric_annotations_allow_list is None: + ksm_metric_annotations_allow_list = "" + + if self.context.get_enable_azure_monitor_metrics(): + if mc.azure_monitor_profile is None: + mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() + mc.azure_monitor_profile.metrics = self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + mc.azure_monitor_profile.metrics.kube_state_metrics = self.models.ManagedClusterAzureMonitorProfileKubeStateMetrics( # pylint:disable=line-too-long + metric_labels_allowlist=str(ksm_metric_labels_allow_list), + metric_annotations_allow_list=str(ksm_metric_annotations_allow_list)) + + if self.context.get_disable_azure_monitor_metrics(): + if mc.azure_monitor_profile is None: + mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() + mc.azure_monitor_profile.metrics = self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=False) + + if ( + self.context.raw_param.get("enable_azure_monitor_metrics") or + self.context.raw_param.get("disable_azure_monitor_metrics") + ): + self.context.external_functions.ensure_azure_monitor_profile_prerequisites( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + self.__raw_parameters, + self.context.get_disable_azure_monitor_metrics(), + False) + + return mc + def update_mc_profile_default(self) -> ManagedCluster: """The overall controller used to update the default ManagedCluster profile. 
@@ -6898,6 +7062,8 @@ def update_mc_profile_default(self) -> ManagedCluster: mc = self.update_http_proxy_config(mc) # update workload autoscaler profile mc = self.update_workload_auto_scaler_profile(mc) + # update azure monitor metrics profile + mc = self.update_azure_monitor_profile(mc) return mc def check_is_postprocessing_required(self, mc: ManagedCluster) -> bool: @@ -6956,7 +7122,10 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.add_monitoring_role_assignment( cluster, cluster_resource_id, self.cmd ) - else: + elif ( + self.context.raw_param.get("enable_addons") is not None or + self.context.raw_param.get("disable_addons") is not None + ): # Create the DCR Association here addon_consts = self.context.get_addon_consts() CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 45a23f8074d..2fa59aed646 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -7,7 +7,7 @@ import os import subprocess import tempfile -import time +import time import unittest from azure.cli.command_modules.acs._consts import CONST_KUBE_DASHBOARD_ADDON_NAME @@ -2038,7 +2038,7 @@ def test_aks_create_with_paid_sku(self, resource_group, resource_group_location, # delete self.cmd( 'aks delete -g {resource_group} -n {name} --yes --no-wait', checks=[self.is_empty()]) - + @AllowLargeResponse() @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') @AKSCustomRoleBasedServicePrincipalPreparer() @@ -6323,44 +6323,97 @@ def test_aks_update_with_defender(self, resource_group, resource_group_location) self.cmd( 'aks delete -g {resource_group} -n {name} --yes --no-wait', 
checks=[self.is_empty()]) + @live_only() @AllowLargeResponse() @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') - def test_aks_create_with_custom_monitoring_workspace(self, resource_group, resource_group_location): + def test_aks_create_with_azuremonitormetrics(self, resource_group, resource_group_location): + # reset the count so in replay mode the random names will start with 0 + self.test_resources_count = 0 + # kwargs for string formatting aks_name = self.create_random_name('cliakstest', 16) - workspace_name = self.create_random_name('cliaksworkspace', 20) + + node_vm_size = 'standard_d2s_v3' self.kwargs.update({ - 'name': aks_name, 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'resource_type': 'Microsoft.ContainerService/ManagedClusters', 'ssh_key_value': self.generate_ssh_keys(), - 'workspace_name': workspace_name + 'node_vm_size': node_vm_size }) - # create workspace - create_workspace_cmd = 'monitor log-analytics workspace create -g {resource_group} -n {workspace_name}' - self.cmd(create_workspace_cmd, checks=[ - # self.check('provisioningState', 'Succeeded'), - self.check('name', workspace_name) + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-azure-monitor-metrics --enable-windows-recording-rules --output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), ]) - # get workspace id - subscription_id = self.get_subscription_id() - workspace_id = "/subscriptions/{}/resourcegroups/{}/providers/microsoft.operationalinsights/workspaces/{}".format(subscription_id, resource_group, workspace_name) + # azuremonitor metrics will be set to false after initial creation command as it's in the + # postprocessing step that we do an update to enable it.
Adding a wait for the second put request + # in addonput.py which enables the Azure Monitor Metrics addon as all the DC* resources + # have now been created. + wait_cmd = ' '.join([ + 'aks', 'wait', '--resource-group={resource_group}', '--name={name}', '--updated', + '--interval 60', '--timeout 300', + ]) + self.cmd(wait_cmd, checks=[ + self.is_empty(), + ]) + + self.cmd('aks show -g {resource_group} -n {name} --output=json', checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + ]) + + # delete + cmd = 'aks delete --resource-group={resource_group} --name={name} --yes --no-wait' + self.cmd(cmd, checks=[ + self.is_empty(), + ]) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_update_with_azuremonitormetrics(self, resource_group, resource_group_location): + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' self.kwargs.update({ - 'workspace_id': workspace_id + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, }) - # create - create_cmd = 'aks create --resource-group={resource_group} --name={name} ' \ - '--ssh-key-value={ssh_key_value} -a monitoring --workspace-resource-id {workspace_id}' + # create: without enable-azure-monitor-metrics + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} --ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity --output=json' self.cmd(create_cmd, checks=[ self.check('provisioningState', 'Succeeded'), - self.check('addonProfiles.omsagent.enabled', True), - self.check('addonProfiles.omsagent.config.logAnalyticsWorkspaceResourceID', workspace_id) + self.not_exists('azureMonitorProfile.metrics'), + ]) + + # update: enable-azure-monitor-metrics 
+ update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-azure-monitor-metrics --enable-managed-identity --enable-windows-recording-rules' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + ]) + + # update: disable-azure-monitor-metrics + update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--disable-azure-monitor-metrics' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', False), ]) # delete - self.cmd( - 'aks delete -g {resource_group} -n {name} --yes --no-wait', checks=[self.is_empty()]) + cmd = 'aks delete --resource-group={resource_group} --name={name} --yes --no-wait' + self.cmd(cmd, checks=[ + self.is_empty(), + ]) # live only due to dependency `_add_role_assignment` is not mocked @live_only() @@ -8468,7 +8521,7 @@ def test_aks_create_dualstack_with_default_network(self, resource_group, resourc # delete self.cmd( 'aks delete -g {resource_group} -n {name} --yes --no-wait', checks=[self.is_empty()]) - + @AllowLargeResponse() @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2', preserve_default_location=True) def test_aks_create_and_update_ipv6_count(self, resource_group, resource_group_location): diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index cd10b93694b..b97dc97eecc 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -1737,7 +1737,7 @@ def test_get_network_plugin_mode(self): DecoratorMode.CREATE, ) 
self.assertEqual(ctx_3.get_network_plugin_mode(), "overlay") - + def test_get_network_plugin(self): # default ctx_1 = AKSManagedClusterContext( @@ -2175,7 +2175,7 @@ def test_get_enable_addons(self): # fail on aci_subnet_name/vnet_subnet_id not specified with self.assertRaises(RequiredArgumentMissingError): ctx_6.get_enable_addons() - + def test_get_http_proxy_config(self): # default ctx_1 = AKSManagedClusterContext( @@ -7227,7 +7227,12 @@ def test_postprocessing_after_mc_created(self): dec_2 = AKSManagedClusterCreateDecorator( self.cmd, self.client, - {"resource_group_name": "test_rg_name", "name": "test_name", "enable_msi_auth_for_monitoring": True}, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": True, + "enable_addons": "monitoring" + }, ResourceType.MGMT_CONTAINERSERVICE, ) monitoring_addon_profile_2 = self.models.ManagedClusterAddonProfile( @@ -7378,7 +7383,7 @@ def test_create_mc(self): return_value=mc_1, ): self.assertEqual(dec_1.create_mc(mc_1), mc_1) - + def test_set_up_http_proxy_config(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, @@ -9816,7 +9821,12 @@ def test_postprocessing_after_mc_created(self): dec_2 = AKSManagedClusterUpdateDecorator( self.cmd, self.client, - {"resource_group_name": "test_rg_name", "name": "test_name", "enable_msi_auth_for_monitoring": True}, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": True, + "enable_addons": "monitoring" + }, ResourceType.MGMT_CONTAINERSERVICE, ) monitoring_addon_profile_2 = self.models.ManagedClusterAddonProfile(