Skip to content

Commit

Permalink
[AKS] feat: Azure Monitor Metrics addon (v2) (Managed Prometheus) GA (A…
Browse files Browse the repository at this point in the history
  • Loading branch information
bragi92 authored and yash-nisar committed May 16, 2023
1 parent f391873 commit 6b4249f
Show file tree
Hide file tree
Showing 31 changed files with 1,392 additions and 34 deletions.
41 changes: 41 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,24 @@
- name: --enable-keda
type: bool
short-summary: Enable KEDA workload auto-scaler.
- name: --enable-azure-monitor-metrics
type: bool
short-summary: Enable Azure Monitor Metrics Profile
- name: --azure-monitor-workspace-resource-id
type: string
short-summary: Resource ID of the Azure Monitor Workspace
- name: --ksm-metric-labels-allow-list
type: string
short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g. '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. '=pods=[*]').
- name: --ksm-metric-annotations-allow-list
type: string
short-summary: Comma-separated list of additional Kubernetes annotation keys that will be used in the resource's annotations metric. By default the metric contains only name and namespace labels. To include additional annotations provide a list of resource names in their plural form and Kubernetes annotation keys you would like to allow for them (e.g. '=namespaces=[k8s-annotation-1,k8s-annotation-n,...],pods=[app],...'). A single '*' can be provided per resource instead to allow any annotations, but that has severe performance implications (e.g. '=pods=[*]').
- name: --grafana-resource-id
type: string
short-summary: Resource ID of the Azure Managed Grafana Workspace
- name: --enable-windows-recording-rules
type: bool
short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon
examples:
- name: Create a Kubernetes cluster with an existing SSH public key.
Expand Down Expand Up @@ -549,6 +567,8 @@
text: az aks create -g MyResourceGroup -n MyManagedCluster --network-plugin none
- name: Create a kubernetes cluster with KEDA workload autoscaler enabled.
text: az aks create -g MyResourceGroup -n MyManagedCluster --enable-keda
- name: Create a kubernetes cluster with Azure Monitor Metrics enabled.
text: az aks create -g MyResourceGroup -n MyManagedCluster --enable-azure-monitor-metrics
"""

helps['aks update'] = """
Expand Down Expand Up @@ -791,6 +811,27 @@
- name: --disable-keda
type: bool
short-summary: Disable KEDA workload auto-scaler.
- name: --enable-azure-monitor-metrics
type: bool
short-summary: Enable Azure Monitor Metrics Profile
- name: --azure-monitor-workspace-resource-id
type: string
short-summary: Resource ID of the Azure Monitor Workspace
- name: --ksm-metric-labels-allow-list
type: string
short-summary: Comma-separated list of additional Kubernetes label keys that will be used in the resource' labels metric. By default the metric contains only name and namespace labels. To include additional labels provide a list of resource names in their plural form and Kubernetes label keys you would like to allow for them (e.g. '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. A single '*' can be provided per resource instead to allow any labels, but that has severe performance implications (e.g. '=pods=[*]').
- name: --ksm-metric-annotations-allow-list
type: string
short-summary: Comma-separated list of additional Kubernetes annotation keys that will be used in the resource's annotations metric. By default the metric contains only name and namespace labels. To include additional annotations provide a list of resource names in their plural form and Kubernetes annotation keys you would like to allow for them (e.g. '=namespaces=[k8s-annotation-1,k8s-annotation-n,...],pods=[app],...'). A single '*' can be provided per resource instead to allow any annotations, but that has severe performance implications (e.g. '=pods=[*]').
- name: --grafana-resource-id
type: string
short-summary: Resource ID of the Azure Managed Grafana Workspace
- name: --enable-windows-recording-rules
type: bool
short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon
- name: --disable-azure-monitor-metrics
type: bool
short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster.
examples:
- name: Reconcile the cluster back to its current state.
Expand Down
19 changes: 18 additions & 1 deletion src/azure-cli/azure/cli/command_modules/acs/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@
validate_ppg, validate_priority, validate_registry_name,
validate_snapshot_id, validate_snapshot_name, validate_spot_max_price,
validate_ssh_key, validate_taints, validate_vm_set_type,
validate_vnet_subnet_id)
validate_vnet_subnet_id,
validate_azuremonitorworkspaceresourceid,
validate_grafanaresourceid)
from azure.cli.core.commands.parameters import (
edge_zone_type, file_type, get_enum_type,
get_resource_name_completion_list, get_three_state_flag, name_type,
Expand Down Expand Up @@ -281,6 +283,13 @@ def load_arguments(self, _):
c.argument('linux_os_config')
c.argument('host_group_id', validator=validate_host_group_id)
c.argument('gpu_instance_profile', arg_type=get_enum_type(gpu_instance_profiles))
# azure monitor profile
c.argument('enable_azure_monitor_metrics', action='store_true')
c.argument('azure_monitor_workspace_resource_id', validator=validate_azuremonitorworkspaceresourceid)
c.argument('ksm_metric_labels_allow_list')
c.argument('ksm_metric_annotations_allow_list')
c.argument('grafana_resource_id', validator=validate_grafanaresourceid)
c.argument('enable_windows_recording_rules', action='store_true')
# misc
c.argument('yes', options_list=['--yes', '-y'], help='Do not prompt for confirmation.', action='store_true')

Expand Down Expand Up @@ -361,6 +370,14 @@ def load_arguments(self, _):
c.argument('max_count', type=int, validator=validate_nodes_count)
c.argument('nodepool_labels', nargs='*', validator=validate_nodepool_labels,
help='space-separated labels: key[=value] [key[=value] ...]. See https://aka.ms/node-labels for syntax of labels.')
# azure monitor profile
c.argument('enable_azure_monitor_metrics', action='store_true')
c.argument('azure_monitor_workspace_resource_id', validator=validate_azuremonitorworkspaceresourceid)
c.argument('ksm_metric_labels_allow_list')
c.argument('ksm_metric_annotations_allow_list')
c.argument('grafana_resource_id', validator=validate_grafanaresourceid)
c.argument('enable_windows_recording_rules', action='store_true')
c.argument('disable_azure_monitor_metrics', action='store_true')
# misc
c.argument('yes', options_list=['--yes', '-y'], help='Do not prompt for confirmation.', action='store_true')

Expand Down
29 changes: 29 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,3 +614,32 @@ def validate_registry_name(cmd, namespace):
if pos == -1:
logger.warning("The login server endpoint suffix '%s' is automatically appended.", acr_suffix)
namespace.acr = registry + acr_suffix


def sanitize_resource_id(resource_id):
    """Normalize an ARM resource ID string for comparison.

    Surrounding whitespace is removed, a leading "/" is added when absent,
    all trailing "/" characters are dropped, and the result is lower-cased.

    :param resource_id: resource ID string to normalize.
    :return: the normalized, lower-cased resource ID.
    """
    trimmed = resource_id.strip()
    rooted = trimmed if trimmed.startswith("/") else "/" + trimmed
    # rstrip is a no-op when there is no trailing slash, so no guard is needed.
    return rooted.rstrip("/").lower()


# pylint:disable=line-too-long
def validate_azuremonitorworkspaceresourceid(namespace):
    """Check that --azure-monitor-workspace-resource-id looks like a workspace ID.

    The value is normalized with ``sanitize_resource_id`` before matching; the
    namespace attribute itself is left untouched. A missing value (``None``)
    is accepted.

    :param namespace: parsed argument namespace; reads
        ``azure_monitor_workspace_resource_id``.
    :raises InvalidArgumentValueError: when the normalized value does not
        match the expected resource ID pattern.
    """
    raw_value = namespace.azure_monitor_workspace_resource_id
    if raw_value is None:
        return
    # The pattern is lower-case because the sanitized ID is lower-cased.
    expected = r'/subscriptions/.*/resourcegroups/.*/providers/microsoft.monitor/accounts/.*'
    if re.match(expected, sanitize_resource_id(raw_value)):
        return
    raise InvalidArgumentValueError("--azure-monitor-workspace-resource-id not in the correct format. It should match `/subscriptions/<subscriptionId>/resourceGroups/<resourceGroupName>/providers/microsoft.monitor/accounts/<resourceName>`")


# pylint:disable=line-too-long
def validate_grafanaresourceid(namespace):
    """Check that --grafana-resource-id looks like a Managed Grafana resource ID.

    The value is normalized with ``sanitize_resource_id`` before matching; the
    namespace attribute itself is left untouched. A missing value (``None``)
    is accepted.

    :param namespace: parsed argument namespace; reads ``grafana_resource_id``.
    :raises InvalidArgumentValueError: when the normalized value does not
        match the expected resource ID pattern.
    """
    raw_value = namespace.grafana_resource_id
    if raw_value is None:
        return
    # The pattern is lower-case because the sanitized ID is lower-cased.
    expected = r'/subscriptions/.*/resourcegroups/.*/providers/microsoft.dashboard/grafana/.*'
    if re.match(expected, sanitize_resource_id(raw_value)):
        return
    raise InvalidArgumentValueError("--grafana-resource-id not in the correct format. It should match `/subscriptions/<subscriptionId>/resourceGroups/<resourceGroupName>/providers/microsoft.dashboard/grafana/<resourceName>`")
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
import json
from azure.cli.command_modules.acs.azuremonitormetrics.constants import AKS_CLUSTER_API
from azure.cli.core.azclierror import (
UnknownError,
CLIError
)


# pylint: disable=line-too-long
def addon_put(cmd, cluster_subscription, cluster_resource_group_name, cluster_name):
# Fetch the managed cluster resource from ARM, set
# properties.azureMonitorProfile.metrics.enabled to True when it is currently
# False, and PUT the cluster document back to the same URL.
#
# cmd: CLI command context; cmd.cli_ctx supplies the ARM endpoint and is
#      passed to send_raw_request.
# cluster_subscription / cluster_resource_group_name / cluster_name: used to
#      build the managed cluster resource URL.
# Raises UnknownError when either request raises CLIError.
# Nothing is returned explicitly.
#
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the PUT at the end runs unconditionally or only when the flag was
# flipped -- confirm against the real file before restructuring.
from azure.cli.core.util import send_raw_request
armendpoint = cmd.cli_ctx.cloud.endpoints.resource_manager
# Despite the name, this URL is the managed cluster resource itself; it is
# used for both the GET and the PUT below.
feature_check_url = f"{armendpoint}/subscriptions/{cluster_subscription}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.ContainerService/managedClusters/{cluster_name}?api-version={AKS_CLUSTER_API}"
try:
# The read request is tagged with the "addon_get" user agent.
headers = ['User-Agent=azuremonitormetrics.addon_get']
r = send_raw_request(cmd.cli_ctx, "GET", feature_check_url,
body={}, headers=headers)
except CLIError as e:
raise UnknownError(e)
json_response = json.loads(r.text)
# "properties" is indexed directly: a response without it raises KeyError.
if "azureMonitorProfile" in json_response["properties"]:
if "metrics" in json_response["properties"]["azureMonitorProfile"]:
# "enabled" is also indexed directly, so a metrics object without that key
# raises KeyError here (the question in the comment below is still open).
if json_response["properties"]["azureMonitorProfile"]["metrics"]["enabled"] is False:
# What if enabled doesn't exist
json_response["properties"]["azureMonitorProfile"]["metrics"]["enabled"] = True
try:
headers = ['User-Agent=azuremonitormetrics.addon_put']
# The whole (possibly modified) GET response is sent back as the PUT body.
body = json.dumps(json_response)
r = send_raw_request(cmd.cli_ctx, "PUT", feature_check_url,
body=body, headers=headers)
except CLIError as e:
raise UnknownError(e)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
import json
import uuid
from knack.util import CLIError
from azure.cli.command_modules.acs.azuremonitormetrics.constants import (
GRAFANA_API,
GRAFANA_ROLE_ASSIGNMENT_API,
GrafanaLink
)
from azure.cli.command_modules.acs.azuremonitormetrics.helper import sanitize_resource_id


def link_grafana_instance(cmd, raw_parameters, azure_monitor_workspace_resource_id):
    """Link an Azure Managed Grafana instance to an Azure Monitor Workspace.

    The function performs three ARM calls:

    1. GET the Grafana resource to read its identity's principal ID.
    2. PUT a role assignment for the ``Monitoring Data Reader`` role
       definition on the workspace for that principal. A failure here is
       reported to the user but does not abort the operation.
    3. PUT the Grafana resource back with the workspace appended to
       ``properties.grafanaIntegrations.azureMonitorWorkspaceIntegrations``.

    :param cmd: CLI command context; ``cmd.cli_ctx`` supplies the ARM endpoint
        and is passed to ``send_raw_request``.
    :param raw_parameters: mapping of raw CLI parameters; only
        ``grafana_resource_id`` is read.
    :param azure_monitor_workspace_resource_id: resource ID of the Azure
        Monitor Workspace to link.
    :return: ``GrafanaLink.NOPARAMPROVIDED`` when no Grafana resource ID was
        given, ``GrafanaLink.ALREADYPRESENT`` when the workspace is already in
        the integration list, otherwise ``GrafanaLink.SUCCESS``.
    :raises CLIError: when the Grafana GET or PUT fails, or when the Grafana
        payload has no ``properties``.
    """
    from azure.cli.core.util import send_raw_request

    grafana_resource_id = raw_parameters.get("grafana_resource_id")
    if grafana_resource_id is None or grafana_resource_id == "":
        return GrafanaLink.NOPARAMPROVIDED
    grafana_resource_id = sanitize_resource_id(grafana_resource_id)
    # The same URI serves the initial GET and the final PUT.
    grafana_uri = "{0}{1}?api-version={2}".format(
        cmd.cli_ctx.cloud.endpoints.resource_manager,
        grafana_resource_id,
        GRAFANA_API
    )

    # GET grafana principal ID
    try:
        headers = ['User-Agent=azuremonitormetrics.link_grafana_instance']
        grafana_arm_response = send_raw_request(cmd.cli_ctx, "GET", grafana_uri, body={}, headers=headers)
        grafana_payload = grafana_arm_response.json()
        service_principal_id = grafana_payload["identity"]["principalId"]
    except CLIError as e:
        raise CLIError(e) from e

    # Add Role Assignment
    try:
        monitoring_data_reader = "b0d8363b-8ddd-447d-831f-62ca05bff136"
        role_assignment_uri = "{0}{1}/providers/Microsoft.Authorization/roleAssignments/{2}?api-version={3}".format(
            cmd.cli_ctx.cloud.endpoints.resource_manager,
            azure_monitor_workspace_resource_id,
            uuid.uuid4(),
            GRAFANA_ROLE_ASSIGNMENT_API
        )
        role_definition_id = "{0}/providers/Microsoft.Authorization/roleDefinitions/{1}".format(
            azure_monitor_workspace_resource_id,
            monitoring_data_reader
        )
        association_body = json.dumps({
            "properties": {
                "roleDefinitionId": role_definition_id,
                "principalId": service_principal_id
            }
        })
        headers = ['User-Agent=azuremonitormetrics.add_role_assignment']
        send_raw_request(cmd.cli_ctx, "PUT", role_assignment_uri, body=association_body, headers=headers)
    except CLIError as e:
        # Not every CLIError carries an HTTP response, so read the status
        # defensively instead of assuming ``e.response`` exists.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        # HTTP 409 is not reported as a failure (presumably the role
        # assignment already exists -- confirm against ARM behavior).
        if status_code != 409:
            # Implicit string concatenation keeps source indentation out of
            # the user-facing message.
            error_string = (
                "Role Assignment failed. Please manually assign the `Monitoring Data Reader` role "
                "to the Azure Monitor Workspace ({0}) for the Azure Managed Grafana "
                "System Assigned Managed Identity ({1})"
            ).format(
                azure_monitor_workspace_resource_id,
                service_principal_id
            )
            print(error_string)

    # Setting up AMW Integration
    properties = grafana_payload.get("properties")
    if properties is None:
        raise CLIError("Invalid grafana payload to add AMW integration")
    # Inspect the actual keys rather than searching the serialized payload for
    # a substring, which could match unrelated text and then fail on lookup.
    grafana_integrations = properties.get("grafanaIntegrations") or {}
    properties["grafanaIntegrations"] = grafana_integrations
    amw_integrations = grafana_integrations.get("azureMonitorWorkspaceIntegrations") or []
    grafana_integrations["azureMonitorWorkspaceIntegrations"] = amw_integrations
    # Lower-case both sides: the workspace ID may be mixed-case, and comparing
    # it against a lower-cased dump would never match, causing duplicates.
    if amw_integrations and azure_monitor_workspace_resource_id.lower() in json.dumps(amw_integrations).lower():
        return GrafanaLink.ALREADYPRESENT

    amw_integrations.append({
        "azureMonitorWorkspaceResourceId": azure_monitor_workspace_resource_id
    })
    try:
        headers = ['User-Agent=azuremonitormetrics.setup_amw_grafana_integration', 'Content-Type=application/json']
        send_raw_request(cmd.cli_ctx, "PUT", grafana_uri, body=json.dumps(grafana_payload), headers=headers)
    except CLIError as e:
        raise CLIError(e) from e
    return GrafanaLink.SUCCESS
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
import json

from azure.cli.command_modules.acs.azuremonitormetrics.constants import MAC_API
from azure.cli.command_modules.acs.azuremonitormetrics.amw.defaults import get_default_mac_name_and_region
from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client
from azure.core.exceptions import HttpResponseError
from knack.util import CLIError


def create_default_mac(cmd, cluster_subscription, cluster_region):
    """Return the default Azure Monitor Workspace for a cluster, creating it if needed.

    The workspace lives in the ``DefaultResourceGroup-<region>`` resource
    group of the cluster subscription. When that group is missing it is
    created; when it exists and already contains the workspace, the existing
    workspace is returned without issuing a PUT.

    :param cmd: CLI command context; ``cmd.cli_ctx`` is used for the clients,
        the ARM endpoint and the raw request.
    :param cluster_subscription: subscription ID that hosts the workspace.
    :param cluster_region: cluster region used to pick the workspace region.
    :return: tuple ``(workspace_resource_id, workspace_location)``.
    :raises HttpResponseError: when the existence lookup fails with a status
        other than 404.
    :raises CLIError: when the workspace PUT fails.
    """
    from azure.cli.core.util import send_raw_request
    default_mac_name, default_mac_region = get_default_mac_name_and_region(cmd, cluster_region)
    default_resource_group_name = f"DefaultResourceGroup-{default_mac_region}"
    azure_monitor_workspace_resource_id = (
        f"/subscriptions/{cluster_subscription}"
        f"/resourceGroups/{default_resource_group_name}"
        f"/providers/microsoft.monitor/accounts/{default_mac_name}"
    )
    rg_client = get_resource_groups_client(cmd.cli_ctx, cluster_subscription)
    resource_client = get_resources_client(cmd.cli_ctx, cluster_subscription)

    if not rg_client.check_existence(default_resource_group_name):
        # No default resource group yet: create it, then fall through to the PUT.
        rg_client.create_or_update(default_resource_group_name, {"location": default_mac_region})
    else:
        try:
            existing = resource_client.get_by_id(azure_monitor_workspace_resource_id, MAC_API)
        except HttpResponseError as ex:
            # A 404 only means the workspace is missing; anything else is fatal.
            if ex.status_code != 404:
                raise ex
        else:
            # The workspace is already there, so report its actual location.
            return azure_monitor_workspace_resource_id, existing.location

    request_body = json.dumps({"location": default_mac_region, "properties": {}})
    arm_endpoint = cmd.cli_ctx.cloud.endpoints.resource_manager
    request_url = f"{arm_endpoint}{azure_monitor_workspace_resource_id}?api-version={MAC_API}"
    try:
        send_raw_request(cmd.cli_ctx, "PUT", request_url,
                         body=request_body,
                         headers=['User-Agent=azuremonitormetrics.create_default_mac'])
    except CLIError as e:
        raise e
    return azure_monitor_workspace_resource_id, default_mac_region
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
import json
from azure.cli.command_modules.acs.azuremonitormetrics.deaults import get_default_region
from azure.cli.command_modules.acs.azuremonitormetrics.responseparsers.amwlocationresponseparser import (
parseResourceProviderResponseForLocations
)
from azure.cli.command_modules.acs.azuremonitormetrics.constants import RP_LOCATION_API
from knack.util import CLIError


def get_supported_rp_locations(cmd, rp_name):
    """Fetch the locations reported for a resource provider.

    Issues a GET against ``/providers/<rp_name>`` on the ARM endpoint and
    passes the decoded JSON to ``parseResourceProviderResponseForLocations``.

    :param cmd: CLI command context; ``cmd.cli_ctx`` supplies the ARM endpoint.
    :param rp_name: resource provider namespace, e.g. ``Microsoft.Monitor``.
    :return: whatever ``parseResourceProviderResponseForLocations`` returns
        for the response body.
    """
    from azure.cli.core.util import send_raw_request
    user_agent = ['User-Agent=azuremonitormetrics.get_supported_rp_locations']
    arm_endpoint = cmd.cli_ctx.cloud.endpoints.resource_manager
    request_url = f"{arm_endpoint}/providers/{rp_name}?api-version={RP_LOCATION_API}"
    response = send_raw_request(cmd.cli_ctx, "GET", request_url, headers=user_agent)
    return parseResourceProviderResponseForLocations(json.loads(response.text))


def get_default_mac_region(cmd, cluster_region):
    """Choose the region for the default Azure Monitor Workspace.

    Preference order: the cluster region when ``Microsoft.Monitor`` lists it,
    otherwise the first listed location. Only when no locations are listed
    does the cloud name decide the result.

    :param cmd: CLI command context; ``cmd.cli_ctx.cloud.name`` is consulted
        as a fallback.
    :param cluster_region: region of the cluster.
    :return: region name for the default workspace.
    :raises CLIError: when no location is listed and the cloud is Azure China.
    """
    candidates = get_supported_rp_locations(cmd, 'Microsoft.Monitor')
    if cluster_region in candidates:
        return cluster_region
    if candidates:
        return candidates[0]

    # Nothing was reported by the resource provider: decide by cloud instead.
    cloud = cmd.cli_ctx.cloud.name.lower()
    if cloud == 'azurechinacloud':
        raise CLIError("Azure China Cloud is not supported for the Azure Monitor Metrics addon")
    if cloud == 'azureusgovernment':
        return "usgovvirginia"
    # default to public cloud
    return get_default_region(cmd)


def get_default_mac_name_and_region(cmd, cluster_region):
    """Build the default Azure Monitor Workspace name and pick its region.

    :param cmd: CLI command context, forwarded to ``get_default_mac_region``.
    :param cluster_region: region of the cluster.
    :return: tuple ``(workspace_name, workspace_region)`` where the name is
        ``DefaultAzureMonitorWorkspace-<region>`` cut to its first 43 characters.
    """
    region = get_default_mac_region(cmd, cluster_region)
    workspace_name = ("DefaultAzureMonitorWorkspace-" + region)[:43]
    return workspace_name, region
Loading

0 comments on commit 6b4249f

Please sign in to comment.