From e88dfd59ae68fd52fee98924ecbdd7a2ed57360c Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Mon, 18 Apr 2022 19:02:54 +0300 Subject: [PATCH 01/16] Add a script that updates central grafana datasources --- deployer/update_grafana_datasources.py | 210 +++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 deployer/update_grafana_datasources.py diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py new file mode 100644 index 0000000000..b08c785e02 --- /dev/null +++ b/deployer/update_grafana_datasources.py @@ -0,0 +1,210 @@ +""" +### Summary + +This is a helper script that will make sure the central grafana at https://grafana.pilot.2i2c.cloud is configured to use as datasource the authenticated prometheus instances of all the clusters that we run. + +### Run the script + +$ python deployer/grafana_datasources_manager.py + +""" + +import json +import requests + +from file_acquisition import find_absolute_path_to_cluster_file, get_decrypted_file +from helm_upgrade_decision import get_all_cluster_yaml_files +from utils import print_colour + +from ruamel.yaml import YAML + +yaml = YAML(typ="safe") + +DATASOURCE_ENDPOINT = "https://grafana.pilot.2i2c.cloud/api/datasources" +DESIGNATED_CENTRAL_CLUSTER = "2i2c" + + +def build_datasource_details(cluster_name): + """Builds the payload needed to create an authenticated datasource in Grafana for `cluster_name`. + + Args: + cluster_name: name of the cluster + Returns: + dict object: req payload to be consumed by Grafana + """ + # Get the prometheus address for cluster_name + datasource_url = get_cluster_prometheus_address(cluster_name) + + # Get the credentials of this prometheus instance + prometheus_creds = get_cluster_prometheus_creds(cluster_name) + + datasource_details = { + "name": f"{cluster_name}", + "type": "prometheus", + "url": f"https://{datasource_url}", + "access": "proxy", + "basicAuth": True, + "basicAuthUser": prometheus_creds["username"], + "secureJsonData": {"basicAuthPassword": prometheus_creds["password"]}, + } + + return datasource_details + + +def get_cluster_prometheus_address(cluster_name): + """ Retrieves the address of the prometheus instance running on the `cluster_name` cluster. + This address is stored in the `support.values.yaml` file of each cluster config directory. + + Args: + cluster_name: name of the cluster + Returns: + string object: https address of the prometheus instance + Raises ValueError if + - `prometheusIngressAuthSecret` isn't configured + - `support["prometheus"]["server"]["ingress"]["tls"]` doesn't exist + """ + cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent + + config_file = cluster_config_dir_path.joinpath(f"support.values.yaml") + with open(config_file) as f: + support_config = yaml.load(f) + + # Don't return the address if the prometheus instance wasn't securely exposed to the outside. + if not support_config.get("prometheusIngressAuthSecret", {}).get("enabled", False): + raise ValueError( + f"`prometheusIngressAuthSecret` wasn't configured for {cluster_name}" + ) + + tls_config = ( + support_config.get("prometheus", {}) + .get("server", {}) + .get("ingress", {}) + .get("tls", []) + ) + + if not tls_config: + raise ValueError( + f"No tls config was found for the prometheus instance of {cluster_name}" + ) + + # We only have one tls host right now. Re-think this if this changes. 
+ return tls_config[0]["hosts"][0] + + +def get_cluster_prometheus_creds(cluster_name): + """Retrieves the credentials of the prometheus instance running on the `cluster_name` cluster. + These credentials are stored in `enc-support.secret.values.yaml` file of each cluster config directory. + + Args: + cluster_name: name of the cluster + Returns: + dict object: {username: `username`, password: `password`} + """ + cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent + + config_filename = cluster_config_dir_path.joinpath( + f"enc-support.secret.values.yaml" + ) + + with get_decrypted_file(config_filename) as decrypted_path: + with open(decrypted_path) as f: + prometheus_config = yaml.load(f) + + return prometheus_config.get("prometheusIngressAuthSecret", {}) + + +def get_central_grafana_token(): + """Returns the access token of the central Grafana, i.e. the 2i2c one. + This access token should have enough permissions to create datasources. + """ + # Get the location of the file that stores the central grafana token + cluster_config_dir_path = find_absolute_path_to_cluster_file( + DESIGNATED_CENTRAL_CLUSTER + ).parent + + grafana_token_file = (cluster_config_dir_path).joinpath( + "enc-grafana-token.secret.yaml" + ) + + # Read the secret grafana token file + with get_decrypted_file(grafana_token_file) as decrypted_file_path: + with open(decrypted_file_path) as f: + config = yaml.load(f) + + return config["grafana_token"] + + +def build_request_headers(): + token = get_central_grafana_token() + + headers = { + "Accept": "application/json", + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + return headers + + +def get_clusters_used_as_datasources(): + """Returns a list of cluster names that have prometheus instances already defined as datasources of the centralized Grafana. + """ + headers = build_request_headers() + # Get a list of all the currently existing datasources + response = requests.get(DATASOURCE_ENDPOINT, headers=headers) + + if response.status_code != 200: + print( + f"An error occured when retrieving the datasources from {DATASOURCE_ENDPOINT}. \n Error was {response.text}." + ) + response.raise_for_status() + + print(f"Successfully retrieved the datasources!") + datasources = response.json() + return [datasource["name"] for datasource in datasources] + + +def main(): + # Get a list of the clusters that already have their prometheus instances used as datasources + datasources = get_clusters_used_as_datasources() + print(datasources) + + # Get a list of filepaths to all cluster.yaml files in the repo + cluster_files = get_all_cluster_yaml_files() + + for cluster_file in cluster_files: + # Read in the cluster.yaml file + with open(cluster_file) as f: + cluster_config = yaml.load(f) + + # Get the cluster's name + cluster_name = cluster_config.get("name", {}) + if cluster_name and cluster_name not in datasources: + print_colour(f"Checking cluster {cluster_name}...") + # Build the datasource details for the instances that aren't configures as datasources + try: + datasource_details = build_datasource_details(cluster_name) + req_body = json.dumps(datasource_details) + print(req_body) + # Tell Grafana to create and register a datasource for this cluster + headers = build_request_headers() + response = requests.post( + DATASOURCE_ENDPOINT, json=req_body, headers=headers + ) + if response.status_code != 200: + print( + f"An error occured when creating the datasource. \nError was {response.text}." 
+ ) + response.raise_for_status() + print_colour( + f"Successfully created a new datasource for {cluster_name}!" + ) + except Exception as e: + print( + f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping..." + ) + pass + + +if __name__ == "__main__": + main() From d6ef46532a1e8bb094c377419f5613a5925a524b Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Mon, 18 Apr 2022 19:21:51 +0300 Subject: [PATCH 02/16] Use the data field of requests --- deployer/update_grafana_datasources.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index b08c785e02..bc50fcf66d 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -39,13 +39,15 @@ def build_datasource_details(cluster_name): prometheus_creds = get_cluster_prometheus_creds(cluster_name) datasource_details = { - "name": f"{cluster_name}", + "name": cluster_name, "type": "prometheus", - "url": f"https://{datasource_url}", "access": "proxy", + "url": f"https://{datasource_url}", "basicAuth": True, "basicAuthUser": prometheus_creds["username"], - "secureJsonData": {"basicAuthPassword": prometheus_creds["password"]}, + "secureJsonData": { + "basicAuthPassword": prometheus_creds["password"] + }, } return datasource_details @@ -186,10 +188,12 @@ def main(): datasource_details = build_datasource_details(cluster_name) req_body = json.dumps(datasource_details) print(req_body) + + # Tell Grafana to create and register a datasource for this cluster headers = build_request_headers() response = requests.post( - DATASOURCE_ENDPOINT, json=req_body, headers=headers + DATASOURCE_ENDPOINT, data=req_body, headers=headers ) if response.status_code != 200: print( From 69c3fd07f49a574cb51ca6076a6d49e8959e12f7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Apr 2022 16:29:06 +0000 Subject: [PATCH 03/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deployer/update_grafana_datasources.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index bc50fcf66d..3eb05fd769 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -10,13 +10,12 @@ """ import json -import requests +import requests from file_acquisition import find_absolute_path_to_cluster_file, get_decrypted_file from helm_upgrade_decision import get_all_cluster_yaml_files -from utils import print_colour - from ruamel.yaml import YAML +from utils import print_colour yaml = YAML(typ="safe") @@ -45,16 +44,14 @@ def build_datasource_details(cluster_name): "url": f"https://{datasource_url}", "basicAuth": True, "basicAuthUser": prometheus_creds["username"], - "secureJsonData": { - "basicAuthPassword": prometheus_creds["password"] - }, + "secureJsonData": {"basicAuthPassword": prometheus_creds["password"]}, } return datasource_details def get_cluster_prometheus_address(cluster_name): - """ Retrieves the address of the prometheus instance running on the `cluster_name` cluster. + """Retrieves the address of the prometheus instance running on the `cluster_name` cluster. This address is stored in the `support.values.yaml` file of each cluster config directory. 
Args: @@ -149,8 +146,7 @@ def build_request_headers(): def get_clusters_used_as_datasources(): - """Returns a list of cluster names that have prometheus instances already defined as datasources of the centralized Grafana. - """ + """Returns a list of cluster names that have prometheus instances already defined as datasources of the centralized Grafana.""" headers = build_request_headers() # Get a list of all the currently existing datasources response = requests.get(DATASOURCE_ENDPOINT, headers=headers) @@ -189,7 +185,6 @@ def main(): req_body = json.dumps(datasource_details) print(req_body) - # Tell Grafana to create and register a datasource for this cluster headers = build_request_headers() response = requests.post( From bcfc93d543fe5032be83be4014628066dca6fdde Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 12:27:45 +0300 Subject: [PATCH 04/16] No need for f-strings --- deployer/update_grafana_datasources.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 3eb05fd769..8eaab46bd5 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -64,7 +64,7 @@ def get_cluster_prometheus_address(cluster_name): """ cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent - config_file = cluster_config_dir_path.joinpath(f"support.values.yaml") + config_file = cluster_config_dir_path.joinpath("support.values.yaml") with open(config_file) as f: support_config = yaml.load(f) @@ -102,7 +102,7 @@ def get_cluster_prometheus_creds(cluster_name): cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent config_filename = cluster_config_dir_path.joinpath( - f"enc-support.secret.values.yaml" + "enc-support.secret.values.yaml" ) with get_decrypted_file(config_filename) as decrypted_path: @@ -157,7 +157,7 @@ def get_clusters_used_as_datasources(): ) response.raise_for_status() - print(f"Successfully retrieved the datasources!") + print("Successfully retrieved the datasources!") datasources = response.json() return [datasource["name"] for datasource in datasources] From 50e1cf559153c9dc7c5f1a77df72933395457165 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Apr 2022 09:38:58 +0000 Subject: [PATCH 05/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deployer/update_grafana_datasources.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 8eaab46bd5..1e9777fdac 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -101,9 +101,7 @@ def get_cluster_prometheus_creds(cluster_name): """ cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent - config_filename = cluster_config_dir_path.joinpath( - "enc-support.secret.values.yaml" - ) + config_filename = cluster_config_dir_path.joinpath("enc-support.secret.values.yaml") with get_decrypted_file(config_filename) as decrypted_path: with open(decrypted_path) as f: From b43648ee47d3742c5d2f6741b631c299a2322306 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 12:38:49 +0300 Subject: [PATCH 06/16] Remove debugging print --- deployer/update_grafana_datasources.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 1e9777fdac..6348dab293 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -163,7 +163,6 @@ def get_clusters_used_as_datasources(): def main(): # Get a list of the clusters that already have their prometheus instances used as datasources datasources = get_clusters_used_as_datasources() - print(datasources) # Get a list of filepaths to all cluster.yaml files in the repo cluster_files = get_all_cluster_yaml_files() From f9dcc718c02a3968a5384ba4a85ce2bc1f98d889 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 13:39:30 +0300 Subject: [PATCH 07/16] Pass grafana and its cluster as args --- deployer/update_grafana_datasources.py | 65 ++++++++++++++++++-------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 6348dab293..ba5e9dcf6f 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -9,6 +9,7 @@ """ +import argparse import json import requests @@ -19,9 +20,6 @@ yaml = YAML(typ="safe") -DATASOURCE_ENDPOINT = "https://grafana.pilot.2i2c.cloud/api/datasources" -DESIGNATED_CENTRAL_CLUSTER = "2i2c" - def build_datasource_details(cluster_name): """Builds the payload needed to create an authenticated datasource in Grafana for `cluster_name`. @@ -110,14 +108,12 @@ def get_cluster_prometheus_creds(cluster_name): return prometheus_config.get("prometheusIngressAuthSecret", {}) -def get_central_grafana_token(): - """Returns the access token of the central Grafana, i.e. the 2i2c one. +def get_central_grafana_token(cluster_name): + """Returns the access token of the Grafana located in `cluster_name` cluster. This access token should have enough permissions to create datasources. """ # Get the location of the file that stores the central grafana token - cluster_config_dir_path = find_absolute_path_to_cluster_file( - DESIGNATED_CENTRAL_CLUSTER - ).parent + cluster_config_dir_path = find_absolute_path_to_cluster_file(cluster_name).parent grafana_token_file = (cluster_config_dir_path).joinpath( "enc-grafana-token.secret.yaml" @@ -131,8 +127,8 @@ def get_central_grafana_token(): return config["grafana_token"] -def build_request_headers(): - token = get_central_grafana_token() +def build_request_headers(cluster_name): + token = get_central_grafana_token(cluster_name) headers = { "Accept": "application/json", @@ -143,30 +139,54 @@ def build_request_headers(): return headers -def get_clusters_used_as_datasources(): +def get_clusters_used_as_datasources(cluster_name, datasource_endpoint): """Returns a list of cluster names that have prometheus instances already defined as datasources of the centralized Grafana.""" - headers = build_request_headers() + headers = build_request_headers(cluster_name) # Get a list of all the currently existing datasources - response = requests.get(DATASOURCE_ENDPOINT, headers=headers) + response = requests.get(datasource_endpoint, headers=headers) if response.status_code != 200: print( - f"An error occured when retrieving the datasources from {DATASOURCE_ENDPOINT}. \n Error was {response.text}." + f"An error occured when retrieving the datasources from {datasource_endpoint}. \n Error was {response.text}." 
) response.raise_for_status() - print("Successfully retrieved the datasources!") datasources = response.json() - return [datasource["name"] for datasource in datasources] + datasources = [datasource["name"] for datasource in datasources] + print_colour( + f"Successfully retrieved {len(datasources)} existing datasources! {datasources}" + ) + + return datasources def main(): + argparser = argparse.ArgumentParser( + description="""A command line tool to update Grafana + datasources. + """ + ) + + argparser.add_argument( + "cluster_name", type=str, help="The name of the cluster where the Grafana lives" + ) + + argparser.add_argument("grafana_url", type=str, help="The public URL of Grafana") + + args = argparser.parse_args() + cluster = args.cluster_name + grafana_url = args.grafana_url.rstrip("/") + datasource_endpoint = f"{grafana_url}/api/datasources" + # Get a list of the clusters that already have their prometheus instances used as datasources - datasources = get_clusters_used_as_datasources() + datasources = get_clusters_used_as_datasources(cluster, datasource_endpoint) # Get a list of filepaths to all cluster.yaml files in the repo cluster_files = get_all_cluster_yaml_files() + print("Searching for clusters that aren't Grafana datasources...") + # Count how many clusters we can't add as datasources for logging + exceptions = 0 for cluster_file in cluster_files: # Read in the cluster.yaml file with open(cluster_file) as f: @@ -175,7 +195,9 @@ def main(): # Get the cluster's name cluster_name = cluster_config.get("name", {}) if cluster_name and cluster_name not in datasources: - print_colour(f"Checking cluster {cluster_name}...") + print_colour( + f"Found {cluster_name} cluster. Checking if it can be added..." + ) # Build the datasource details for the instances that aren't configures as datasources try: datasource_details = build_datasource_details(cluster_name) @@ -185,7 +207,7 @@ def main(): # Tell Grafana to create and register a datasource for this cluster headers = build_request_headers() response = requests.post( - DATASOURCE_ENDPOINT, data=req_body, headers=headers + datasource_endpoint, data=req_body, headers=headers ) if response.status_code != 200: print( @@ -199,8 +221,13 @@ def main(): print( f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping..." ) + exceptions += 1 pass + print_colour( + f"Failed to add {exceptions} clusters as datasources. See errors above!" + ) + if __name__ == "__main__": main() From 63d205f1d4ee33ea02e308d04540a75b05e13b90 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 13:46:27 +0300 Subject: [PATCH 08/16] Add more colour --- deployer/update_grafana_datasources.py | 18 ++++++++--------- deployer/utils.py | 28 ++++++++++++++++++-------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index ba5e9dcf6f..4a950f7a5d 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -152,12 +152,7 @@ def get_clusters_used_as_datasources(cluster_name, datasource_endpoint): response.raise_for_status() datasources = response.json() - datasources = [datasource["name"] for datasource in datasources] - print_colour( - f"Successfully retrieved {len(datasources)} existing datasources! 
{datasources}" - ) - - return datasources + return [datasource["name"] for datasource in datasources] def main(): @@ -195,7 +190,7 @@ def main(): # Get the cluster's name cluster_name = cluster_config.get("name", {}) if cluster_name and cluster_name not in datasources: - print_colour( + print( f"Found {cluster_name} cluster. Checking if it can be added..." ) # Build the datasource details for the instances that aren't configures as datasources @@ -218,14 +213,17 @@ def main(): f"Successfully created a new datasource for {cluster_name}!" ) except Exception as e: - print( - f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping..." + print_colour( + f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping...", "yellow" ) exceptions += 1 pass print_colour( - f"Failed to add {exceptions} clusters as datasources. See errors above!" + f"Failed to add {exceptions} clusters as datasources. See errors above!", "red" + ) + print_colour( + f"Successfully retrieved {len(datasources)} existing datasources! {datasources}" ) diff --git a/deployer/utils.py b/deployer/utils.py index cafe928364..14f75ecc78 100644 --- a/deployer/utils.py +++ b/deployer/utils.py @@ -2,7 +2,7 @@ import subprocess -def print_colour(msg: str): +def print_colour(msg: str, colour = "green"): """Print messages in colour to be distinguishable in CI logs See the mybinder.org deploy.py script for more details: @@ -11,12 +11,24 @@ def print_colour(msg: str): Args: msg (str): The message to print in colour """ - if os.environ.get("TERM"): - BOLD = subprocess.check_output(["tput", "bold"]).decode() - GREEN = subprocess.check_output(["tput", "setaf", "2"]).decode() - NC = subprocess.check_output(["tput", "sgr0"]).decode() - else: + if not os.environ.get("TERM"): # no term, no colors - BOLD = GREEN = NC = "" + print(msg) + + return - print(BOLD + GREEN + msg + NC, flush=True) + BOLD = subprocess.check_output(["tput", "bold"]).decode() + YELLOW = subprocess.check_output(["tput", "setaf", "3"]).decode() + GREEN = subprocess.check_output(["tput", "setaf", "2"]).decode() + RED = subprocess.check_output(["tput", "setaf", "1"]).decode() + NC = subprocess.check_output(["tput", "sgr0"]).decode() + + if colour == "green": + print(BOLD + GREEN + msg + NC, flush=True) + elif colour == "red": + print(BOLD + RED + msg + NC, flush=True) + elif colour == "yellow": + print(BOLD + YELLOW + msg + NC, flush=True) + else: + # colour not recognized, no colors + print(msg) \ No newline at end of file From a6ed563194bfa8ca9a56ae951739481e94bf60f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Apr 2022 10:46:51 +0000 Subject: [PATCH 09/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deployer/update_grafana_datasources.py | 7 +++---- deployer/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 4a950f7a5d..9446c47ddf 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -190,9 +190,7 @@ def main(): # Get the cluster's name cluster_name = cluster_config.get("name", {}) if cluster_name and cluster_name not in datasources: - print( - f"Found {cluster_name} cluster. Checking if it can be added..." - ) + print(f"Found {cluster_name} cluster. 
Checking if it can be added...") # Build the datasource details for the instances that aren't configures as datasources try: datasource_details = build_datasource_details(cluster_name) @@ -214,7 +212,8 @@ def main(): ) except Exception as e: print_colour( - f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping...", "yellow" + f"An error occured for {cluster_name}.\nError was: {e}.\nSkipping...", + "yellow", ) exceptions += 1 pass diff --git a/deployer/utils.py b/deployer/utils.py index 14f75ecc78..fee91a77b3 100644 --- a/deployer/utils.py +++ b/deployer/utils.py @@ -2,7 +2,7 @@ import subprocess -def print_colour(msg: str, colour = "green"): +def print_colour(msg: str, colour="green"): """Print messages in colour to be distinguishable in CI logs See the mybinder.org deploy.py script for more details: @@ -31,4 +31,4 @@ def print_colour(msg: str, colour = "green"): print(BOLD + YELLOW + msg + NC, flush=True) else: # colour not recognized, no colors - print(msg) \ No newline at end of file + print(msg) From 0af305fd0501a1bccbc9b5ce6499f1a1456145ae Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 14:26:43 +0300 Subject: [PATCH 10/16] Add docs on when to call the update_grafana_datasources.py script --- docs/howto/operate/grafana.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/howto/operate/grafana.md b/docs/howto/operate/grafana.md index 0b8a295987..9f1196b878 100644 --- a/docs/howto/operate/grafana.md +++ b/docs/howto/operate/grafana.md @@ -2,6 +2,11 @@ # Grafana Dashboards Each 2i2c Hub is set up with [a Prometheus server](https://prometheus.io/) to generate metrics and information about activity on the hub, and each cluster of hubs has a [Grafana deployment](https://grafana.com/) to ingest and visualize this data. + +```{note} +The Grafana deployement in the `2i2c` cluster ingests data from all the 2i2c clusters and will soon be able to be used as "the central Grafana". +``` + This section describes how to use these dashboards for a cluster. ## Access Hub Grafana Dashboards @@ -118,7 +123,7 @@ IPv4 address), or `CNAME` records if using AWS (where external IP is a domain na **Wait a while for the DNS to propagate!** (grafana:log-in)= -### Log in to the Grafana dashboard +### Log in to the cluster-spcific Grafana dashboard Eventually, visiting `GRAFANA_URL` will present you with a login page. Here are the credentials for logging in: @@ -126,6 +131,20 @@ Here are the credentials for logging in: - **username**: `admin` - **password**: located in `helm-charts/support/enc-support.secret.values.yaml` (`sops` encrypted). +### Register the cluster's Prometheus Server with the central Grafana + +Once you have deployed the support chart, you must also register this cluster as a datasource for the central Grafana dashboard. This will allow you to visualize cluster statistics not only from the cluster-specific Grafana deployement but also from the central dashboard, that aggregates data from all the clusters. + +Run the `update_grafana_datasources.py` script in the deployer to let the central Grafana know about this new prometheus server: + +``` +$ python3 deployer/update_grafana_datasources.py +``` + +Where: +- is the name of the cluster where the central Grafana lives. Right now, this defaults to "2i2c". +- is the public URL of the Grafana dashboard. Right now, this defaults to "https://grafana.pilot.2i2c.cloud" + ### Setting up Grafana Dashboards Once you have logged into grafana as the admin user, create a new API key. 
From 2e1e3c3d05ddf1abafaccfd28bf994c6f9a43231 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 14:32:04 +0300 Subject: [PATCH 11/16] Add default values for the cmd args --- deployer/update_grafana_datasources.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 9446c47ddf..854a834622 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -163,10 +163,20 @@ def main(): ) argparser.add_argument( - "cluster_name", type=str, help="The name of the cluster where the Grafana lives" + "cluster_name", + type=str, + nargs="?", + help="The name of the cluster where the Grafana lives", + default="2i2c", ) - argparser.add_argument("grafana_url", type=str, help="The public URL of Grafana") + argparser.add_argument( + "grafana_url", + type=str, + nargs="?", + help="The public URL of Grafana", + default="https://grafana.pilot.2i2c.cloud", + ) args = argparser.parse_args() cluster = args.cluster_name From e9b986301072908247d5773c6871464cfb492287 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Tue, 19 Apr 2022 14:50:04 +0300 Subject: [PATCH 12/16] Fix link to deploy action --- docs/reference/ci-cd.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ci-cd.md b/docs/reference/ci-cd.md index bb790b3f31..5e6887034a 100644 --- a/docs/reference/ci-cd.md +++ b/docs/reference/ci-cd.md @@ -30,7 +30,7 @@ cluster is deployed. The following steps are path-filtered so we can trigger new deployments on specific clusters when the associated files are actually changed. -Finally, the [deploy action](https://github.com/2i2c-org/infrastructure/tree/HEAD/.github/actions/deploy/action.yml) +Finally, the [deploy action](https://github.com/2i2c-org/infrastructure/tree/HEAD/.github/actions/setup-deploy/action.yaml) is called which in turn will use the deployer script to deploy the hubs on the corresponding clusters. From 275c33545c5691b8ed80dcdecb6cfdff1decf770 Mon Sep 17 00:00:00 2001 From: Georgiana Elena Date: Wed, 20 Apr 2022 10:43:04 +0300 Subject: [PATCH 13/16] Update deployer/update_grafana_datasources.py Co-authored-by: Chris Holdgraf --- deployer/update_grafana_datasources.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py index 854a834622..db317ddffe 100644 --- a/deployer/update_grafana_datasources.py +++ b/deployer/update_grafana_datasources.py @@ -1,9 +1,11 @@ """ ### Summary -This is a helper script that will make sure the central grafana at https://grafana.pilot.2i2c.cloud is configured to use as datasource the authenticated prometheus instances of all the clusters that we run. +Ensures that the central grafana at https://grafana.pilot.2i2c.cloud is configured to use as datasource the authenticated prometheus instances of all the clusters that we run. 
-### Run the script
+### How to use
+
+This is meant to be run as a script from the command line, like so:
 
 $ python deployer/grafana_datasources_manager.py
 

From 798e53f070b2662acb62404e11a39e140f67d025 Mon Sep 17 00:00:00 2001
From: GeorgianaElena
Date: Wed, 20 Apr 2022 12:19:54 +0300
Subject: [PATCH 14/16] Get the grafana address of the central cluster from
 the cluster support values file

---
 deployer/update_grafana_datasources.py | 36 +++++++++++++++++++--------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/deployer/update_grafana_datasources.py b/deployer/update_grafana_datasources.py
index db317ddffe..dc43e287ac 100644
--- a/deployer/update_grafana_datasources.py
+++ b/deployer/update_grafana_datasources.py
@@ -50,6 +50,28 @@ def build_datasource_details(cluster_name):
     return datasource_details
 
 
+def get_central_grafana_url(central_cluster_name):
+    cluster_config_dir_path = find_absolute_path_to_cluster_file(
+        central_cluster_name
+    ).parent
+
+    config_file = cluster_config_dir_path.joinpath("support.values.yaml")
+    with open(config_file) as f:
+        support_config = yaml.load(f)
+
+    grafana_tls_config = (
+        support_config.get("grafana", {}).get("ingress", {}).get("tls", [])
+    )
+
+    if not grafana_tls_config:
+        raise ValueError(
+            f"No tls config was found for the Grafana instance of {central_cluster_name}. Please consider enabling it before using it as the central Grafana."
+        )
+
+    # We only have one tls host right now. Modify this when things change.
return tls_config[0]["hosts"][0] @@ -172,18 +194,10 @@ def main(): default="2i2c", ) - argparser.add_argument( - "grafana_url", - type=str, - nargs="?", - help="The public URL of Grafana", - default="https://grafana.pilot.2i2c.cloud", - ) - args = argparser.parse_args() cluster = args.cluster_name - grafana_url = args.grafana_url.rstrip("/") - datasource_endpoint = f"{grafana_url}/api/datasources" + grafana_host = get_central_grafana_url(cluster) + datasource_endpoint = f"https://{grafana_host}/api/datasources" # Get a list of the clusters that already have their prometheus instances used as datasources datasources = get_clusters_used_as_datasources(cluster, datasource_endpoint) From 6f0c993e444bb7f6b6dbd9890bb808ea2f341d27 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Wed, 20 Apr 2022 12:22:54 +0300 Subject: [PATCH 15/16] Change script name to something more explicit --- ..._datasources.py => update_central_grafana_datasources.py} | 0 docs/howto/operate/grafana.md | 5 ++--- 2 files changed, 2 insertions(+), 3 deletions(-) rename deployer/{update_grafana_datasources.py => update_central_grafana_datasources.py} (100%) diff --git a/deployer/update_grafana_datasources.py b/deployer/update_central_grafana_datasources.py similarity index 100% rename from deployer/update_grafana_datasources.py rename to deployer/update_central_grafana_datasources.py diff --git a/docs/howto/operate/grafana.md b/docs/howto/operate/grafana.md index 9f1196b878..06c517b2c9 100644 --- a/docs/howto/operate/grafana.md +++ b/docs/howto/operate/grafana.md @@ -135,15 +135,14 @@ Here are the credentials for logging in: Once you have deployed the support chart, you must also register this cluster as a datasource for the central Grafana dashboard. This will allow you to visualize cluster statistics not only from the cluster-specific Grafana deployement but also from the central dashboard, that aggregates data from all the clusters. -Run the `update_grafana_datasources.py` script in the deployer to let the central Grafana know about this new prometheus server: +Run the `update_central_grafana_datasources.py` script in the deployer to let the central Grafana know about this new prometheus server: ``` -$ python3 deployer/update_grafana_datasources.py +$ python3 deployer/update_central_grafana_datasources.py ``` Where: - is the name of the cluster where the central Grafana lives. Right now, this defaults to "2i2c". -- is the public URL of the Grafana dashboard. Right now, this defaults to "https://grafana.pilot.2i2c.cloud" ### Setting up Grafana Dashboards From b13d590ba0c3e7ef714b8ab5a27d231a95dda114 Mon Sep 17 00:00:00 2001 From: GeorgianaElena Date: Wed, 20 Apr 2022 12:40:23 +0300 Subject: [PATCH 16/16] Make info about central grafana more discoverable --- docs/howto/operate/grafana.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/howto/operate/grafana.md b/docs/howto/operate/grafana.md index 06c517b2c9..0a0c68660f 100644 --- a/docs/howto/operate/grafana.md +++ b/docs/howto/operate/grafana.md @@ -3,10 +3,6 @@ Each 2i2c Hub is set up with [a Prometheus server](https://prometheus.io/) to generate metrics and information about activity on the hub, and each cluster of hubs has a [Grafana deployment](https://grafana.com/) to ingest and visualize this data. -```{note} -The Grafana deployement in the `2i2c` cluster ingests data from all the 2i2c clusters and will soon be able to be used as "the central Grafana". -``` - This section describes how to use these dashboards for a cluster. 
## Access Hub Grafana Dashboards @@ -18,6 +14,14 @@ To access the Grafana dashboards you'll need a **username** and **password**. These can be accessed using `sops` (see {ref}`tc:secrets:sops` for how to set up `sops` on your machine). See [](grafana:log-in) for how to find the credentials information. +## The Central Grafana + +The Grafana deployment in the `2i2c` cluster ingests data from all the 2i2c clusters and will soon be able to be used as "the central Grafana". + +```{note} +TODO: should add more info once this is ready to use. +``` + (grafana:new-grafana)= ## Set up Grafana Dashboards for a cluster
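Once the series above is applied, a quick way to confirm that a cluster's prometheus instance was registered is to list the datasource names the central Grafana reports — the same call that `get_clusters_used_as_datasources` makes internally. A minimal sketch, assuming a valid API token (the value below is a placeholder):

```python
# List the datasources currently registered in the central Grafana.
# The token is an illustrative placeholder; the real one is sops-encrypted.
import requests

GRAFANA_TOKEN = "grafana-api-token"
DATASOURCE_ENDPOINT = "https://grafana.pilot.2i2c.cloud/api/datasources"

headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {GRAFANA_TOKEN}",
}

response = requests.get(DATASOURCE_ENDPOINT, headers=headers)
response.raise_for_status()

# Each datasource is named after its cluster, so this prints the list of
# clusters already wired into the central Grafana.
print(sorted(datasource["name"] for datasource in response.json()))
```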