From 1adc27eb95d227ffd2117711caa05aa2a2ac45f3 Mon Sep 17 00:00:00 2001 From: Martin Bajanik Date: Tue, 20 Aug 2019 10:44:37 +0200 Subject: [PATCH] Add GCP infrastructure tracking (#118) --- docker-compose.dev.yml | 1 + docker-compose.yml | 2 + requirements.in | 2 + requirements.txt | 29 +++++++--- test-requirements.txt | 2 +- test/datacenters/test_gcp.py | 89 +++++++++++++++++++++++++++++ zoo/base/settings.py | 3 + zoo/datacenters/gcp.py | 71 +++++++++++++++++++++++ zoo/datacenters/mapping.py | 4 +- zoo/datacenters/models.py | 6 ++ zoo/datacenters/tasks.py | 8 ++- zoo/datacenters/utils/__init__.py | 2 + zoo/datacenters/utils/gcloud.py | 94 +++++++++++++++++++++++++++++++ zoo/datacenters/utils/kube.py | 82 +++++++++++++++++++++++++++ 14 files changed, 382 insertions(+), 13 deletions(-) create mode 100644 test/datacenters/test_gcp.py create mode 100644 zoo/datacenters/gcp.py create mode 100644 zoo/datacenters/utils/__init__.py create mode 100644 zoo/datacenters/utils/gcloud.py create mode 100644 zoo/datacenters/utils/kube.py diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index c1c492de..9e5c822a 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -54,6 +54,7 @@ services: worker: environment: DATABASE_URL: 'postgres://postgres:postgres@postgres/postgres' + GCP_SERVICE_KEY: ZOO_AUDITING_CHECKS: dummy_standards ZOO_DEBUG: '1' ZOO_DATADOG_API_KEY: diff --git a/docker-compose.yml b/docker-compose.yml index af4ce5e4..5fa815ab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: . 
environment: - DATABASE_URL + - GCP_SERVICE_KEY - SECRET_KEY - SENTRY_DSN - SENTRY_PUBLIC_DSN @@ -42,6 +43,7 @@ services: command: [celery, worker, -A, zoo] environment: - DATABASE_URL + - GCP_SERVICE_KEY - SECRET_KEY - SENTRY_DSN - SENTRY_PUBLIC_DSN diff --git a/requirements.in b/requirements.in index fcaa5212..118344ce 100644 --- a/requirements.in +++ b/requirements.in @@ -15,11 +15,13 @@ django-extensions django-silk django-stronghold dockerfile-parse +google-api-python-client graphene graphene-django gunicorn[gevent] hiredis ipython +kubernetes markdown psycopg2 pygerduty diff --git a/requirements.txt b/requirements.txt index b7619246..ce1902ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,10 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile --output-file requirements.txt requirements.in +# pip-compile requirements.in # amqp==2.5.1 # via kombu aniso8601==7.0.0 # via graphene -appnope==0.1.0 # via ipython arrow==0.15.1 attrs==19.1.0 autopep8==1.4.4 # via django-silk @@ -14,9 +13,10 @@ backcall==0.1.0 # via ipython billiard==3.6.1.0 # via celery boto3==1.9.150 botocore==1.12.150 # via boto3, s3transfer +cachetools==3.1.1 # via google-auth celery-redbeat==0.13.0 celery==4.3.0 -certifi==2019.9.11 # via requests +certifi==2019.9.11 # via kubernetes, requests chardet==3.0.4 # via requests colorama==0.4.1 datadog==0.30.0 @@ -33,8 +33,11 @@ django-stronghold==0.3.0 django==2.2.5 djangoql==0.13.0 dockerfile-parse==0.0.15 -gevent==1.4.0 # via gunicorn docutils==0.14 # via botocore +gevent==1.4.0 # via gunicorn +google-api-python-client==1.7.10 +google-auth-httplib2==0.0.3 # via google-api-python-client +google-auth==1.6.3 # via google-api-python-client, google-auth-httplib2, kubernetes gprof2dot==2017.9.19 # via django-silk graphene-django==2.5.0 graphene==2.1.8 @@ -43,6 +46,7 @@ graphql-relay==2.0.0 # via graphene greenlet==0.4.15 # via gevent gunicorn[gevent]==19.9.0 hiredis==1.0.0 +httplib2==0.13.0 # via 
google-api-python-client, google-auth-httplib2 idna==2.8 # via requests importlib-metadata==0.22 # via kombu ipython-genutils==0.2.0 # via traitlets @@ -51,6 +55,7 @@ jedi==0.15.1 # via ipython jinja2==2.10.1 # via django-silk jmespath==0.9.4 # via boto3, botocore kombu==4.6.4 # via celery +kubernetes==10.0.0 markdown==3.1.1 markupsafe==1.1.1 # via jinja2 more-itertools==7.2.0 # via zipp @@ -64,32 +69,40 @@ promise==2.2.1 # via graphene-django, graphql-core, graphql-relay prompt-toolkit==2.0.9 # via ipython psycopg2==2.8.3 ptyprocess==0.6.0 # via pexpect +pyasn1-modules==0.2.5 # via google-auth +pyasn1==0.4.5 # via pyasn1-modules, rsa pycodestyle==2.5.0 # via autopep8 pygerduty==0.38.2 pygithub==1.43.8 pygments==2.4.2 # via django-silk, ipython pyjwt==1.7.1 # via pygithub -python-dateutil==2.8.0 # via arrow, botocore, celery-redbeat, django-silk +python-dateutil==2.8.0 # via arrow, botocore, celery-redbeat, django-silk, kubernetes python-gitlab==1.11.0 python3-openid==3.1.0 # via django-allauth pytz==2019.2 # via celery, django, django-silk pyyaml==5.1.2 raven==6.10.0 redis==3.3.8 -requests-oauthlib==1.2.0 # via django-allauth +requests-oauthlib==1.2.0 # via django-allauth, kubernetes requests==2.22.0 requirements-parser==0.2.0 +rsa==4.0 # via google-auth rx==1.6.1 # via graphql-core s3transfer==0.2.0 # via boto3 singledispatch==3.4.0.3 # via graphene-django -six==1.12.0 # via django-extensions, dockerfile-parse, graphene, graphene-django, graphql-core, graphql-relay, promise, prompt-toolkit, pygerduty, python-dateutil, python-gitlab, singledispatch, structlog, tenacity, traitlets +six==1.12.0 # via django-extensions, dockerfile-parse, google-api-python-client, google-auth, graphene, graphene-django, graphql-core, graphql-relay, kubernetes, promise, prompt-toolkit, pygerduty, python-dateutil, python-gitlab, singledispatch, structlog, tenacity, traitlets, websocket-client sqlparse==0.3.0 # via django, django-debug-toolbar, django-silk structlog==19.1.0 
tenacity==5.1.1 # via celery-redbeat traitlets==4.3.2 # via ipython -urllib3==1.24.3 # via botocore, requests +uritemplate==3.0.0 # via google-api-python-client +urllib3==1.24.3 # via botocore, kubernetes, requests vine==1.3.0 # via amqp, celery wcwidth==0.1.7 # via prompt-toolkit +websocket-client==0.56.0 # via kubernetes whitenoise==4.1.3 wrapt==1.11.2 # via deprecated zipp==0.6.0 # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +# setuptools==41.2.0 # via ipython, kubernetes, markdown diff --git a/test-requirements.txt b/test-requirements.txt index 5dd8111a..18068648 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile --output-file test-requirements.txt test-requirements.in +# pip-compile test-requirements.in # apipkg==1.5 # via execnet astroid==2.2.5 # via pylint diff --git a/test/datacenters/test_gcp.py b/test/datacenters/test_gcp.py new file mode 100644 index 00000000..dadeea67 --- /dev/null +++ b/test/datacenters/test_gcp.py @@ -0,0 +1,89 @@ +from unittest.mock import MagicMock +import pytest + +from zoo.datacenters import gcp as uut, models + +pytestmark = pytest.mark.django_db + + +def test_gcp_map_to_nodes(mocker): + mocker.patch("zoo.datacenters.utils.gcloud.GCPClient.__init__", return_value=None) + mocker.patch( + "zoo.datacenters.utils.gcloud.GCPClient.get_all_projects", + return_value=[{"projectId": "pid1"}, {"projectId": "pid2"}], + ) + mocker.patch( + "zoo.datacenters.utils.gcloud.GCPClient.get_forwarding_rules", + return_value=[ + { + "id": "test1", + "loadBalancingScheme": "EXTERNAL", + "IPAddress": "1.1.1.1", + "portRange": "443-443", + }, + { + "id": "test2", + "loadBalancingScheme": "INTERNAL", + "IPAddress": "2.2.2.2", + "portRange": "443-443", + }, + ], + ) + mocker.patch( + "zoo.datacenters.utils.GCPClient.get_all_clusters", + return_value=[{"name": "test", "zone": "europe-test"}], 
+ ) + mocker.patch( + "zoo.datacenters.utils.kube.KubernetesClient.__init__", return_value=None + ) + + workload = MagicMock() + image1 = MagicMock() + image2 = MagicMock() + image1.image = "test/image:0.0.1" + image2.image = "test/image2:0.0.2" + + workload.metadata.namespace = "namespace-test" + workload.metadata.name = "resource-test" + workload.spec.template.spec.containers = [image1, image2] + + mocker.patch( + "zoo.datacenters.utils.kube.KubernetesClient.iter_workloads", + return_value={"test-type": [workload]}, + ) + + uut.map_to_nodes() + + root = models.InfraNode.objects.get(kind=models.NodeKind.GCP_ROOT_PROJ) + + projects = {project.value: project for project in root.targets.all()} + assert set(projects) == {"pid1", "pid2"} + + ctx = "gke_pid1_europe-test_test" + clusters = { + cluster.value: cluster + for cluster in projects["pid1"].targets.filter( + kind=models.NodeKind.GCP_CLUSTER_NAME + ) + } + assert set(clusters) == {ctx} + + ip_rules = { + cluster.value: cluster + for cluster in projects["pid1"].targets.filter( + kind=models.NodeKind.GCP_IP_RULE_NAME + ) + } + assert set(ip_rules) == {"test1:1.1.1.1:443-443"} + + workloads = { + workload.value: workload + for workload in clusters["gke_pid1_europe-test_test"].targets.all() + } + full_name = "test-type:namespace-test/resource-test" + assert set(workloads) == {f"{ctx}:{full_name}"} + + images = { + image.value: image for image in workloads[f"{ctx}:{full_name}"].targets.all() + } + assert set(images) == {"test/image:0.0.1", "test/image2:0.0.2"} diff --git a/zoo/base/settings.py b/zoo/base/settings.py index ecb43686..504db476 100644 --- a/zoo/base/settings.py +++ b/zoo/base/settings.py @@ -49,6 +49,7 @@ RANCHER_API_URL=(str, None), RANCHER_ACCESS_KEY=(str, None), RANCHER_SECRET_KEY=(str, None), + GCP_SERVICE_KEY=(dict, {}), ) SITE_ROOT = str(root) @@ -247,4 +248,6 @@ RANCHER_ACCESS_KEY = env("RANCHER_ACCESS_KEY") RANCHER_SECRET_KEY = env("RANCHER_SECRET_KEY") +GCP_SERVICE_KEY = env("GCP_SERVICE_KEY") + 
logs.configure_structlog(DEBUG) diff --git a/zoo/datacenters/gcp.py b/zoo/datacenters/gcp.py new file mode 100644 index 00000000..e3df456c --- /dev/null +++ b/zoo/datacenters/gcp.py @@ -0,0 +1,71 @@ +from django.db import transaction + +from .models import InfraNode, NodeKind +from .utils import GCPClient, KubernetesClient + +CLUSTER_IDENTIFIER = "gke_{project_id}_{zone}_{name}" + + +def _workload_identifier(cluster, resource_type, resource): + return f"{cluster}:{resource_type}:{resource.metadata.namespace}/{resource.metadata.name}" + + +@transaction.atomic +def map_to_nodes(): + """Map GCP projects to GCP services. + + Creates records in the InfraNode table of the following kinds: + + - ``gcp.root.proj`` - root node for all GCP projects + - ``gcp.proj.id`` - GCP project ID + - ``gcp.ip_rule.name`` - GCP forwarding rule name + - ``gcp.cluster.name`` - GCP cluster name + - ``gcp.workload.name`` - GCP workload name (including the namespace) + - ``docker.image.uuid`` - Docker image UUID + """ + root = InfraNode.get_or_create_node(kind=NodeKind.GCP_ROOT_PROJ, value="*") + gcloud = GCPClient() + + for project in gcloud.get_all_projects(): + project_node = InfraNode.get_or_create_node( + kind=NodeKind.GCP_PROJ_ID, value=project["projectId"], source=root + ) + + # currently not used anywhere + for ip_rule in gcloud.get_forwarding_rules(project["projectId"]): + if ip_rule["loadBalancingScheme"] == "EXTERNAL": + InfraNode.get_or_create_node( + kind=NodeKind.GCP_IP_RULE_NAME, + value=f"{ip_rule['id']}:{ip_rule['IPAddress']}:{ip_rule['portRange']}", + source=project_node, + ) + + for cluster in gcloud.get_all_clusters(project["projectId"]): + cluster_ctx = CLUSTER_IDENTIFIER.format( + project_id=project["projectId"], + zone=cluster["zone"], + name=cluster["name"], + ) + cluster_node = InfraNode.get_or_create_node( + kind=NodeKind.GCP_CLUSTER_NAME, value=cluster_ctx, source=project_node + ) + + kube = KubernetesClient(cluster) + workloads = kube.iter_workloads() + + for 
resource_type, resources in workloads.items(): + for resource in resources: + workload_node = InfraNode.get_or_create_node( + kind=NodeKind.GCP_WORKLOAD_NAME, + value=_workload_identifier( + cluster_ctx, resource_type, resource + ), + source=cluster_node, + ) + + for container in resource.spec.template.spec.containers: + InfraNode.get_or_create_node( + kind=NodeKind.DOCKER_IMAGE_UUID, + value=container.image, + source=workload_node, + ) diff --git a/zoo/datacenters/mapping.py b/zoo/datacenters/mapping.py index 362e4d36..57b79d0b 100644 --- a/zoo/datacenters/mapping.py +++ b/zoo/datacenters/mapping.py @@ -3,8 +3,9 @@ from django.db import transaction import requests -from . import amazon, models, rancher +from . import amazon, gcp, models, rancher from .models import InfraNode, NodeKind +from .utils import GCPClient, KubernetesClient def url_matches_dns(url, dns_record): @@ -32,6 +33,7 @@ def map_infra_to_nodes(): amazon.map_to_nodes() rancher.map_to_nodes() connect_aws_rancher_nodes() + gcp.map_to_nodes() class Mapper: diff --git a/zoo/datacenters/models.py b/zoo/datacenters/models.py index ac710a2e..77b44f33 100644 --- a/zoo/datacenters/models.py +++ b/zoo/datacenters/models.py @@ -21,6 +21,12 @@ class NodeKind: RANCHER_LB_PORTRULE_URI = "rancher.lb.portrule.uri" RANCHER_SERVICE_ID = "rancher.service.id" + GCP_ROOT_PROJ = "gcp.root.proj" + GCP_PROJ_ID = "gcp.project.id" + GCP_IP_RULE_NAME = "gcp.ip_rule.name" + GCP_CLUSTER_NAME = "gcp.cluster.name" + GCP_WORKLOAD_NAME = "gcp.workload.name" + DOCKER_IMAGE_UUID = "docker.image.uuid" diff --git a/zoo/datacenters/tasks.py b/zoo/datacenters/tasks.py index 8ffbc24d..33c3804e 100644 --- a/zoo/datacenters/tasks.py +++ b/zoo/datacenters/tasks.py @@ -2,15 +2,17 @@ from celery import shared_task from ..services.models import Service -from .mapping import AmazonRancherMapper, map_infra_to_nodes +from .mapping import AmazonRancherMapper, GoogleCloudPlatformMapper, map_infra_to_nodes from .models import InfraNode @shared_task 
def link_service_to_datacenters(service_id):
     service = Service.objects.get(id=service_id)
-    mapper = AmazonRancherMapper()
-    mapper.link_service_to_datacenters(service)
+    amazon = AmazonRancherMapper()
+    amazon.link_service_to_datacenters(service)
+    gcp = GoogleCloudPlatformMapper()
+    gcp.link_service_to_datacenters(service)
 
 
 @shared_task
diff --git a/zoo/datacenters/utils/__init__.py b/zoo/datacenters/utils/__init__.py
new file mode 100644
index 00000000..d823cfc5
--- /dev/null
+++ b/zoo/datacenters/utils/__init__.py
@@ -0,0 +1,2 @@
+from .gcloud import GCPClient
+from .kube import KubernetesClient
diff --git a/zoo/datacenters/utils/gcloud.py b/zoo/datacenters/utils/gcloud.py
new file mode 100644
index 00000000..adacda9e
--- /dev/null
+++ b/zoo/datacenters/utils/gcloud.py
@@ -0,0 +1,94 @@
+from django.conf import settings
+from google.oauth2 import service_account
+from googleapiclient import discovery
+
+
+def _get_credentials():
+    if not settings.GCP_SERVICE_KEY:
+        raise RuntimeError("gcloud auth couldn't be performed, missing env variable")
+
+    return service_account.Credentials.from_service_account_info(
+        settings.GCP_SERVICE_KEY
+    )
+
+
+class GCPClient:
+    def __init__(self):
+        self.credentials = _get_credentials()
+
+        self.projectService = discovery.build(
+            "cloudresourcemanager", "v1", credentials=self.credentials
+        )
+        self.computeService = discovery.build(
+            "compute", "v1", credentials=self.credentials
+        )
+        self.containerService = discovery.build(
+            "container", "v1", credentials=self.credentials
+        )
+
+    def get_all_projects(self):
+        request = self.projectService.projects().list()
+        while request is not None:
+            response = request.execute()
+
+            for project in response["projects"]:
+                yield project
+
+            request = self.projectService.projects().list_next(
+                previous_request=request, previous_response=response
+            )
+
+    def get_forwarding_rules(self, project_id):
+        request = self.computeService.forwardingRules().aggregatedList(
+            project=project_id
+        )
+
+        while request is not None:
+            response = request.execute()
+            for ip_rules in response["items"].values():
+                for ip_rule in ip_rules.get("forwardingRules", []):
+                    yield ip_rule
+
+            request = self.computeService.forwardingRules().aggregatedList_next(
+                previous_request=request, previous_response=response
+            )
+
+    def get_all_clusters(self, project_id):
+        clusters = (
+            self.containerService.projects()
+            .locations()
+            .clusters()
+            .list(parent="projects/{}/locations/-".format(project_id))
+        ).execute()
+
+        # clusters are not a paged resource
+        if "clusters" in clusters:
+            return clusters["clusters"]
+
+        return []
+
+    def get_project_owners(self, project_id):
+        request = self.projectService.projects().getIamPolicy(
+            resource=project_id, body={"options": {"requestedPolicyVersion": 0}}
+        )
+        bindings = request.execute()["bindings"]
+
+        for binding in bindings:
+            if binding["role"] == "roles/owner":
+                return [
+                    member
+                    for member in binding["members"]
+                    if member.endswith("@kiwi.com")
+                ]
+
+    def get_clusters_by_name(self, cluster):
+        # zoo.datacenters.gcp.CLUSTER_IDENTIFIER
+        _, projectId, zone, name = cluster.split("_")
+
+        return (
+            self.containerService.projects()
+            .locations()
+            .clusters()
+            .get(name=f"projects/{projectId}/locations/{zone}/clusters/{name}")
+            .execute()
+        )
diff --git a/zoo/datacenters/utils/kube.py b/zoo/datacenters/utils/kube.py
new file mode 100644
index 00000000..74121bcc
--- /dev/null
+++ b/zoo/datacenters/utils/kube.py
@@ -0,0 +1,82 @@
+from base64 import decodebytes
+from tempfile import NamedTemporaryFile
+
+import googleapiclient
+from kubernetes import client
+
+from . import gcloud
+
+BLACKLISTED_NAMESPACES = ["kube-system", "kube-public", "system"]
+
+
+class KubernetesClient:
+    def __init__(self, cluster):
+        config = client.Configuration()
+        config.host = f"https://{cluster['endpoint']}"
+
+        config.api_key_prefix["authorization"] = "Bearer"
+        config.api_key["authorization"] = _token(
+            gcloud._get_credentials(), "cloud-platform"
+        )
+
+        with NamedTemporaryFile(delete=False) as cert:
+            cert.write(
+                decodebytes(cluster["masterAuth"]["clusterCaCertificate"].encode())
+            )
+            config.ssl_ca_cert = cert.name
+
+        self.client = client.ApiClient(configuration=config)
+
+    def iter_workloads(self):
+        def _filter_resources(resources):
+            return [
+                resource
+                for resource in resources
+                if resource.metadata.namespace not in BLACKLISTED_NAMESPACES
+            ]
+
+        apps = client.AppsV1Api(self.client)
+        batch = client.BatchV1Api(self.client)
+        batch_v1_beta = client.BatchV1beta1Api(self.client)
+
+        workloads = {
+            "deployments": _filter_resources(
+                apps.list_deployment_for_all_namespaces().items
+            ),
+            "statefulsets": _filter_resources(
+                apps.list_stateful_set_for_all_namespaces().items
+            ),
+            "daemonsets": _filter_resources(
+                apps.list_daemon_set_for_all_namespaces().items
+            ),
+            "jobs": _filter_resources(batch.list_job_for_all_namespaces().items),
+            "cronjobs": _filter_resources(
+                batch_v1_beta.list_cron_job_for_all_namespaces().items
+            ),
+        }
+
+        return workloads
+
+    def get_ingress(self, namespace):
+        try:
+            ingresses = (
+                client.NetworkingV1beta1Api(self.client)
+                .list_namespaced_ingress(namespace)
+                .items
+            )
+        except client.rest.ApiException:
+            ingresses = (
+                client.ExtensionsV1beta1Api(self.client)
+                .list_namespaced_ingress(namespace)
+                .items
+            )
+
+        return ingresses
+
+
+def _token(credentials, *scopes):
+    scopes = [f"https://www.googleapis.com/auth/{s}" for s in scopes]
+    scoped = googleapiclient._auth.with_scopes(credentials, scopes)
+    googleapiclient._auth.refresh_credentials(scoped)
+
+    return scoped.token