From 308efdbc4a0ea7dfcc895da86c9f2c0808022194 Mon Sep 17 00:00:00 2001 From: Pavel Dedik Date: Fri, 31 May 2019 14:10:55 +0200 Subject: [PATCH] Add new app for services infrastructure tracking - Resolve #87, #89, #90 --- requirements.in | 1 + requirements.txt | 9 +- test/conftest.py | 2 + test/datacenters/__init__.py | 0 test/datacenters/test_amazon.py | 81 +++++++++ test/datacenters/test_mapping.py | 11 ++ test/datacenters/test_models.py | 46 +++++ test/datacenters/test_rancher.py | 83 +++++++++ test/factories.py | 9 + zoo/base/apps.py | 4 + zoo/base/settings.py | 22 +++ zoo/datacenters/__init__.py | 0 zoo/datacenters/admin.py | 10 ++ zoo/datacenters/amazon.py | 187 +++++++++++++++++++++ zoo/datacenters/apps.py | 5 + zoo/datacenters/mapping.py | 139 +++++++++++++++ zoo/datacenters/migrations/0001_initial.py | 170 +++++++++++++++++++ zoo/datacenters/migrations/__init__.py | 0 zoo/datacenters/models.py | 118 +++++++++++++ zoo/datacenters/rancher.py | 140 +++++++++++++++ zoo/datacenters/tasks.py | 25 +++ 21 files changed, 1060 insertions(+), 2 deletions(-) create mode 100644 test/datacenters/__init__.py create mode 100644 test/datacenters/test_amazon.py create mode 100644 test/datacenters/test_mapping.py create mode 100644 test/datacenters/test_models.py create mode 100644 test/datacenters/test_rancher.py create mode 100644 zoo/datacenters/__init__.py create mode 100644 zoo/datacenters/admin.py create mode 100644 zoo/datacenters/amazon.py create mode 100644 zoo/datacenters/apps.py create mode 100644 zoo/datacenters/mapping.py create mode 100644 zoo/datacenters/migrations/0001_initial.py create mode 100644 zoo/datacenters/migrations/__init__.py create mode 100644 zoo/datacenters/models.py create mode 100644 zoo/datacenters/rancher.py create mode 100644 zoo/datacenters/tasks.py diff --git a/requirements.in b/requirements.in index 39da0539..96d0d031 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,6 @@ arrow attrs +boto3 celery celery-redbeat colorama diff 
--git a/requirements.txt b/requirements.txt index 3c1771a8..2d5a118e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,8 @@ attrs==19.1.0 autopep8==1.4.4 # via django-silk backcall==0.1.0 # via ipython billiard==3.6.0.0 # via celery +boto3==1.9.150 +botocore==1.12.150 # via boto3, s3transfer celery-redbeat==0.13.0 celery==4.3.0 certifi==2019.6.16 # via requests @@ -30,6 +32,7 @@ django-stronghold==0.3.0 django==2.2.3 djangoql==0.12.6 dockerfile-parse==0.0.14 +docutils==0.14 # via botocore gprof2dot==2017.9.19 # via django-silk graphene-django==2.3.2 graphene==2.1.6 @@ -42,6 +45,7 @@ ipython-genutils==0.2.0 # via traitlets ipython==7.6.1 jedi==0.14.0 # via ipython jinja2==2.10.1 # via django-silk +jmespath==0.9.4 # via boto3, botocore kombu==4.6.3 # via celery markdown==3.1.1 markupsafe==1.1.1 # via jinja2 @@ -59,7 +63,7 @@ pycodestyle==2.5.0 # via autopep8 pygithub==1.43.7 pygments==2.4.2 # via django-silk, ipython pyjwt==1.7.1 # via pygithub -python-dateutil==2.8.0 # via arrow, celery-redbeat, django-silk +python-dateutil==2.8.0 # via arrow, botocore, celery-redbeat, django-silk python-gitlab==1.9.0 python3-openid==3.1.0 # via django-allauth pytz==2019.1 # via celery, django, django-silk @@ -70,13 +74,14 @@ requests-oauthlib==1.2.0 # via django-allauth requests==2.22.0 requirements-parser==0.2.0 rx==1.6.1 # via graphql-core +s3transfer==0.2.0 # via boto3 singledispatch==3.4.0.3 # via graphene-django six==1.12.0 # via django-extensions, dockerfile-parse, graphene, graphene-django, graphql-core, graphql-relay, promise, prompt-toolkit, python-dateutil, python-gitlab, singledispatch, structlog, tenacity, traitlets sqlparse==0.3.0 # via django, django-debug-toolbar, django-silk structlog==19.1.0 tenacity==5.1.1 # via celery-redbeat traitlets==4.3.2 # via ipython -urllib3==1.25.3 # via requests +urllib3==1.24.3 # via botocore, requests vine==1.3.0 # via amqp, celery wcwidth==0.1.7 # via prompt-toolkit whitenoise==4.1.2 diff --git a/test/conftest.py 
b/test/conftest.py index 47d0cfcf..b6b64ba8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -22,6 +22,7 @@ DependencyFactory, DependencyUsageFactory, KindFactory, + InfraNodeFactory, ) @@ -33,6 +34,7 @@ register(DependencyFactory) register(DependencyUsageFactory) register(KindFactory) +register(InfraNodeFactory) fake = Faker() diff --git a/test/datacenters/__init__.py b/test/datacenters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/datacenters/test_amazon.py b/test/datacenters/test_amazon.py new file mode 100644 index 00000000..df57812d --- /dev/null +++ b/test/datacenters/test_amazon.py @@ -0,0 +1,81 @@ +from faker import Faker +import pytest + +from zoo.datacenters import amazon as uut, models + +fake = Faker() +pytestmark = pytest.mark.django_db + + +def test_amazon_map_to_nodes(mocker): + mocker.patch( + "zoo.datacenters.amazon.iter_hosted_zones", + return_value=[ + {"Id": "hostedzone/1", "Name": "example.com"}, + { + "Id": "hostedzone/2", + "Name": "zoo.example.com", + "AliasTarget": {"DNSName": "elb1.example.com"}, + }, + ], + ) + mocker.patch( + "zoo.datacenters.amazon.iter_resource_record_sets", + return_value=[ + { + "Type": "A", + "Name": "test.example.com", + "AliasTarget": {"DNSName": "elb1.example.com"}, + }, + { + "Type": "CNAME", + "Name": "test2.example.com", + "ResourceRecords": [{"Value": "elb2.example.com"}], + }, + ], + ) + mocker.patch( + "zoo.datacenters.amazon.iter_load_balancers", + return_value=[ + { + "DNSName": "elb1.example.com", + "Instances": [{"InstanceId": "i-a"}, {"InstanceId": "i-b"}], + } + ], + ) + mocker.patch( + "zoo.datacenters.amazon.iter_load_balancers_v2", + return_value=[ + {"DNSName": "elb2.example.com", "Instances": [{"InstanceId": "i-c"}]} + ], + ) + mocker.patch( + "zoo.datacenters.amazon.iter_ec2_instances", + return_value=[ + {"InstanceId": "i-a", "PrivateDnsName": "ip-1"}, + {"InstanceId": "i-b", "PrivateDnsName": "ip-2"}, + {"InstanceId": "i-c", "PrivateDnsName": "ip-3"}, + ], 
+ ) + + uut.map_to_nodes() + + root = models.InfraNode.objects.get(kind=models.NodeKind.AWS_ROOT_DNS) + + zones = {zone.value: zone for zone in root.targets.all()} + assert set(zones) == {"zoo.example.com", "example.com"} + + record_sets = {rs.value: rs for rs in zones["example.com"].targets.all()} + assert set(record_sets) == {"test.example.com", "test2.example.com"} + + [elb1_dns] = record_sets["test.example.com"].targets.all() + [elb2_dns] = record_sets["test2.example.com"].targets.all() + + [elb1] = elb1_dns.targets.all() + [elb2] = elb2_dns.targets.all() + + assert elb1.value == "elb1.example.com" + assert elb2.value == "elb2.example.com" + + assert {instance.value for instance in elb1.targets.all()} == {"ip-1", "ip-2"} + assert {instance.value for instance in elb2.targets.all()} == {"ip-3"} diff --git a/test/datacenters/test_mapping.py b/test/datacenters/test_mapping.py new file mode 100644 index 00000000..631e6d4c --- /dev/null +++ b/test/datacenters/test_mapping.py @@ -0,0 +1,11 @@ +from zoo.datacenters import mapping as uut + + +def test_url_matches_dns(): + assert uut.url_matches_dns("zoo.example.com", "*.example.com") + assert uut.url_matches_dns("zoo.example.com", "zoo.*.com") + assert uut.url_matches_dns("zoo.example.com", "zoo.example.com") + + assert not uut.url_matches_dns("zoo.example.com", "abc.example.com") + assert not uut.url_matches_dns("zoo.example.com", "abc.*.com") + assert not uut.url_matches_dns("zoo.example.com", "*.example.cz") diff --git a/test/datacenters/test_models.py b/test/datacenters/test_models.py new file mode 100644 index 00000000..c27a63ea --- /dev/null +++ b/test/datacenters/test_models.py @@ -0,0 +1,46 @@ +import pytest + +from zoo.datacenters import models as uut + +pytestmark = pytest.mark.django_db + + +def test_infra_node__get_or_create_node(): + root = uut.InfraNode.get_or_create_node(kind="kind1", value="value1") + + assert root.value == "value1" + assert list(root.sources.all()) == [] + assert 
list(root.targets.all()) == [] + + node = uut.InfraNode.get_or_create_node(kind="kind2", value="value2", source=root) + + assert list(node.sources.all()) == [root] + assert list(root.targets.all()) == [node] + + assert root.id == uut.InfraNode.get_or_create_node(kind="kind1", value="value1").id + + +def test_infra_node__find_sources_by_kind(): + root = uut.InfraNode.objects.create(kind="root", value="123") + + node_dns = uut.InfraNode.objects.create(kind="dns", value="a1b") + node_abc = uut.InfraNode.objects.create(kind="abc", value="a2b") + + root.targets.add(node_dns) + root.targets.add(node_abc) + + node_dns2 = uut.InfraNode.objects.create(kind="dns", value="b11") + node_dns3 = uut.InfraNode.objects.create(kind="dns", value="b22") + node_abc.targets.add(node_dns2) + node_dns2.targets.add(node_dns3) + + leaf = uut.InfraNode.objects.create(kind="leaf", value="ccc") + node_dns.targets.add(leaf) + node_dns3.targets.add(leaf) + + dns_sources = leaf.find_sources_by_kind("dns") + dns_nodes_ids = {node_dns.id, node_dns2.id, node_dns3.id} + assert {source.id for source in dns_sources} == dns_nodes_ids + + root_sources = leaf.find_sources_by_kind("root") + assert list(root_sources) == [root] diff --git a/test/datacenters/test_rancher.py b/test/datacenters/test_rancher.py new file mode 100644 index 00000000..735c6a0d --- /dev/null +++ b/test/datacenters/test_rancher.py @@ -0,0 +1,83 @@ +from faker import Faker +import pytest + +from zoo.datacenters import rancher as uut, models + +fake = Faker() +pytestmark = pytest.mark.django_db + + +def test_rancher_parse_members_from_project(): + names = [fake.name(), fake.name()] + members = uut.parse_members_from_project( + { + "id": "p2", + "members": [ + { + "type": "projectMember", + "externalId": f"cn={names[0]},ou=People,dc=example,dc=com", + "role": "owner", + }, + { + "type": "projectMember", + "externalId": f"cn={names[1]},ou=People,dc=example,dc=com", + "role": "owner", + }, + ], + } + ) + assert members == names + + +def 
test_rancher_map_to_nodes(mocker): + mocker.patch("zoo.datacenters.rancher.iter_projects", return_value=[{"id": "p1"}]) + mocker.patch( + "zoo.datacenters.rancher.iter_services", + return_value=[ + {"id": "s1", "launchConfig": {"imageUuid": "docker:lb"}}, + {"id": "s2", "launchConfig": {"imageUuid": "docker:zoo"}}, + ], + ) + mocker.patch( + "zoo.datacenters.rancher.iter_load_balancers", + return_value=[ + { + "id": "lb1", + "lbConfig": { + "portRules": [ + { + "hostname": "zoo.example.com", + "sourcePort": 80, + "protocol": "http", + "serviceId": "s2", + } + ] + }, + "publicEndpoints": [{"hostId": "h1"}], + } + ], + ) + mocker.patch( + "zoo.datacenters.rancher.iter_hosts", + return_value=[{"id": "h1", "hostname": "ip-127-0-0-1"}], + ) + + uut.map_to_nodes() + + project = models.InfraNode.objects.get(kind=models.NodeKind.RANCHER_PROJ_ID) + assert project.value == "p1" + + hosts = project.targets.all() + assert {host.value for host in hosts} == {"ip-127-0-0-1"} + + lbs = hosts[0].targets.all() + assert {lb.value for lb in lbs} == {"lb1"} + + portrules = lbs[0].targets.all() + assert {portrule.value for portrule in portrules} == {"zoo.example.com"} + + services = portrules[0].targets.all() + assert {service.value for service in services} == {"s2"} + + images = services[0].targets.all() + assert {image.value for image in images} == {"docker:zoo"} diff --git a/test/factories.py b/test/factories.py index c9163561..f87eb042 100644 --- a/test/factories.py +++ b/test/factories.py @@ -7,6 +7,7 @@ from zoo.api.models import ApiToken from zoo.auditing.models import Issue from zoo.auditing.check_discovery import Kind +from zoo.datacenters.models import InfraNode from zoo.repos.models import Repository from zoo.services.models import Service from zoo.analytics.models import Dependency, DependencyUsage, DependencyType @@ -94,3 +95,11 @@ class Meta: id = Faker("domain_word") title = Faker("sentence") description = Faker("paragraph") + + +class InfraNodeFactory(Factory): + class 
Meta: + model = InfraNode + + kind = Faker("domain_word") + value = Faker("slug") diff --git a/zoo/base/apps.py b/zoo/base/apps.py index ec3563ac..45384bb9 100644 --- a/zoo/base/apps.py +++ b/zoo/base/apps.py @@ -15,6 +15,7 @@ def ready(self): from ..analytics import tasks as analytics_tasks from ..auditing import tasks as auditing_tasks from ..objectives import tasks as objective_tasks + from ..datacenters import tasks as datacenters_tasks celery_app.add_periodic_task(timedelta(hours=1), repos_tasks.sync_repos) celery_app.add_periodic_task(timedelta(hours=1), repos_tasks.schedule_pulls) @@ -37,3 +38,6 @@ def ready(self): celery_app.add_periodic_task( timedelta(days=1), analytics_tasks.check_python_lib_licenses ) + celery_app.add_periodic_task( + timedelta(days=1), datacenters_tasks.schedule_infra_mapping + ) diff --git a/zoo/base/settings.py b/zoo/base/settings.py index 3ea6119e..56777227 100644 --- a/zoo/base/settings.py +++ b/zoo/base/settings.py @@ -6,6 +6,7 @@ For the full list of settings and their values, see https://docs.djangoproject.com/en/dev/ref/settings/ """ +import os from pathlib import Path import environ @@ -40,6 +41,13 @@ ZOO_AUDITING_DROP_ISSUES=(int, 7), ZOO_SONARQUBE_URL=(str, None), ZOO_SONARQUBE_TOKEN=(str, None), + AWS_CONFIG=(str, None), + AWS_CONFIG_FILE=(str, "/tmp/aws/config"), + AWS_SHARED_CREDENTIALS=(str, None), + AWS_SHARED_CREDENTIALS_FILE=(str, "/tmp/aws/credentials"), + RANCHER_API_URL=(str, None), + RANCHER_ACCESS_KEY=(str, None), + RANCHER_SECRET_KEY=(str, None), ) SITE_ROOT = str(root) @@ -83,6 +91,7 @@ "zoo.services.apps.ServicesConfig", "zoo.analytics.apps.AnalyticsConfig", "zoo.objectives.apps.ObjectivesConfig", + "zoo.datacenters.apps.DatacentersConfig", "zoo.api.apps.ApiConfig", "django.contrib.admin", "django.contrib.auth", @@ -221,4 +230,17 @@ ZOO_AUDITING_CHECKS = env("ZOO_AUDITING_CHECKS") ZOO_AUDITING_DROP_ISSUES = env("ZOO_AUDITING_DROP_ISSUES") +AWS_CONFIG = env("AWS_CONFIG") +AWS_CONFIG_FILE = 
# Files boto3 is pointed at through AWS_CONFIG_FILE / AWS_SHARED_CREDENTIALS_FILE.
config_file = pathlib.Path(settings.AWS_CONFIG_FILE)
credentials_file = pathlib.Path(settings.AWS_CREDENTIALS_FILE)

# Prefix removed from load balancer DNS names by ``_get_elb_dns``.
_DUALSTACK_PREFIX = "dualstack."

# Materialize the files from the inline settings values; an existing file is
# never overwritten.
if settings.AWS_CONFIG and not config_file.is_file():
    config_file.parent.mkdir(parents=True, exist_ok=True)
    config_file.write_text(settings.AWS_CONFIG)

if settings.AWS_CREDENTIALS and not credentials_file.is_file():
    credentials_file.parent.mkdir(parents=True, exist_ok=True)
    # The file holds secrets: create it owner-only before writing the content.
    credentials_file.touch(mode=0o600)
    credentials_file.write_text(settings.AWS_CREDENTIALS)


def _get_url(value):
    """Normalize a DNS name.

    Strips surrounding whitespace and trailing dots, and turns the ``\\052``
    escape into a literal ``*``.
    """
    return value.strip().rstrip(".").replace("\\052", "*")


def _get_elb_dns(value):
    """Normalize a load balancer DNS name, dropping a leading ``dualstack.``."""
    value = _get_url(value)
    if value.startswith(_DUALSTACK_PREFIX):
        value = value[len(_DUALSTACK_PREFIX):]
    return value


def get_client(service_name):
    """Return a boto3 client for ``service_name``."""
    return boto3.client(service_name)


def iter_hosted_zones():
    """Yield every entry of ``HostedZones`` from Route 53 ``list_hosted_zones``."""
    route53 = get_client("route53")
    paginator = route53.get_paginator("list_hosted_zones")

    for page in paginator.paginate():
        yield from page["HostedZones"]


def iter_resource_record_sets(hosted_zones_ids):
    """Yield the resource record sets of each hosted zone in ``hosted_zones_ids``."""
    route53 = get_client("route53")
    # The paginator does not depend on the zone, so build it once.
    paginator = route53.get_paginator("list_resource_record_sets")

    for zone_id in hosted_zones_ids:
        for page in paginator.paginate(HostedZoneId=zone_id):
            yield from page["ResourceRecordSets"]


def iter_load_balancers(names=None):
    """Yield classic ELB descriptions, optionally restricted to ``names``."""
    elb = get_client("elb")
    paginator = elb.get_paginator("describe_load_balancers")

    filters = {"LoadBalancerNames": names} if names else {}

    for page in paginator.paginate(**filters):
        yield from page["LoadBalancerDescriptions"]


def iter_load_balancers_v2(names=None):
    """Yield ELBv2 load balancers, optionally restricted to ``names``.

    Plain names are sent as ``Names``.  Values that start with ``arn:`` are
    still sent as ``LoadBalancerArns``, which is how every value used to be
    forwarded, so callers passing ARNs keep working.
    """
    elbv2 = get_client("elbv2")
    paginator = elbv2.get_paginator("describe_load_balancers")

    filters = {}
    if names:
        arns = [name for name in names if name.startswith("arn:")]
        plain_names = [name for name in names if not name.startswith("arn:")]
        if arns:
            filters["LoadBalancerArns"] = arns
        if plain_names:
            filters["Names"] = plain_names

    for page in paginator.paginate(**filters):
        yield from page["LoadBalancers"]


def iter_ec2_instances(instance_ids=None):
    """Yield EC2 instances, optionally restricted to ``instance_ids``."""
    ec2 = get_client("ec2")
    paginator = ec2.get_paginator("describe_instances")

    filters = {"InstanceIds": instance_ids} if instance_ids else {}

    for page in paginator.paginate(**filters):
        for reservation in page["Reservations"]:
            yield from reservation["Instances"]


@transaction.atomic
def _map_dns_records():
    """Map DNS records (from Amazon Route 53) to the database as a set of InfraNodes.

    Creates records in the InfraNode table of the following kinds:

    - ``aws.root.dns`` - root of all Amazon DNS records
    - ``aws.hostedzone.dns`` - results from Route 53 > ListHostedZones
    - ``aws.recordset.dns`` - results from Route 53 > ListResourceRecordSets
    """
    root = InfraNode.get_or_create_node(kind=NodeKind.AWS_ROOT_DNS, value="*")

    for zone in iter_hosted_zones():
        zone_node = InfraNode.get_or_create_node(
            kind=NodeKind.AWS_HOSTED_ZONE_DNS, value=_get_url(zone["Name"]), source=root
        )

        zone_alias = (zone.get("AliasTarget") or {}).get("DNSName")
        if zone_alias:
            InfraNode.get_or_create_node(
                kind=NodeKind.AWS_RECORD_SET_DNS,
                value=_get_url(zone_alias),
                source=zone_node,
            )

        for record_set in iter_resource_record_sets([zone["Id"]]):
            record_set_node = InfraNode.get_or_create_node(
                kind=NodeKind.AWS_RECORD_SET_DNS,
                value=_get_url(record_set["Name"]),
                source=zone_node,
            )

            # Only CNAME records and aliased A records point at another name.
            record_type = record_set.get("Type")
            if record_type == "CNAME":
                records = record_set.get("ResourceRecords") or []
                target = records[0]["Value"] if records else None
            elif record_type == "A" and "AliasTarget" in record_set:
                target = record_set["AliasTarget"]["DNSName"]
            else:
                continue

            if target:
                InfraNode.get_or_create_node(
                    kind=NodeKind.AWS_RECORD_SET_DNS,
                    value=_get_url(target),
                    source=record_set_node,
                )


@transaction.atomic
def _map_dns_to_ec2s():
    """Map all DNS records found in the ``InfraNode`` table via ELBs to EC2 instances.

    Creates records in the InfraNode table of the following kinds:

    - ``aws.elb.dns`` - Public DNS of an Elastic Load Balancer (ELB and ELBv2)
    - ``aws.ec2.dns.private`` - Private DNS of an EC2 instance
    """
    record_set_nodes = InfraNode.objects.filter(kind=NodeKind.AWS_RECORD_SET_DNS)

    load_balancers = {
        _get_elb_dns(elb["DNSName"]): elb
        for elb in itertools.chain(iter_load_balancers(), iter_load_balancers_v2())
    }
    ec2_instances = {ec2["InstanceId"]: ec2 for ec2 in iter_ec2_instances()}

    for record_set_node in record_set_nodes:
        # Record-set values are stored through ``_get_url`` only, so an alias
        # such as ``dualstack.<elb>`` must get the same normalization as the
        # dictionary keys before it can match.
        elb_info = load_balancers.get(_get_elb_dns(record_set_node.value))
        if elb_info is None:
            continue

        elb_node = InfraNode.get_or_create_node(
            kind=NodeKind.AWS_ELB_DNS,
            value=_get_elb_dns(elb_info["DNSName"]),
            source=record_set_node,
        )

        for instance_info in elb_info.get("Instances", []):
            # Skip instances missing from the EC2 listing or without a private
            # DNS name instead of aborting the whole mapping transaction.
            instance = ec2_instances.get(instance_info["InstanceId"]) or {}
            private_dns = instance.get("PrivateDnsName")
            if not private_dns:
                continue

            InfraNode.get_or_create_node(
                kind=NodeKind.AWS_EC2_DNS_PRIVATE,
                value=private_dns,
                source=elb_node,
            )


def map_to_nodes():
    """Map Amazon infrastructure to a set of ``InfraNode``s."""
    _map_dns_records()
    _map_dns_to_ec2s()
def url_matches_dns(url, dns_record):
    """Tell whether ``url`` matches ``dns_record``, where ``*`` is a wildcard.

    The literal segments of ``dns_record`` (the text between ``*``) must occur
    in ``url`` in the same order and without overlapping.  The match is not
    anchored, so extra text around the segments (e.g. a ``:port`` suffix) is
    accepted.
    """
    position = 0
    for part in dns_record.split("*"):
        if not part:
            continue
        found_at = url.find(part, position)
        if found_at < 0:
            return False
        # Continue after this segment so later segments cannot reuse it.
        position = found_at + len(part)
    return True


def connect_aws_rancher_nodes():
    """Connect AWS EC2 instances' DNS records to Rancher's host DNS records.

    Doesn't create any new nodes.  Runs in a single transaction.
    """
    with transaction.atomic():
        ec2_nodes_by_dns = {
            node.value: node
            for node in InfraNode.objects.filter(kind=NodeKind.AWS_EC2_DNS_PRIVATE)
        }

        for host_node in InfraNode.objects.filter(kind=NodeKind.RANCHER_HOST_DNS):
            ec2_node = ec2_nodes_by_dns.get(host_node.value)
            if ec2_node is not None:
                host_node.sources.add(ec2_node)


def map_infra_to_nodes():
    """Map infrastructure from all datacenters to the InfraNode table."""
    amazon.map_to_nodes()
    rancher.map_to_nodes()
    connect_aws_rancher_nodes()


class Mapper:
    """Abstract class to help retrieve data from ``InfraNode``s."""

    def __init__(self):
        # Per-instance caches keyed by node value; filled by subclasses.
        self._components_cache = {}
        self._members_cache = {}

    def get_service_image_nodes(self, service):
        """Return Docker image nodes whose value contains ``<owner>/<name>``."""
        image_uuid_part = f"{service.repository.owner}/{service.repository.name}"
        return InfraNode.objects.filter(
            kind=NodeKind.DOCKER_IMAGE_UUID, value__contains=image_uuid_part
        )

    def link_service_to_datacenters(self, service):
        """Call ``link_image_to_service`` for every image node of ``service``."""
        for image_node in self.get_service_image_nodes(service):
            self.link_image_to_service(image_node, service)

    def link_image_to_service(self, image_node, service):
        """Store what ``image_node`` reveals about ``service``; subclasses implement it."""
        # Nothing is written here, so no transaction is opened.
        raise NotImplementedError()


class AmazonRancherMapper(Mapper):
    """Retrieve data from Amazon and Rancher infrastructure and store it."""

    def _get_component_urls(self, component):
        """Return the URLs under which ``component`` is reachable.

        Without Rancher port rules the AWS record-set names are used as they
        are.  Otherwise a port rule URI is kept when at least one of its AWS
        record-set sources matches it; each URI is listed once.
        """
        portrules = component.find_sources_by_kind(NodeKind.RANCHER_LB_PORTRULE_URI)
        if not portrules:
            return [
                node.value
                for node in component.find_sources_by_kind(NodeKind.AWS_RECORD_SET_DNS)
            ]

        urls = []
        for portrule_node in portrules:
            record_sets = portrule_node.find_sources_by_kind(
                NodeKind.AWS_RECORD_SET_DNS
            )
            if any(
                url_matches_dns(portrule_node.value, node.value)
                for node in record_sets
            ):
                urls.append(portrule_node.value)
        return urls

    def _get_component_data(self, component, project):
        """Return ``{"name", "urls"}`` for ``component``, cached by its value."""
        if component.value in self._components_cache:
            return self._components_cache[component.value]

        name = rancher.get_service(project.value, component.value).get("name")
        result = {"name": name, "urls": self._get_component_urls(component)}

        self._components_cache[component.value] = result
        return result

    def _get_project_members(self, project):
        """Return member names of ``project``, cached by the project value."""
        if project.value not in self._members_cache:
            self._members_cache[project.value] = rancher.parse_members_from_project(
                rancher.get_project(project.value)
            )
        return self._members_cache[project.value]

    def _get_amazon_datacenters(self, component_node, service):
        """Return ``ServiceDatacenter`` rows for the ELBs in front of the component.

        Each distinct row is returned once, in the order it was first seen.
        """
        datacenters = {}
        for elb_node in component_node.find_sources_by_kind(NodeKind.AWS_ELB_DNS):
            # The second dot-separated label is used as the region.
            # NOTE(review): presumably ``<name>.<region>.elb...`` - confirm that
            # every load balancer flavour follows this layout.
            labels = elb_node.value.split(".", 2)
            if len(labels) < 3:
                continue
            region = labels[1]

            datacenter, _ = models.Datacenter.objects.get_or_create(
                provider="Amazon", region=region
            )
            service_datacenter, _ = models.ServiceDatacenter.objects.get_or_create(
                service=service, datacenter=datacenter
            )
            datacenters.setdefault(service_datacenter.id, service_datacenter)
        return list(datacenters.values())

    def link_image_to_service(self, image_node, service):
        """Sync components and members of ``service`` from ``image_node``.

        Runs in a single transaction.  Components no longer found in a touched
        datacenter are deleted at the end.
        """
        with transaction.atomic():
            datacenters_components = defaultdict(set)

            for component in image_node.find_sources_by_kind(
                NodeKind.RANCHER_SERVICE_ID
            ):
                datacenters = self._get_amazon_datacenters(component, service)

                for project in component.find_sources_by_kind(
                    NodeKind.RANCHER_PROJ_ID
                ):
                    component_data = self._get_component_data(component, project)
                    member_names = [
                        name for name in self._get_project_members(project) if name
                    ]

                    for service_datacenter in datacenters:
                        # Look up by the unique pair only and refresh ``urls``;
                        # with ``urls`` in the lookup a changed URL list would
                        # attempt a duplicate insert.
                        models.ServiceDatacenterComponent.objects.update_or_create(
                            service_datacenter=service_datacenter,
                            name=component_data["name"],
                            defaults={"urls": component_data["urls"]},
                        )
                        # save service_datacenter id with its components for
                        # later deletion
                        datacenters_components[service_datacenter.id].add(
                            component_data["name"]
                        )

                        for name in member_names:
                            models.ServiceDatacenterMember.objects.get_or_create(
                                service_datacenter=service_datacenter, name=name
                            )
                        models.ServiceDatacenterMember.objects.filter(
                            service_datacenter=service_datacenter
                        ).exclude(name__in=member_names).delete()

            # delete no longer existing services in datacenters
            for datacenter_id, component_names in datacenters_components.items():
                models.ServiceDatacenterComponent.objects.filter(
                    service_datacenter_id=datacenter_id
                ).exclude(name__in=component_names).delete()
name="ServiceDatacenter", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "datacenter", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + to="datacenters.Datacenter", + ), + ), + ( + "service", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="services.Service", + ), + ), + ], + ), + migrations.CreateModel( + name="ServiceDatacenterComponent", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=100)), + ( + "urls", + django.contrib.postgres.fields.ArrayField( + base_field=models.URLField(max_length=500), + blank=True, + default=list, + size=None, + ), + ), + ( + "service_datacenter", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="components", + related_query_name="component", + to="datacenters.ServiceDatacenter", + ), + ), + ], + ), + migrations.CreateModel( + name="ServiceDatacenterMember", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=100)), + ("email", models.CharField(max_length=100)), + ( + "service_datacenter", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="members", + related_query_name="member", + to="datacenters.ServiceDatacenter", + ), + ), + ], + ), + migrations.AddField( + model_name="datacenter", + name="services", + field=models.ManyToManyField( + related_name="datacenters", + related_query_name="datacenter", + through="datacenters.ServiceDatacenter", + to="services.Service", + ), + ), + migrations.AlterUniqueTogether( + name="servicedatacenter", unique_together={("service", "datacenter")} + ), + migrations.AlterUniqueTogether( + name="servicedatacentercomponent", + 
def utcnow():
    """Return the ``datetime`` attribute of ``arrow.utcnow()``."""
    now = arrow.utcnow()
    return now.datetime


class NodeKind:
    """String constants stored in ``InfraNode.kind``."""

    AWS_ROOT_DNS = "aws.root.dns"
    AWS_HOSTED_ZONE_DNS = "aws.hostedzone.dns"
    AWS_RECORD_SET_DNS = "aws.recordset.dns"
    AWS_ELB_DNS = "aws.elb.dns"
    AWS_EC2_DNS_PRIVATE = "aws.ec2.dns.private"

    RANCHER_ROOT_PROJ = "rancher.root.proj"
    RANCHER_PROJ_ID = "rancher.proj.id"
    RANCHER_HOST_DNS = "rancher.host.dns"
    RANCHER_LB_ID = "rancher.lb.id"
    RANCHER_LB_PORTRULE_URI = "rancher.lb.portrule.uri"
    RANCHER_SERVICE_ID = "rancher.service.id"

    DOCKER_IMAGE_UUID = "docker.image.uuid"


class InfraNode(models.Model):
    """A node of the infrastructure graph, unique per ``(value, kind)``.

    ``sources`` holds the nodes pointing at this one; the reverse accessor
    ``targets`` holds the nodes this one points at.
    """

    sources = models.ManyToManyField(
        "InfraNode", related_name="targets", related_query_name="target"
    )
    value = models.CharField(max_length=500)
    kind = models.CharField(max_length=50)
    checked_at = models.DateTimeField(default=utcnow)

    class Meta:
        unique_together = ["value", "kind"]

    def pretty_str(self, separator=""):
        """Render this node and, recursively, its targets ordered by value.

        Every level is prefixed with ``separator`` grown by one space.
        """
        rendered = [f"{separator} ({self.kind}) {self.value}\n"]
        child_separator = separator + " "
        for target in self.targets.order_by("value").all():
            rendered.append(target.pretty_str(separator=child_separator))
        return "".join(rendered)

    def find_sources_by_kind(self, kind):
        """Return all direct and transitive sources whose kind equals ``kind``.

        Each node is visited once, so shared ancestors are reported once.
        """
        pending = list(self.sources.all())
        seen_ids = set()
        matches = []

        while pending:
            node = pending.pop()
            if node.id not in seen_ids:
                seen_ids.add(node.id)
                if node.kind == kind:
                    matches.append(node)
                pending.extend(node.sources.all())

        return matches

    @classmethod
    def get_or_create_node(cls, kind, value, source=None):
        """Fetch or create the ``(kind, value)`` node and refresh ``checked_at``.

        When ``source`` is given it is added to the node's sources.
        """
        node = cls.objects.get_or_create(kind=kind, value=value)[0]
        if source is not None:
            node.sources.add(source)
        node.checked_at = utcnow()
        node.save(update_fields=["checked_at"])
        return node


class Datacenter(models.Model):
    """A provider and optional region, unique per ``(provider, region)``."""

    provider = models.CharField(max_length=100)
    region = models.CharField(max_length=100, null=True, blank=True)

    class Meta:
        unique_together = ("provider", "region")

    def __str__(self):
        if self.region:
            return f"{self.provider} {self.region}"
        return self.provider


class ServiceDatacenter(models.Model):
    """Link between a service and a datacenter, unique per pair."""

    # NOTE(review): the 0001_initial migration in this patch defines this
    # foreign key without a related_name and adds a ``Datacenter.services``
    # many-to-many that is absent here - confirm which side is current.
    service = models.ForeignKey(
        "services.Service", on_delete=models.CASCADE, related_name="datacenters"
    )
    datacenter = models.ForeignKey("Datacenter", on_delete=models.PROTECT)

    class Meta:
        unique_together = ("service", "datacenter")


class ServiceDatacenterMember(models.Model):
    """A named member of a service datacenter, unique per name within it."""

    service_datacenter = models.ForeignKey(
        "ServiceDatacenter",
        on_delete=models.CASCADE,
        related_name="members",
        related_query_name="member",
    )
    name = models.CharField(max_length=100)
    email = models.CharField(max_length=100)

    class Meta:
        unique_together = ["service_datacenter", "name"]


class ServiceDatacenterComponent(models.Model):
    """A named component of a service datacenter together with its URLs."""

    service_datacenter = models.ForeignKey(
        "ServiceDatacenter",
        on_delete=models.CASCADE,
        related_name="components",
        related_query_name="component",
    )
    name = models.CharField(max_length=100)
    urls = pg_fields.ArrayField(
        base_field=models.URLField(max_length=500), blank=True, default=list
    )

    class Meta:
        unique_together = ["service_datacenter", "name"]
# file mode 100644 index 00000000..f945a89a --- /dev/null +++ b/zoo/datacenters/rancher.py @@ -0,0 +1,140 @@
# (The line above is patch metadata carried over verbatim from the collapsed diff.)
import logging
from urllib.parse import urljoin

from django.conf import settings
from django.db import transaction

from ..base.http import session
from .models import InfraNode, NodeKind

logger = logging.getLogger(__name__)


def get(path, params=None):
    """Send an authenticated GET request to the Rancher API and return its JSON body.

    :param path: path relative to ``settings.RANCHER_API_URL``, or an absolute
        URL (``iter_get`` passes pagination links here).
    :param params: optional query-string parameters.
    :raises: whatever ``resp.raise_for_status()`` raises for an error response.
    """
    base_url = settings.RANCHER_API_URL
    # urljoin() replaces the last path segment of a base URL that has no
    # trailing slash ("https://host/v2-beta" + "projects" -> "https://host/projects"),
    # so make sure the configured base is treated as a directory.
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    url = urljoin(base_url, path)
    auth = (settings.RANCHER_ACCESS_KEY, settings.RANCHER_SECRET_KEY)

    resp = session.get(url, auth=auth, params=params)
    resp.raise_for_status()
    return resp.json()


def iter_get(path):
    """Yield every item of a paginated collection, following ``pagination.next``.

    Iteration stops at the first page whose ``data`` is empty or missing, or
    when the response carries no link to a next page.
    """
    content = get(path)

    while content.get("data"):
        yield from content["data"]
        next_page_url = (content.get("pagination") or {}).get("next")

        if not next_page_url:
            break
        content = get(next_page_url)


def iter_projects():
    """Yield all Rancher projects."""
    yield from iter_get("projects")


def get_project(project_id):
    """Return the JSON document of a single Rancher project."""
    return get(f"projects/{project_id}")


def iter_services(project_id):
    """Yield all services of the given project."""
    yield from iter_get(f"projects/{project_id}/services/")


def get_service(project_id, service_id):
    """Return the JSON document of a single service of the given project."""
    return get(f"projects/{project_id}/services/{service_id}")


def iter_load_balancers(project_id):
    """Yield all load balancer services of the given project."""
    yield from iter_get(f"projects/{project_id}/loadbalancerservices/")


def iter_hosts(project_id):
    """Yield all hosts of the given project."""
    yield from iter_get(f"projects/{project_id}/hosts/")


def parse_members_from_project(project_data):
    """Return the ``cn`` value of every project member that has one.

    Each member's ``externalId`` is parsed as comma-separated ``key=value``
    pairs. Members without an ``externalId`` or without a ``cn`` pair are
    skipped, and items that are not ``key=value`` (for example a bare numeric
    ID) are ignored instead of raising ``ValueError``.

    :param project_data: project JSON document; a missing or null ``members``
        entry yields an empty list.
    :returns: list of ``cn`` strings.
    """
    members = []
    for member in project_data.get("members") or []:
        external_id = member.get("externalId")
        if not external_id:
            continue

        attributes = {}
        for item in external_id.split(","):
            key, separator, value = item.partition("=")
            if separator:
                # Later duplicates overwrite earlier ones, like dict() did.
                attributes[key.strip()] = value

        common_name = attributes.get("cn")
        if common_name is not None:
            members.append(common_name)
    return members


def _portrule_uri(rule):
    """Build the URI of a port rule; the port is omitted only for port 80."""
    hostname, source_port = rule["hostname"], rule["sourcePort"]
    return hostname if source_port == 80 else f"{hostname}:{source_port}"


def _map_lb_to_services(lb, hosts, project_node):
    """Create the nodes of one load balancer and return its service nodes.

    Links ``project -> host -> load balancer -> port rule -> service``. Only
    port rules whose protocol starts with ``http`` and that name a service
    are mapped.

    :param lb: load balancer service JSON document.
    :param hosts: mapping of host ID to hostname for the LB's project.
    :param project_node: ``InfraNode`` of the project owning the LB.
    :returns: list of ``InfraNode`` objects of kind ``RANCHER_SERVICE_ID``.
    """
    lb_node = InfraNode.get_or_create_node(kind=NodeKind.RANCHER_LB_ID, value=lb["id"])

    # The keys may be present with a null value, hence ``or`` rather than a
    # ``.get()`` default.
    for endpoint in lb.get("publicEndpoints") or []:
        host_id = endpoint.get("hostId")
        hostname = hosts.get(host_id)
        if hostname is None:
            # One dangling endpoint must not abort the whole mapping run.
            logger.warning(
                "Rancher LB %s references unknown host %s", lb["id"], host_id
            )
            continue
        host_node = InfraNode.get_or_create_node(
            kind=NodeKind.RANCHER_HOST_DNS, value=hostname, source=project_node
        )
        lb_node.sources.add(host_node)

    service_nodes = []
    port_rules = (lb.get("lbConfig") or {}).get("portRules") or []
    for rule in port_rules:
        if not (rule.get("protocol") or "").startswith("http"):
            continue
        service_id = rule.get("serviceId")
        if not service_id:
            # Nothing to link the rule to (e.g. the rule does not name a service).
            continue

        portrule_node = InfraNode.get_or_create_node(
            kind=NodeKind.RANCHER_LB_PORTRULE_URI,
            value=_portrule_uri(rule),
            source=lb_node,
        )
        service_nodes.append(
            InfraNode.get_or_create_node(
                kind=NodeKind.RANCHER_SERVICE_ID,
                value=service_id,
                source=portrule_node,
            )
        )

    return service_nodes


def _image_uuids_by_service_id(project_id):
    """Return ``{service ID: image UUID}`` for the project's services that have an image."""
    images = {}
    for service in iter_services(project_id=project_id):
        # ``launchConfig`` may be present but null, so a ``.get()`` default
        # alone is not enough.
        image_uuid = (service.get("launchConfig") or {}).get("imageUuid")
        if image_uuid:
            images[service["id"]] = image_uuid
    return images


@transaction.atomic
def map_to_nodes():
    """Map Rancher projects (i.e. Rancher environments) to Rancher services.

    Creates records in the InfraNode table of the following kinds:

    - ``rancher.root.proj`` - root node for all Rancher projects
    - ``rancher.proj.id`` - Rancher project ID
    - ``rancher.host.dns`` - Rancher host's DNS
    - ``rancher.lb.id`` - Rancher load balancer service's ID
    - ``rancher.lb.portrule.uri`` - Port rule of a Rancher load balancer service
    - ``rancher.service.id`` - Rancher service ID
    - ``docker.image.uuid`` - Docker image UUID

    Services referenced by a port rule but having no image UUID get a
    service node without an image node instead of failing the whole run.
    """
    root = InfraNode.get_or_create_node(kind=NodeKind.RANCHER_ROOT_PROJ, value="*")

    for project in iter_projects():
        project_id = project["id"]
        project_node = InfraNode.get_or_create_node(
            kind=NodeKind.RANCHER_PROJ_ID, value=project_id, source=root
        )
        hosts = {
            host["id"]: host["hostname"] for host in iter_hosts(project_id=project_id)
        }
        images = _image_uuids_by_service_id(project_id)

        service_nodes = []
        for lb in iter_load_balancers(project_id=project_id):
            service_nodes += _map_lb_to_services(lb, hosts, project_node)

        for service_node in service_nodes:
            image_uuid = images.get(service_node.value)
            if image_uuid is None:
                logger.debug(
                    "Rancher service %s has no image UUID; skipping image node",
                    service_node.value,
                )
                continue
            InfraNode.get_or_create_node(
                kind=NodeKind.DOCKER_IMAGE_UUID,
                value=image_uuid,
                source=service_node,
            )


# Start of the next file's patch header (it continues on the following line):
# diff --git
# a/zoo/datacenters/tasks.py b/zoo/datacenters/tasks.py new file mode 100644 index 00000000..47cd77f4 --- /dev/null +++ b/zoo/datacenters/tasks.py @@ -0,0 +1,25 @@
# (The line above is patch metadata carried over verbatim from the collapsed diff.)
import arrow
from celery import shared_task

from ..services.models import Service
from .mapping import AmazonRancherMapper, map_infra_to_nodes
from .models import InfraNode


@shared_task
def link_service_to_datacenters(service_id):
    """Link a single service to its datacenters using ``AmazonRancherMapper``.

    :param service_id: primary key of the ``Service`` to link.
    :raises Service.DoesNotExist: if no service has that ID (for example when
        it was deleted after the task had been queued).
    """
    service = Service.objects.get(id=service_id)
    mapper = AmazonRancherMapper()
    mapper.link_service_to_datacenters(service)


@shared_task
def schedule_infra_mapping():
    """Refresh the infrastructure nodes, drop stale ones, and re-link all services.

    Steps, in order:

    1. run ``map_infra_to_nodes()``;
    2. delete every ``InfraNode`` whose ``checked_at`` is older than one hour
       before this task started;
    3. queue ``link_service_to_datacenters`` for every service.
    """
    # Take the cutoff *before* mapping: if the mapping itself runs for more
    # than an hour, a cutoff computed afterwards would delete nodes that this
    # very run has just refreshed.
    stale_before = arrow.utcnow().shift(hours=-1).datetime

    map_infra_to_nodes()

    InfraNode.objects.filter(checked_at__lt=stale_before).delete()

    # Only the primary keys are needed, so avoid loading full Service rows.
    service_ids = Service.objects.values_list("id", flat=True)
    for service_id in service_ids.iterator():
        link_service_to_datacenters.delay(service_id)