diff --git a/.copilot/config.yml b/.copilot/config.yml
new file mode 100644
index 000000000..ea47594a0
--- /dev/null
+++ b/.copilot/config.yml
@@ -0,0 +1,4 @@
+repository: data-hub-api
+builder:
+  name: paketobuildpacks/builder-jammy-full
+  version: 0.3.339
diff --git a/.copilot/image_build_run.sh b/.copilot/image_build_run.sh
new file mode 100755
index 000000000..d0f14e4f7
--- /dev/null
+++ b/.copilot/image_build_run.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Exit early if something goes wrong
+set -e
+
+# Add commands below to run inside the container after all the other buildpacks have been applied
+export ADMIN_OAUTH2_ENABLED="True"
+export ADMIN_OAUTH2_BASE_URL=""
+export ADMIN_OAUTH2_TOKEN_FETCH_PATH="/o/token/"
+export ADMIN_OAUTH2_USER_PROFILE_PATH="/o/v1/user/me/"
+export ADMIN_OAUTH2_AUTH_PATH="/o/authorize/"
+export ADMIN_OAUTH2_CLIENT_ID="client-id"
+export ADMIN_OAUTH2_CLIENT_SECRET="client-secret"
+export ADMIN_OAUTH2_LOGOUT_PATH="/o/logout"
+export ACTIVITY_STREAM_ACCESS_KEY_ID="some-id"
+export ACTIVITY_STREAM_SECRET_ACCESS_KEY="some-secret"
+export DATABASE_URL="postgresql://postgres:datahub@postgres/datahub"
+export DEBUG="True"
+export DJANGO_SECRET_KEY="changeme"
+export DJANGO_SETTINGS_MODULE="config.settings.local"
+export ES_INDEX_PREFIX="test_index"
+export ES5_URL="http://localhost:9200"
+export OPENSEARCH_URL="http://localhost:9200"
+export OPENSEARCH_INDEX_PREFIX="test_index"
+export PAAS_IP_ALLOWLIST="1.2.3.4"
+export AWS_DEFAULT_REGION="eu-west-2"
+export AWS_ACCESS_KEY_ID="foo"
+export AWS_SECRET_ACCESS_KEY="bar"
+export DEFAULT_BUCKET="baz"
+export SSO_ENABLED="True"
+export STAFF_SSO_BASE_URL="http://sso.invalid/"
+export STAFF_SSO_AUTH_TOKEN="sso-token"
+export DIT_EMAIL_DOMAINS="trade.gov.uk,digital.trade.gov.uk"
+export DATA_HUB_FRONTEND_ACCESS_KEY_ID="frontend-key-id"
+export DATA_HUB_FRONTEND_SECRET_ACCESS_KEY="frontend-key"
+export ES_APM_ENABLED="False"
+export ES_APM_SERVICE_NAME="datahub"
+export ES_APM_SECRET_TOKEN=""
+export ES_APM_SERVER_URL="http://localhost:8200"
+export ES_APM_ENVIRONMENT="circleci"
+export REDIS_BASE_URL="redis://localhost:6379"
+
+python manage.py collectstatic --noinput
diff --git a/.copilot/phases/build.sh b/.copilot/phases/build.sh
new file mode 100644
index 000000000..c1a73f284
--- /dev/null
+++ b/.copilot/phases/build.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Exit early if something goes wrong
+set -e
+
+# Add commands below to run as part of the build phase
diff --git a/.copilot/phases/install.sh b/.copilot/phases/install.sh
new file mode 100644
index 000000000..17794e84a
--- /dev/null
+++ b/.copilot/phases/install.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Exit early if something goes wrong
+set -e
+
+# Add commands below to run as part of the install phase
diff --git a/.copilot/phases/post_build.sh b/.copilot/phases/post_build.sh
new file mode 100644
index 000000000..1676bb5b3
--- /dev/null
+++ b/.copilot/phases/post_build.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Exit early if something goes wrong
+set -e
+
+# Add commands below to run as part of the post_build phase
diff --git a/.copilot/phases/pre_build.sh b/.copilot/phases/pre_build.sh
new file mode 100644
index 000000000..2d6b40b5c
--- /dev/null
+++ b/.copilot/phases/pre_build.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Exit early if something goes wrong
+set -e
+
+# Add commands below to run as part of the pre_build phase
diff --git a/config/settings/common_logging.py b/config/settings/common_logging.py
index 220f50771..1153ec310 100644
--- a/config/settings/common_logging.py
+++ b/config/settings/common_logging.py
@@ -1,6 +1,7 @@
 import sys
 import sentry_sdk
-from django_log_formatter_ecs import ECSFormatter
+from django_log_formatter_asim import ASIMFormatter
+
 from sentry_sdk.integrations.django import DjangoIntegration
 
 from config.settings.common import *
@@ -13,30 +14,30 @@
         'verbose': {
             'format': '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
         },
-        'ecs_formatter': {
-            '()': ECSFormatter,
+        'asim_formatter': {
+            '()': ASIMFormatter,
         },
     },
     'handlers': {
-        'ecs': {
+        'asim': {
             'class': 'logging.StreamHandler',
-            'formatter': 'ecs_formatter',
+            'formatter': 'asim_formatter',
             'stream': sys.stdout,
         },
     },
     'root': {
         'level': 'INFO',
-        'handlers': ['ecs'],
+        'handlers': ['asim'],
     },
     'loggers': {
         'django': {
             'level': 'INFO',
-            'handlers': ['ecs'],
+            'handlers': ['asim'],
             'propagate': False,
         },
         'django.db.backends': {
             'level': 'ERROR',
-            'handlers': ['ecs'],
+            'handlers': ['asim'],
             'propagate': False,
         },
     },
diff --git a/config/urls.py b/config/urls.py
index 61be87132..f5a6003f0 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -24,7 +24,7 @@
     path('', include('datahub.oauth.admin.urls')),
     *admin_oauth2_urls,
     path('admin/', admin.site.urls),
-    path('ping.xml', ping, name='ping'),
+    path('pingdom/ping.xml', ping, name='ping'),
     path('whoami/', who_am_i, name='who_am_i'),
 ]
 
diff --git a/data_generator.py b/data_generator.py
index e31b92bd5..467962800 100644
--- a/data_generator.py
+++ b/data_generator.py
@@ -22,14 +22,21 @@
     pre_save,
 )
 
+from datahub.company.models.adviser import Advisor
+from datahub.company.models.company import Company
+from datahub.company.models.contact import Contact
 from datahub.company.test.factories import (
     AdviserFactory,
     ArchivedCompanyFactory,
     CompanyFactory,
     CompanyWithAreaFactory,
     ContactFactory,
+    ContactWithOwnAddressFactory,
+    ContactWithOwnAreaFactory,
+    DuplicateCompanyFactory,
     SubsidiaryFactory,
 )
+from datahub.metadata.models import Team
 
 
 class DisableSignals:
@@ -68,50 +75,96 @@ def reconnect(self, signal):
 with DisableSignals():
     start_time = time.time()
 
+    # Pre-fetch metadata
+    teams = list(Team.objects.all())
+
+    advisers = Advisor.objects.all()
+
+    contacts = Contact.objects.all()
+
     # In February 2024 there were 18,000 advisers, 500,000 companies, and 950,000 contacts.
     # Alter the number of advisers below to create a larger or smaller data set.
-    advisers = AdviserFactory.create_batch(200)
-    print(f'Generated {len(advisers)} advisers')  # noqa
+    # Generate advisers
+    print('Generating advisers')  # noqa
+    for index in range(10):
+        AdviserFactory(dit_team=random.choice(teams))
+        if index % 10 == 0:
+            print('.', end='')  # noqa
+    advisers = Advisor.objects.all()
+
+    print(f'Generated {advisers.count()} advisers')  # noqa
+
+    # Generate base companies
+    print('\nGenerating companies')  # noqa
 
     for index, adviser in enumerate(advisers):
-        companies = CompanyFactory.create_batch(
-            random.randint(1, 25),
+        CompanyFactory.create_batch(
+            random.randint(0, 25),
             created_by=adviser,
-            modified_by=adviser,
+            modified_by=random.choice(advisers),
         )
+        if index % 10 == 0:
+            print('.', end='')  # noqa
 
-        # The ratios of the below types of companies do not reflect the live database.
-        companies.extend(
-            SubsidiaryFactory.create_batch(
-                random.randint(1, 5),
-                created_by=adviser,
-                modified_by=adviser,
-            ),
-        )
-        companies.extend(
-            CompanyWithAreaFactory.create_batch(
-                random.randint(0, 1),
-                created_by=adviser,
-                modified_by=adviser,
-            ),
-        )
-        companies.extend(
-            ArchivedCompanyFactory.create_batch(
-                random.randint(0, 1),
-                created_by=adviser,
-                modified_by=adviser,
-            ),
-        )
-        if index % 10 == 0:
-            print('.', end='')  # noqa
-
-        # The below ratio of contacts to companies does not reflect the live database.
-        for company in companies:
-            ContactFactory.create_batch(
-                random.randint(1, 2),
-                company=company,
-                created_by=adviser,
-            )
+    def generate_contacts(advisers, min_count, max_count):
+        print('\nGenerating contacts on advisers')  # noqa
+        for _ in advisers:
+            ContactFactory.create_batch(
+                random.randint(min_count, max_count),
+                created_by=random.choice(advisers),
+                modified_by=random.choice(advisers),
+            )
+
+        print('\nGenerating contacts on advisers with a different address from company')  # noqa
+        for _ in advisers:
+            ContactWithOwnAddressFactory.create_batch(
+                random.randint(min_count, max_count),
+                created_by=random.choice(advisers),
+                modified_by=random.choice(advisers),
+            )
+
+        print(  # noqa
+            '\nGenerating contacts on advisers with a different address from the '
+            'contact company that includes an area',
+        )
+        for _ in advisers:
+            ContactWithOwnAreaFactory.create_batch(
+                random.randint(min_count, max_count),
+                created_by=random.choice(advisers),
+                modified_by=random.choice(advisers),
+            )
+
+    print('\nGenerating company variations')  # noqa
+    companies = Company.objects.all()
+    # The ratios of the below types of companies do not reflect the live database.
+    # Generate different types of companies
+    for index, adviser in enumerate(advisers):
+        SubsidiaryFactory.create_batch(
+            random.randint(0, 25),
+            created_by=adviser,
+            modified_by=random.choice(advisers),
+            global_headquarters=random.choice(companies),
+        )
+        CompanyWithAreaFactory.create_batch(
+            random.randint(0, 1),
+            created_by=adviser,
+            modified_by=random.choice(advisers),
+        )
+        ArchivedCompanyFactory.create_batch(
+            random.randint(0, 1),
+            created_by=adviser,
+            modified_by=adviser,
+        )
+        DuplicateCompanyFactory.create_batch(
+            random.randint(0, 1),
+            created_by=adviser,
+            modified_by=adviser,
+            transferred_by=random.choice(advisers),
+            transferred_to=random.choice(companies),
+        )
+        # Show a sign of life every now and then
+        if index % 10 == 0:
+            print('.', end='')  # noqa
 
     elapsed = time.time() - start_time
     print(f'{timedelta(seconds=elapsed)}')  # noqa
diff --git a/datahub/core/management/commands/rq_health_check.py b/datahub/core/management/commands/rq_health_check.py
new file mode 100644
index 000000000..456fa6d35
--- /dev/null
+++ b/datahub/core/management/commands/rq_health_check.py
@@ -0,0 +1,43 @@
+import sys
+
+from functools import reduce
+from logging import getLogger
+from operator import concat
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from redis import Redis
+from rq import Worker
+
+
+logger = getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = 'RQ Health Check'
+
+    def add_arguments(self, parser):
+        """Define extra arguments."""
+        parser.add_argument(
+            '--queue',
+            type=str,
+            help='Name of the queue to perform health check on.',
+        )
+
+    def handle(self, *args, **options):
+        if options['queue']:
+            queue = str(options['queue'])
+            redis = Redis.from_url(settings.REDIS_BASE_URL)
+            workers = Worker.all(connection=redis)
+            # Flatten each worker's list of queue names into a single list
+            queue_names = reduce(concat, [worker.queue_names() for worker in workers], [])
+            missing_queues = {queue} - set(queue_names)
+
+            if missing_queues:
+                logger.error(f'RQ queue not running: {missing_queues}')
+                sys.exit(1)
+            logger.info('OK')
+            sys.exit(0)
+
+        logger.error('Nothing checked! Please provide --queue parameter')
+        sys.exit(1)
diff --git a/datahub/core/test/management/commands/test_rq_health_check.py b/datahub/core/test/management/commands/test_rq_health_check.py
new file mode 100644
index 000000000..e42e2e631
--- /dev/null
+++ b/datahub/core/test/management/commands/test_rq_health_check.py
@@ -0,0 +1,63 @@
+import logging
+from unittest import mock
+from unittest.mock import patch
+
+import pytest
+
+from django.core.management import call_command
+
+
+class MockWorker:
+    """
+    Mock worker exposing the queue names it was created with.
+    """
+
+    queue_name = ''
+
+    def __init__(self, queue_name, *args, **kwargs):
+        self.queue_name = queue_name
+
+    def queue_names(self):
+        return self.queue_name
+
+
+def test_rq_health_check_ok():
+    logger = logging.getLogger('datahub.core.management.commands.rq_health_check')
+    with patch(
+        'datahub.core.management.commands.rq_health_check.Worker.all',
+        return_value=[MockWorker(['short-running']), MockWorker(['long-running'])],
+    ):
+        with mock.patch.object(logger, 'info') as mock_info:
+            with pytest.raises(SystemExit) as exception_info:
+                call_command('rq_health_check', '--queue=short-running')
+
+    assert exception_info.value.code == 0
+    assert 'OK' in str(mock_info.call_args_list)
+    assert mock_info.call_count == 1
+
+
+def test_rq_health_check_rq_not_running():
+    logger = logging.getLogger('datahub.core.management.commands.rq_health_check')
+    with patch(
+        'datahub.core.management.commands.rq_health_check.Worker.all',
+        return_value=[MockWorker(['long-running'])],
+    ):
+        with mock.patch.object(logger, 'error') as mock_error:
+            with pytest.raises(SystemExit) as exception_info:
+                call_command('rq_health_check', '--queue=short-running')
+
+    assert exception_info.value.code == 1
+    assert "RQ queue not running: {'short-running'}" in str(mock_error.call_args_list)
+    assert mock_error.call_count == 1
+
+
+def test_command_called_without_parameter():
+    logger = logging.getLogger('datahub.core.management.commands.rq_health_check')
+    with mock.patch.object(logger, 'error') as mock_error:
+        with pytest.raises(SystemExit) as exception_info:
+            call_command('rq_health_check')
+
+    assert exception_info.value.code == 1
+    assert 'Nothing checked! Please provide --queue parameter' \
+        in str(mock_error.call_args_list)
+    assert mock_error.call_count == 1
diff --git a/datahub/ping/test/test_ping_view.py b/datahub/ping/test/test_ping_view.py
index 9bd529c14..f725620de 100644
--- a/datahub/ping/test/test_ping_view.py
+++ b/datahub/ping/test/test_ping_view.py
@@ -1,5 +1,9 @@
+from unittest.mock import patch
+
 import pytest
 
+from django.db import DatabaseError
+
 from rest_framework import status
 from rest_framework.reverse import reverse
 
@@ -10,6 +14,20 @@ def test_all_good(client):
     """Test all good."""
     url = reverse('ping')
     response = client.get(url)
+    assert response.status_code == status.HTTP_200_OK
     assert 'OK' in str(response.content)
     assert response.headers['content-type'] == 'text/xml'
+
+
+def test_check_database_fail(client):
+    url = reverse('ping')
+    with patch(
+        'datahub.ping.services.Company.objects.all',
+        side_effect=DatabaseError('No database'),
+    ):
+        response = client.get(url)
+
+    assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
+    assert 'FALSE' in str(response.content)
+    assert response.headers['content-type'] == 'text/xml'
diff --git a/docker-compose-minimal.yml b/docker-compose-minimal.yml
index e2b514c76..8b9e7a506 100644
--- a/docker-compose-minimal.yml
+++ b/docker-compose-minimal.yml
@@ -1,7 +1,7 @@
 version: '3.8'
 services:
   postgres:
-    image: postgres:12
+    image: postgres:16
     restart: always
     environment:
       - POSTGRES_DB=datahub
diff --git a/docker-compose.yml b/docker-compose.yml
index b86e489c7..165d1e76b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,6 +26,16 @@ services:
     depends_on:
       - api
     command: python short-running-worker.py
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "python ./manage.py rq_health_check --queue=short-running"
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 2
+      start_period: 5s
 
   rq_long:
     build:
@@ -37,6 +47,16 @@
     depends_on:
       - api
     command: python long-running-worker.py
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "python ./manage.py rq_health_check --queue=long-running"
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 2
+      start_period: 5s
 
   rq_sched:
     build:
@@ -50,7 +70,7 @@
     command: python cron-scheduler.py
 
   postgres:
-    image: postgres:12
+    image: postgres:16
     restart: always
     ports:
       - "5432:5432"
@@ -59,7 +79,7 @@
       - POSTGRES_PASSWORD=datahub
 
   opensearch:
-    image: opensearchproject/opensearch:1.2.4
+    image: opensearchproject/opensearch:2.11.0
     environment:
       - plugins.security.disabled=true
       - cluster.name=cluster-001
@@ -70,7 +90,7 @@
       - "9200:9200"
 
   redis:
-    image: redis:6.2.6
+    image: redis:7.2.4
     restart: always
     ports:
       - "6379:6379"
@@ -86,4 +106,3 @@
       RQ_REDIS_HOST: "redis"
     depends_on:
       - redis
-
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 7f9712b9c..dc7125045 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile --output-file=requirements-dev.txt requirements-dev.in
@@ -15,7 +15,9 @@ astroid==3.1.0
 asttokens==2.2.1
     # via stack-data
 async-timeout==4.0.2
-    # via redis
+    # via
+    #   aiohttp
+    #   redis
 attrs==22.2.0
     # via
     #   aiohttp
@@ -65,6 +67,7 @@ django==4.2.11
     #   django-extensions
     #   django-filter
     #   django-js-asset
+    #   django-log-formatter-asim
     #   django-redis
     #   django-reversion
     #   djangorestframework
@@ -80,11 +83,9 @@ django-extensions==3.2.3
     # via -r requirements.in
 django-filter==24.1
     # via -r requirements.in
-django-ipware==3.0.7
-    # via django-log-formatter-ecs
 django-js-asset==2.0.0
     # via django-mptt
-django-log-formatter-ecs==0.0.5
+django-log-formatter-asim==0.0.4
     # via -r requirements.in
 django-mptt==0.16.0
     # via -r requirements.in
@@ -102,6 +103,10 @@ ecs-logging==2.0.2
     # via elastic-apm
 elastic-apm==6.21.4.post8347027212
     # via -r requirements.in
+exceptiongroup==1.2.0
+    # via
+    #   ipython
+    #   pytest
 execnet==1.9.0
     # via pytest-xdist
 executing==1.2.0
@@ -171,14 +176,10 @@ jmespath==1.0.1
     # via
     #   boto3
     #   botocore
-kubi-ecs-logger==0.1.2
-    # via django-log-formatter-ecs
 mail-parser==3.15.0
     # via -r requirements.in
 markupsafe==2.1.2
     # via werkzeug
-marshmallow==3.19.0
-    # via kubi-ecs-logger
 matplotlib-inline==0.1.6
     # via ipython
 mccabe==0.7.0
@@ -206,7 +207,6 @@ opensearch-py==2.4.2
 packaging==22.0
     # via
     #   build
-    #   marshmallow
     #   pytest
     #   python-redis-rate-limit
 pandas==2.2.1
@@ -228,7 +228,9 @@ platformdirs==2.6.0
 pluggy==1.4.0
     # via pytest
 pre-commit==3.6.2
-    # via -r requirements-dev.in
+    # via
+    #   -r requirements-dev.in
+    #   django-log-formatter-asim
 prompt-toolkit==3.0.41
     # via ipython
 psycogreen==1.0.2
@@ -364,6 +366,14 @@ stack-data==0.6.2
     # via ipython
 statsd==4.0.1
     # via -r requirements.in
+tomli==2.0.1
+    # via
+    #   build
+    #   coverage
+    #   pip-tools
+    #   pylint
+    #   pyproject-hooks
+    #   pytest
 tomlkit==0.11.6
     # via pylint
 tqdm==4.66.2
@@ -372,6 +382,8 @@ traitlets==5.14.1
     # via
     #   ipython
     #   matplotlib-inline
+typing-extensions==4.10.0
+    # via astroid
 tzdata==2023.3
     # via pandas
 uritemplate==4.1.1
diff --git a/requirements.in b/requirements.in
index 5e235da2c..a583bc8f6 100644
--- a/requirements.in
+++ b/requirements.in
@@ -6,7 +6,7 @@ django-csp==3.8
 django-environ==0.11.2
 django-extensions==3.2.3
 django-filter==24.1
-django-log-formatter-ecs==0.0.5
+django-log-formatter-asim==0.0.4
 django-mptt==0.16.0
 django-pglocks==1.0.4
 django-reversion==5.0.12
diff --git a/requirements.txt b/requirements.txt
index b67cc0214..8621e8477 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile --output-file=requirements.txt requirements.in
@@ -11,7 +11,9 @@ aiosignal==1.3.1
 asgiref==3.6.0
     # via django
 async-timeout==4.0.2
-    # via redis
+    # via
+    #   aiohttp
+    #   redis
 attrs==22.2.0
     # via aiohttp
 bigtree==0.16.4
@@ -28,6 +30,8 @@ certifi==2023.7.22
     #   opensearch-py
     #   requests
     #   sentry-sdk
+cfgv==3.4.0
+    # via pre-commit
 chardet==5.2.0
     # via -r requirements.in
 charset-normalizer==2.1.1
@@ -36,6 +40,8 @@ click==8.1.3
     # via rq
 crontab==1.0.0
     # via rq-scheduler
+distlib==0.3.8
+    # via virtualenv
 django==4.2.11
     # via
     #   -r requirements.in
@@ -45,6 +51,7 @@
     #   django-extensions
     #   django-filter
     #   django-js-asset
+    #   django-log-formatter-asim
     #   django-redis
     #   django-reversion
     #   djangorestframework
@@ -60,11 +67,9 @@ django-extensions==3.2.3
     # via -r requirements.in
 django-filter==24.1
     # via -r requirements.in
-django-ipware==3.0.7
-    # via django-log-formatter-ecs
 django-js-asset==2.0.0
     # via django-mptt
-django-log-formatter-ecs==0.0.5
+django-log-formatter-asim==0.0.4
     # via -r requirements.in
 django-mptt==0.16.0
     # via -r requirements.in
@@ -82,6 +87,8 @@ ecs-logging==2.0.2
     # via elastic-apm
 elastic-apm==6.21.4.post8347027212
     # via -r requirements.in
+filelock==3.13.3
+    # via virtualenv
 freezegun==1.4.0
     # via rq-scheduler
 frozenlist==1.3.3
@@ -94,6 +101,8 @@ greenlet==3.0.1
     # via gevent
 icalendar==5.0.12
     # via -r requirements.in
+identify==2.5.35
+    # via pre-commit
 idna==3.4
     # via
     #   requests
@@ -102,20 +111,18 @@ jmespath==1.0.1
     # via
     #   boto3
     #   botocore
-kubi-ecs-logger==0.1.2
-    # via django-log-formatter-ecs
 mail-parser==3.15.0
     # via -r requirements.in
 markupsafe==2.1.2
     # via werkzeug
-marshmallow==3.19.0
-    # via kubi-ecs-logger
 mohawk==1.1.0
     # via -r requirements.in
 multidict==6.0.4
     # via
     #   aiohttp
     #   yarl
+nodeenv==1.8.0
+    # via pre-commit
 notifications-python-client==9.0.0
     # via -r requirements.in
 numpy==1.24.3
@@ -127,11 +134,13 @@ opensearch-py==2.4.2
     #   -r requirements.in
     #   opensearch-dsl
 packaging==22.0
-    # via
-    #   marshmallow
-    #   python-redis-rate-limit
+    # via python-redis-rate-limit
 pandas==2.2.1
     # via -r requirements.in
+platformdirs==4.2.0
+    # via virtualenv
+pre-commit==3.7.0
+    # via django-log-formatter-asim
 psycogreen==1.0.2
     # via -r requirements.in
 psycopg2-binary==2.9.9
@@ -156,7 +165,9 @@ pytz==2022.7
     #   icalendar
     #   pandas
 pyyaml==6.0.1
-    # via -r requirements.in
+    # via
+    #   -r requirements.in
+    #   pre-commit
 redis==4.5.4
     # via
     #   django-redis
@@ -211,6 +222,8 @@ urllib3==1.26.18
     #   opensearch-py
     #   requests
     #   sentry-sdk
+virtualenv==20.25.1
+    # via pre-commit
 werkzeug==3.0.1
     # via -r requirements.in
 whitenoise==6.6.0
diff --git a/web.sh b/web.sh
index f4a4b4491..29bdce8f8 100755
--- a/web.sh
+++ b/web.sh
@@ -13,5 +13,11 @@ if [ -z "$SKIP_OPENSEARCH_MAPPING_MIGRATIONS" ] && [ "${CF_INSTANCE_INDEX:-0}" =
   ./manage.py migrate_search
 fi
 
-python manage.py collectstatic --noinput
-python app.py
+if [ -n "${COPILOT_ENVIRONMENT_NAME}" ]; then
+  echo "Running in DBT Platform"
+  python app.py
+else
+  echo "Running in Cloud Foundry"
+  python manage.py collectstatic --noinput
+  python app.py
+fi