From 141d57b72cb484be93dc81588f23e0008353f9be Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Mon, 1 Dec 2025 22:59:59 +0000 Subject: [PATCH 1/3] Prevent deletion of `Handle` instances via ORM. `Codelist` are created with one `Handle`. They may gain more later, but business logic in `actions.py` does not permit deletion of any of them, and parts of the application may expect that there is always a current `Handle` for a `Codelist`. So we ought not permit it in application code. The only models with deletion in `codelists/actions.py` or `builder/actions.py` are `Codelist` (when discarding the last draft version), `CodelistVersion` (when discarding drafts), and `Search` (when deleting a search in the builder). We don't think that we have application code that does this (see `actions.py`) but have observered it twice in production in recent months, see parent issue https://github.com/opensafely-core/opencodelists/issues/2893. This also helps avoid any use of the developer Django shell to delete `Handles`. We enforce this constraint only in the Django model layer. That does not stop `Handles` being deleted by other means such as via direct SQL, such as via `on_delete=models.CASCADE`, see https://docs.djangoproject.com/en/5.2/topics/db/queries/#deleting-objects > Keep in mind that this will, whenever possible, be executed purely in SQL, > and so the delete() methods of individual object instances will not > necessarily be called during the process. That includes `Handle` when deleted via `Codelist` `CASCADE` as they do not have any dependent models, triggers etc. See https://github.com/django/django/blob/cb1d2854ed2b13799f2b0cc6e04019df181bacd4/django/db/models/deletion.py#L501 Trying locally via deleting a codelist draft in the web UI led to: ``` [debug ] (0.007) DELETE FROM "codelists_handle" WHERE "codelists_handle"."codelist_id" IN (9979); args=(9979,); alias=default [django.db.backends] ``` ... and I can find and delete multiple codelist with multiple handles in the Django shell, which deletes the handles via cascade: ``` >>> from django.db.models import Count >>> five_handle=Codelist.objects.annotate(hcount=Count("handles")).filter(hcount=5) >>> len(five_handle) 2 >>> five_handle.delete() >>> five_handle=Codelist.objects.annotate(hcount=Count("handles")).filter(hcount=5) >>> len(five_handle) 0 ``` ... and I cannot delete via the handle instances or `QuerySet` methods directly in the Django shell: ``` >>> three_handle_cl=Codelist.objects.annotate(hcount=Count("handles")).filter(hcount=3).first() >>> three_handle_cl.handles.all().delete() codelists.models.DeleteHandleException: Bulk deletion of Handle instances via the ORM is not permitted. Attempted on QuerySet `, , ]>` >>> three_handle_cl.handles.all().first().delete() codelists.models.DeleteHandleException: May not delete handles - attempted on `Handle object (2800)` for codelist `BNF codes for NSAIDs (Medication Safety Indicator GIB01)` ``` Added automated tests for the overriden methods and manager queryset. There are already automated tests for discarding drafts etc. in `builder/tests/test_views.py`: - test_discard_only_draft_version - test_discard_one_draft_version --- codelists/models.py | 35 ++++++++++++++++++++++++++++++++++ codelists/tests/test_api.py | 11 ++++++++--- codelists/tests/test_models.py | 12 +++++++++++- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/codelists/models.py b/codelists/models.py index 25ad8315..2c2748ec 100644 --- a/codelists/models.py +++ b/codelists/models.py @@ -4,6 +4,7 @@ from django.core.validators import MinLengthValidator, RegexValidator from django.db import models from django.db.models import Q +from django.db.models.query import QuerySet from django.urls import reverse from django.utils.functional import cached_property from taggit.managers import TaggableManager @@ -217,6 +218,19 @@ def has_published_versions(self): return self.versions.filter(status="published").exists() +class DeleteHandleException(Exception): + """Raised when attempting to delete Handle via the ORM.""" + + +class NoDeleteHandleQuerySet(QuerySet): + def delete(self): + """Prevent deletion of Handle QuerySets via ORM.""" + raise DeleteHandleException( + "Bulk deletion of Handle instances via the ORM is not permitted. " + f"Attempted on QuerySet `{self}`" + ) + + class Handle(models.Model): codelist = models.ForeignKey( "Codelist", on_delete=models.CASCADE, related_name="handles" @@ -271,6 +285,27 @@ class Meta: def owner(self): return self.organisation or self.user + # Attempt to stop deletion of Handle instances via the ORM. + # Codelist are created with one Handle. They may gain more later, but + # business logic in actions.py does not permit deletion of any of them, + # and parts of the application may expect that there is always a current + # Handle for a Codelist. + # Note that this does not stop deletion via on_delete=models.CASCADE, see + # https://docs.djangoproject.com/en/5.2/topics/db/queries/#deleting-objects + # We enforce this constraint only in the Django model layer. That does not + # stop Handles being deleted by other means such as via direct SQL, such + # as via a cascade. + + # Prevent deletion via QuerySet. + objects = NoDeleteHandleQuerySet.as_manager() + + # Prevent deletion via instance method. + def delete(self, using=None, keep_parents=False): + """Prevent deletion of instances via the ORM.""" + raise DeleteHandleException( + f"May not delete handles - attempted on `{self}` for codelist `{self.codelist}`" + ) + class Status(models.TextChoices): DRAFT = "draft" # the version is being edited in the builder diff --git a/codelists/tests/test_api.py b/codelists/tests/test_api.py index 3b8fcccb..7b274d61 100644 --- a/codelists/tests/test_api.py +++ b/codelists/tests/test_api.py @@ -3,9 +3,10 @@ from datetime import datetime import pytest +from django.db import connection from codelists.actions import update_codelist -from codelists.models import Codelist +from codelists.models import Codelist, Handle from mappings.dmdvmpprevmap.models import Mapping as VmpPrevMapping from opencodelists.tests.assertions import assert_difference, assert_no_difference @@ -272,9 +273,13 @@ def test_codelists_get_all_still_works_for_a_codelist_with_a_missing_handle( # We found a Codelist without an associated Handle, # which broke this API endpoint. - # Delete the Handle for one codelist. + # Delete the Handle for one codelist. Via SQL as we disallow via ORM. codelist_to_break = Codelist.objects.get(handles__name="User Codelist From Scratch") - codelist_to_break.current_handle.delete() + table = Handle._meta.db_table + with connection.cursor() as cursor: + cursor.execute( + f'DELETE FROM "{table}" WHERE id = {codelist_to_break.current_handle.pk}' + ) rsp = client.get("/api/v1/codelist/?include-users") assert rsp.status_code == 200 diff --git a/codelists/tests/test_models.py b/codelists/tests/test_models.py index 2bcb5f1c..d8d0297e 100644 --- a/codelists/tests/test_models.py +++ b/codelists/tests/test_models.py @@ -3,7 +3,7 @@ from django.db.utils import IntegrityError from builder import actions as builder_actions -from codelists.models import CodelistVersion, Handle +from codelists.models import CodelistVersion, DeleteHandleException, Handle def test_handle_cannot_belong_to_user_and_organisation(codelist, user, organisation): @@ -150,6 +150,16 @@ def test_handle_name_validation_valid(user_codelist): assert clean_name == name +def test_handle_delete_not_allowed_method(codelist): + with pytest.raises(DeleteHandleException): + codelist.current_handle.delete() + + +def test_handle_delete_not_allowed_queryset(codelist): + with pytest.raises(DeleteHandleException): + Handle.objects.first().delete() + + def test_old_style_codes(old_style_version): assert old_style_version.codes == ( "128133004", From 434b13b2c13fc778bac299e970f4a517c3658426 Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Tue, 2 Dec 2025 15:11:49 +0000 Subject: [PATCH 2/3] Migration to remove `Codelist` with no `Handle`. Per https://github.com/opensafely-core/opencodelists/pull/2919/ and https://github.com/opensafely-core/opencodelists/issues/2893 `Codelist` should not be able to exist without `Handle`. We found an example, let's clean it up. Let's do this in a migration in a new maintenance app, so that the fix is auditable, reviewable, repeatable, testable, avoids the possibility of manual error, and fixes up development environment databases as well. This also provides an example if we need to do this or other maintenance again. And it avoids cluttering the feature apps and getting squashed. The migration is idempotent and has a noop reversal. Manual testing with the latest sanitised backup (with judicious snipping of noise throughout): Before the migration migrations state: ``` ~/opencodelists$ just manage showmigrations ... maintenance [ ] 0001_delete_codelists_without_handles ``` Before state in the Django shell, 1 such codelist exists: ``` >>> len(Codelist.objects.all()) 10419 >>> no_handles = Codelist.objects.filter(handles__isnull=True) >>> len(no_handles) 1 ``` Applying the migration: ``` just manage migrate maintenance 0001 Running migrations: Applying maintenance.0001_delete_codelists_without_handles... 0001: Deleted 1 codelists without handles just manage showmigrations ... maintenance [X] 0001_delete_codelists_without_handles ``` After state in the Django shell, no such codelist exists and only 1 codelist has been deleted: ``` len(Codelist.objects.all()) 10418 >>> no_handles = Codelist.objects.filter(handles__isnull=True) >>> len(no_handles) 0 ``` Re-applying a very similr migration based on the same file and depending on it (not committed), to to test the `except` branch: ``` just manage migrate maintenance 0002 0002: No codelists without handles to delete ``` This safely demonstrates the migration works as expected on near-production data and also in databases without the faulty codelists. This isn't part of ongoing application code so it does not need automated testing. --- maintenance/__init__.py | 0 .../0001_delete_codelists_without_handles.py | 30 +++++++++++++++++++ maintenance/migrations/__init__.py | 0 opencodelists/settings.py | 1 + 4 files changed, 31 insertions(+) create mode 100644 maintenance/__init__.py create mode 100644 maintenance/migrations/0001_delete_codelists_without_handles.py create mode 100644 maintenance/migrations/__init__.py diff --git a/maintenance/__init__.py b/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/maintenance/migrations/0001_delete_codelists_without_handles.py b/maintenance/migrations/0001_delete_codelists_without_handles.py new file mode 100644 index 00000000..70ba7db4 --- /dev/null +++ b/maintenance/migrations/0001_delete_codelists_without_handles.py @@ -0,0 +1,30 @@ +# Delete codelists without handles, see https://github.com/opensafely-core/opencodelists/issues/2893 +from django.db import migrations + + +def delete_codelists_without_handles(apps, schema_editor): + """Delete codelists that have no associated handles.""" + Codelist = apps.get_model('codelists', 'Codelist') + codelists_without_handles = Codelist.objects.filter(handles__isnull=True) + # Deletes via CASCADE bypassing ORM `delete``methods. + deleted_stats = codelists_without_handles.delete() + + # Provide some feedback. + try: + deleted_count = deleted_stats[1]['codelists.Codelist'] + print(f"0001: Deleted {deleted_count} codelists without handles") + except KeyError: + print("0001: No codelists without handles to delete") + +class Migration(migrations.Migration): + + dependencies = [ + ('codelists', '0063_alter_handle_name'), + ] + + operations = [ + migrations.RunPython( + delete_codelists_without_handles, + migrations.RunPython.noop + ), + ] diff --git a/maintenance/migrations/__init__.py b/maintenance/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opencodelists/settings.py b/opencodelists/settings.py index c861f822..019ae1dd 100644 --- a/opencodelists/settings.py +++ b/opencodelists/settings.py @@ -110,6 +110,7 @@ def get_env_var(name): "mappings.bnfdmd", "mappings.ctv3sctmap2", "mappings.dmdvmpprevmap", + "maintenance", "corsheaders", "crispy_forms", "crispy_bootstrap4", From 3ddb5776d8c93c5dea2debd0d4377948e9ec2aeb Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Tue, 2 Dec 2025 15:51:19 +0000 Subject: [PATCH 3/3] Document how to modify production via migrations. We should not make any such changes by directly logging into the Django management shell or a dbshell. Entering commands manually is error-prone and not auditable, reviewable, repeatable, or testable. Document what we might do instead. --- DEVELOPERS.md | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/DEVELOPERS.md b/DEVELOPERS.md index adbe3f23..1913df7c 100644 --- a/DEVELOPERS.md +++ b/DEVELOPERS.md @@ -305,7 +305,35 @@ Only images that have changed will be committed to github - but it's worth notin ## Other production maintenance tasks -### Manually creating and updating codelist versions +### Modifying the production database + +Sometimes, we may need to modify the production database to fix integrity +errors due to bugs or to perform some management actions. We should not make +any such changes by directly logging into the Django management shell or a +dbshell. Entering commands manually is error-prone and not auditable, +reviewable, repeatable, or testable. + +It may be okay to directly run management commands in production, discuss it +with the team. Safest is probably to make the change via a migration, which +also provides an audit trail and makes it easier to review, test, and +potentially repeat. Create any such migrations in the `maintenance` app, see +the history of that app for examples. + +You can create a fresh empty migration in a development environment with: + +``` +just manage makemigrations maintenance --empty --name XXX +``` + +Populate the logic and mark it as dependent on the latest migrations from other +relevant apps (likely just `codelists` and potentially previous migrations in +the same app. Ideally it is idempotent and reversible (even if the reverse is +just a noop). These don't necessarily need automated testing if you do +sufficiently robust manual testing on near-production data. This is because +they are not used in any other components and we don't expect them to change +once merged. + +#### Manually creating and updating codelist versions Currently OpenCodelists handles multiple coding system releases, however it can't always handle new versions of codelists when the coding system has changed significantly since the original @@ -350,23 +378,8 @@ not organisation codelists. #### Process for deleting a single version of a codelist -First, visit the codelist URL of interest -and note down the version ID hash on the page. - -On dokku3: - -1. Start `shell_plus` which loads the database models for you: - ```sh - $ dokku run opencodelists python manage.py shell_plus - ``` -2. Access the specific version of the codelist: - ```pycon - >>> version = CodelistVersion.objects.get_by_hash("") - ``` -3. Delete the **codelist version**: - ```pycon - >>> version.delete() - ``` +Write a migration to delete the single version of the codelist and make it +easy for anyone doing the same in future to repeat this. #### Process for deleting a codelist entirely