Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Out of council boundary imports #7764

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions polling_stations/apps/addressbase/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,18 @@ class Meta:
)


def get_uprn_hash_table(gss_code):
addresses = Address.objects.filter(uprntocouncil__lad=gss_code)
def get_uprn_hash_table(gss_codes: list[str]) -> dict[str, dict[str:str]]:
"""
Takes a list of gss codes and returns a dict with shape:
{
<uprn>: {
"address": <address>,
"postcode": <postcode>,
"location": <location>
}
}
"""
addresses = Address.objects.filter(uprntocouncil__lad__in=gss_codes)
# return the result as a hash table keyed by UPRN
return {
a.uprn: {
Expand Down
9 changes: 5 additions & 4 deletions polling_stations/apps/data_importers/base_importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@ def report(self):
)
station_report = StationReport(self.council.pk, self.additional_report_councils)
district_report = DistrictReport(self.council.pk)
address_report = AddressReport(
self.council.pk, additional_report_councils=self.additional_report_councils
)
address_report = AddressReport(self.council.pk)

report.build_report()

Expand Down Expand Up @@ -860,7 +858,10 @@ def import_data(self):
self.pre_import()

self.stations = StationSet()
self.addresses = AddressList(self.logger)
self.addresses = AddressList(
self.logger, extra_councils=self.additional_report_councils
)

self.import_residential_addresses()
self.import_polling_stations()
self.addresses.check_records()
Expand Down
44 changes: 17 additions & 27 deletions polling_stations/apps/data_importers/data_quality_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from addressbase.models import UprnToCouncil
from councils.models import Council
from councils.models import CouncilGeography
from django.db import connection
from django.db.models import Q
from pollingstations.models import PollingDistrict, PollingStation
Expand Down Expand Up @@ -229,26 +229,17 @@ def get_districts_containing_more_stations(self):

# data quality stats for UPRNs assigned polling station ids
class AddressReport:
def __init__(self, council_id, additional_report_councils=None):
if not additional_report_councils:
additional_report_councils = []
self.additional_report_councils = additional_report_councils
def __init__(self, council_id):
self.council_id = council_id
self.councils = self.additional_report_councils + [self.council_id]
self.gss_codes = [
council.geography.gss
for council in Council.objects.filter(pk__in=self.councils).select_related(
"geography"
)
]
self.gss_code = CouncilGeography.objects.get(council_id=self.council_id).gss

def get_uprns_in_addressbase(self):
return UprnToCouncil.objects.filter(lad__in=self.gss_codes).count()
return UprnToCouncil.objects.filter(lad=self.gss_code).count()

def get_addresses_with_station_id(self):
return (
UprnToCouncil.objects.filter(
lad__in=self.gss_codes, polling_station_id__isnull=False
lad=self.gss_code, polling_station_id__isnull=False
)
.exclude(polling_station_id="")
.count()
Expand All @@ -257,7 +248,7 @@ def get_addresses_with_station_id(self):
def get_addresses_without_station_id(self):
return UprnToCouncil.objects.filter(
Q(polling_station_id__isnull=True) | Q(polling_station_id=""),
council_id__in=self.councils,
council_id=self.council_id,
).count()

def get_addresses_with_valid_station_id_ref(self):
Expand All @@ -267,12 +258,12 @@ def get_addresses_with_valid_station_id_ref(self):
SELECT COUNT(*) FROM addressbase_uprntocouncil
WHERE polling_station_id IN
(SELECT internal_council_id FROM pollingstations_pollingstation
WHERE council_id IN %s)
AND lad IN %s
WHERE council_id = %s)
AND lad = %s
AND polling_station_id != ''
AND polling_station_id IS NOT NULL;
""",
[tuple(self.councils), tuple(self.gss_codes)],
[self.council_id, self.gss_code],
)
results = cursor.fetchall()
return results[0][0]
Expand All @@ -284,12 +275,12 @@ def get_addresses_with_invalid_station_id_ref(self):
SELECT COUNT(*) FROM addressbase_uprntocouncil
WHERE polling_station_id NOT IN
(SELECT internal_council_id FROM pollingstations_pollingstation
WHERE council_id IN %s)
AND lad IN %s
WHERE council_id = %s)
AND lad = %s
AND polling_station_id != ''
AND polling_station_id IS NOT NULL;
""",
[tuple(self.councils), tuple(self.gss_codes)],
[self.council_id, self.gss_code],
)
results = cursor.fetchall()
return results[0][0]
Expand Down Expand Up @@ -474,14 +465,12 @@ def get_csv_coverage_row_color(self, station_ids):

return row_color

def build_address_report(self):
table = Table(title="ADDRESSES", show_header=False, min_width=50)
def build_address_report(self, council_id):
table = Table(title=f"{council_id} ADDRESSES", show_header=False, min_width=50)
table.add_column("Caption")
table.add_column("Number", justify="right")

address_report = AddressReport(
self.council_id, additional_report_councils=self.additional_report_councils
)
address_report = AddressReport(council_id)
uprns_in_council_area = address_report.get_uprns_in_addressbase()
addresses_imported = address_report.get_addresses_with_station_id()
station_ids = address_report.get_addresses_with_station_id()
Expand Down Expand Up @@ -538,7 +527,8 @@ def build_report(self):
if self.expecting_districts:
self.report.add_row(self.build_district_report())
self.report.add_row(self.build_station_report())
self.report.add_row(self.build_address_report())
for council_id in [self.council_id] + self.additional_report_councils:
self.report.add_row(self.build_address_report(council_id))

def generate_string_report(self):
recorder = Console(record=True)
Expand Down
91 changes: 80 additions & 11 deletions polling_stations/apps/data_importers/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import namedtuple

from addressbase.models import Address, UprnToCouncil, get_uprn_hash_table
from councils.models import Council
from councils.models import Council, CouncilGeography
from django.db import connection
from pollingstations.models import PollingDistrict, PollingStation
from uk_geo_utils.helpers import Postcode
Expand Down Expand Up @@ -75,15 +75,9 @@ def council_id(self): # TODO Deal with old_to_new council_ids map
def gss_code(self):
return Council.objects.get(pk=self.council_id).geography.gss

@abc.abstractmethod
def update_uprn_to_council_model(self, polling_station_lookup=None):
if not polling_station_lookup:
polling_station_lookup = self.get_polling_station_lookup()

uprns_in_council = UprnToCouncil.objects.filter(lad=self.gss_code)
for polling_station_id, uprns in polling_station_lookup.items():
uprns_in_council.filter(uprn__in=uprns).update(
polling_station_id=polling_station_id
)
pass


class DistrictSet(CustomSet, AssignPollingStationsMixin):
Expand Down Expand Up @@ -242,9 +236,13 @@ def save(self):


class AddressList(AssignPollingStationsMixin):
def __init__(self, logger):
def __init__(self, logger, extra_councils=None):
if extra_councils is None:
extra_councils = []

self.elements = []
self.logger = logger
self.extra_councils = extra_councils

def append(self, address):
if (
Expand All @@ -263,6 +261,16 @@ def append(self, address):

self.elements.append(address)

@property
def council_ids(self) -> list[str]:  # TODO Deal with old_to_new council_ids map
    """
    Return ``self.council_id`` followed by every id in ``self.extra_councils``.

    A new list is built on each access, so ``self.extra_councils`` itself is
    never modified.
    """
    primary = [self.council_id]
    return primary + self.extra_councils

@property
def gss_codes(self) -> list[str]:
    """
    Return the ``gss`` value of every CouncilGeography row whose
    ``council_id`` is in ``self.council_ids``.

    The queryset is evaluated into a real list so the returned value matches
    the ``list[str]`` annotation (``values_list`` on its own returns a lazy
    QuerySet). The list can still be passed straight to ``__in`` lookups.
    """
    return list(
        CouncilGeography.objects.filter(
            council_id__in=self.council_ids
        ).values_list("gss", flat=True)
    )

def get_uprn_lookup(self):
# for each address, build a lookup of uprn -> set of station ids
uprn_lookup = {}
Expand Down Expand Up @@ -375,11 +383,72 @@ def check_split_postcodes_are_split(self, split_postcodes):
pretty=True,
)

def update_uprn_to_council_model(self, polling_station_lookup=None):
    """
    Set ``polling_station_id`` on the UprnToCouncil rows for each station.

    ``polling_station_lookup`` maps a polling station id to the UPRNs
    assigned to it. When it is missing (or empty) it is built with
    ``self.get_polling_station_lookup()``.

    Only UprnToCouncil rows whose ``lad`` is in ``self.gss_codes`` are
    touched. If ``self.extra_councils`` is set,
    ``set_polling_station_for_extra_councils`` is called for each station
    before its rows are updated.
    """
    if not polling_station_lookup:
        polling_station_lookup = self.get_polling_station_lookup()

    uprns_in_council = UprnToCouncil.objects.filter(lad__in=self.gss_codes)
    for polling_station_id, uprns in polling_station_lookup.items():
        uprns_assigned_to_station = uprns_in_council.filter(uprn__in=uprns)

        if self.extra_councils:
            self.set_polling_station_for_extra_councils(
                polling_station_id, uprns_assigned_to_station
            )

        # The queryset is already restricted to this station's UPRNs, so it
        # can be updated directly without filtering on uprn a second time.
        uprns_assigned_to_station.update(polling_station_id=polling_station_id)

def set_polling_station_for_extra_councils(
    self, polling_station_id, uprns_assigned_to_station
):
    """
    Make sure the station has the right council id for the addresses
    assigned to it.

    The councils involved are found by mapping the ``lad`` values of
    ``uprns_assigned_to_station`` to ``CouncilGeography.council_id``.

    There are three cases:
    1. All the addresses are in the council named in the import script.
       So the station will have the correct council_id and no action is
       necessary.
    2. All the addresses are in a different council.
       In this case we need to update the council_id on the polling station
       in the pollingstations table.
    3. The addresses assigned to this station are in different council
       areas. In this case we need to duplicate the station, making sure
       there is a record for each council_id.

    If no council matches the addresses at all, nothing is done.

    ``polling_station_id`` is matched against
    ``PollingStation.internal_council_id``. ``uprns_assigned_to_station``
    must support ``values_list("lad", flat=True)``.

    Raises ``PollingStation.DoesNotExist`` in cases 2 and 3 if no station
    with this id is stored for ``self.council_id``.
    """
    # NOTE(review): comment left by the PR author on this code -
    # "This approach won't work, because the polling stations aren't saved
    # in the DB yet." Presumably the PollingStation lookups below therefore
    # find nothing at import time - confirm before relying on this method.
    gss_codes = uprns_assigned_to_station.values_list("lad", flat=True)
    # Evaluate once so the checks below all work on the same result.
    council_ids = list(
        CouncilGeography.objects.filter(gss__in=gss_codes).values_list(
            "council_id", flat=True
        )
    )

    if not council_ids or council_ids == [self.council_id]:
        # Case 1 (or nothing matched) - no-op
        return

    if len(council_ids) == 1:
        # Case 2 - change council id on station
        station = PollingStation.objects.get(
            internal_council_id=polling_station_id,
            council_id=self.council_id,
        )
        station.council_id = council_ids[0]
        station.save()
        return

    # Case 3 - create a station for each council
    for council_id in council_ids:
        already_present = PollingStation.objects.filter(
            council_id=council_id,
            internal_council_id=polling_station_id,
        ).exists()
        if already_present:
            continue

        # Re-fetch the importing council's station each time so every copy
        # starts from the stored row rather than from a previous copy.
        station_copy = PollingStation.objects.get(
            council_id=self.council_id,
            internal_council_id=polling_station_id,
        )
        # Clearing the id and flagging the instance as new makes save()
        # insert a fresh row instead of updating the existing one.
        station_copy.id = None
        station_copy.council_id = council_id
        station_copy._state.adding = True
        station_copy.save()

def check_records(self):
    """
    Run the address clean-up and validation steps in order.

    Records without UPRNs and duplicate UPRNs are removed first. The
    remaining records are then checked against the address data returned by
    ``get_uprn_hash_table`` for all of ``self.gss_codes``. Finally the split
    postcodes are checked.
    """
    # Captured before any records are removed below.
    split_postcodes = self.get_council_split_postcodes()
    self.remove_records_missing_uprns()
    self.remove_duplicate_uprns()
    # get_uprn_hash_table takes a list of gss codes, so pass gss_codes only;
    # a lookup with the single gss_code string would be discarded anyway.
    addressbase_data = get_uprn_hash_table(self.gss_codes)
    self.remove_records_not_in_addressbase(addressbase_data)
    self.remove_records_that_dont_match_addressbase(addressbase_data)
    self.check_split_postcodes_are_split(split_postcodes)
Original file line number Diff line number Diff line change
@@ -1,56 +1,14 @@
from addressbase.models import UprnToCouncil
from data_importers.management.commands import BaseHalaroseCsvImporter


class Command(BaseHalaroseCsvImporter):
council_id = "ABD"
addresses_name = "2022-05-05/2022-04-12T10:11:25.128402/polling_station_export-2022-04-07.edited.csv"
stations_name = "2022-05-05/2022-04-12T10:11:25.128402/polling_station_export-2022-04-07.edited.csv"
elections = ["2022-05-05"]

def pre_import(self):
# We need to consider rows that don't have a uprn when importing data.
# However there are lots of rows for other councils in this file.
# So build a list of stations from rows that do have UPRNS
# and then use that list of stations to make sure we check relevant rows, even if they don't have a UPRN

council_uprns = set(
UprnToCouncil.objects.filter(lad=self.council.geography.gss).values_list(
"uprn", flat=True
)
)
self.COUNCIL_STATIONS = set()
data = self.get_addresses()

for record in data:
if record.uprn in council_uprns:
self.COUNCIL_STATIONS.add(self.get_station_hash(record))

def address_record_to_dict(self, record):
if self.get_station_hash(record) not in self.COUNCIL_STATIONS:
return None

if record.housepostcode in [
"AB39 2UJ",
"AB30 1SL",
"AB43 7LN",
"AB42 5JB",
"AB51 8XH",
"AB41 7UA",
"AB51 5DU",
"AB21 0QJ",
"AB35 5PR",
]:
return None

return super().address_record_to_dict(record)

def station_record_to_dict(self, record):
station_hash = self.get_station_hash(record)
if station_hash not in self.COUNCIL_STATIONS:
return None

if station_hash == "74-hanover-community-centre":
return None

return super().station_record_to_dict(record)
addresses_name = (
"2024-07-04/2024-06-07T15:42:14.722645/Eros_SQL_Output002 - Aberdeenshire.csv"
)
stations_name = (
"2024-07-04/2024-06-07T15:42:14.722645/Eros_SQL_Output002 - Aberdeenshire.csv"
)
elections = ["2024-07-04"]

additional_report_councils = ["MRY"]