diff --git a/cohortextractor/ons_cis_utils.py b/cohortextractor/ons_cis_utils.py deleted file mode 100644 index 161cd551..00000000 --- a/cohortextractor/ons_cis_utils.py +++ /dev/null @@ -1,278 +0,0 @@ -# Expected types for each column -# Some of these columns are ints but represent coded categories and -# are converted to their coded labels and returned as str -# See ONS_CIS_CATEGORY_COLUMNS -ONS_CIS_COLUMN_MAPPINGS = { - "age_at_visit": "int", - "contact_any_covid": "bool", - "contact_any_covid_dayssinceg": "int", - "contact_face_covering": "int", - "country": "int", - "covid_admitted": "bool", - "covid_date": "date", - "covid_nhs_contact": "bool", - "covid_test_blood": "bool", - "covid_test_blood_neg_last_date": "date", - "covid_test_blood_pos_first_date": "date", - "covid_test_blood_result": "int", - "covid_test_swab": "bool", - "covid_test_swab_neg_last_date": "date", - "covid_test_swab_pos_first_date": "date", - "covid_test_swab_result": "int", - "covid_think_havehad": "bool", - "ct_mean": "float", - "dataset": "int", - "ethnicity": "int", - "geography_code": "str", - "geography_name": "str", - "gor9d": "int", - "health_care_clean": "bool", - "health_conditions": "bool", - "health_conditions_impact": "int", - "hhsize": "int", - "patient_facing_clean": "int", - "received_ox_date": "date", - "result_combined": "int", - "result_mk": "int", - "result_mk_date": "date", - "result_tdi": "int", - "samples_taken_date": "date", - "school_year": "int", - "self_isolating": "bool", - "self_isolating_v1": "int", - "sex": "int", - "smoke_ever_regularly": "bool", - "smoke_now_cigar": "bool", - "smoke_now_cigarettes": "bool", - "smoke_now_pipe": "bool", - "smoke_now_vape": "bool", - "SOC_occupation": "str", - "sympt_covid_abdominal_pain": "bool", - "sympt_covid_any": "bool", - "sympt_covid_cough": "bool", - "sympt_covid_diarrhoea": "bool", - "sympt_covid_fatigue_weakness": "bool", - "sympt_covid_fever": "bool", - "sympt_covid_headache": "bool", - "sympt_covid_loss_of_smell": "bool", - "sympt_covid_loss_of_taste": "bool", - "sympt_covid_muscle_ache_myalgia": "bool", - "sympt_covid_nausea_vomiting": "bool", - "sympt_covid_shortness_of_breath": "bool", - "sympt_covid_sore_throat": "bool", - "sympt_now_abdominal_pain": "bool", - "sympt_now_any": "bool", - "sympt_now_cough": "bool", - "sympt_now_date": "date", - "sympt_now_diarrhoea": "bool", - "sympt_now_fatigue_weakness": "bool", - "sympt_now_fever": "bool", - "sympt_now_headache": "bool", - "sympt_now_loss_of_smell": "bool", - "sympt_now_loss_of_taste": "bool", - "sympt_now_muscle_ache_myalgia": "bool", - "sympt_now_nausea_vomiting": "bool", - "sympt_now_shortness_of_breath": "bool", - "sympt_now_sore_throat": "bool", - "tenure_group": "int", - "think_have_covid_sympt_now": "bool", - "travel_abroad": "bool", - "travel_abroad_date": "date", - "visit_date": "date", - "pseudo_visit_id": "bytes", - "work_direct_contact_patients_etc": "bool", - "work_location": "int", - "work_outside_home_days": "int", - "work_sector": "int", - "work_social_distancing": "int", - "work_status": "int", - "work_status_clean": "int", - "work_status_v1": "int", - "work_travel": "int", -} - - -# ONS_CIS has a lot of columns that can be returned -# Some columns are ints, but represented category values -# These need to be cast to varchar in the db. -COVID_TEST_RESULTS_CODES = { - 0: "Any tests negative, but none positive", - 1: "One or more positive test(s)", - 2: "Waiting for all results", - 9: "All Tests failed", -} - - -RESULTS_CODES = { - 0: "Negative", - 1: "Positive", - 7: "Rejected", - 8: "Inconclusive", - 9: "Void", - 10: "Insufficient sample", - 11: "Could not process", - 12: "Unassayed", -} - - -HH_SIZE_CODES = {1: "1", 2: "2", 3: "3", 4: "4", 5: "5+"} - - -ONS_CIS_CATEGORY_COLUMNS = { - "contact_any_covid_dayssinceg": { - 1: "0-14", - 2: "15-28", - 3: "29-60", - 4: "61-90", - 5: "91+", - }, - "contact_face_covering": { - 0: "No", - 1: "Yes, at work/school only", - 2: "Yes, in other situations only", - 3: "Yes, usually both Work/school/other", - 4: "My face is already covered", - }, - "country": {0: "England", 1: "Wales", 2: "NI", 3: "Scotland"}, - "covid_test_blood_result": COVID_TEST_RESULTS_CODES, - "covid_test_swab_result": COVID_TEST_RESULTS_CODES, - "dataset": { - 0: "Voyager 1 original", - 1: "Voyager 1 upgrade", - 2: "Voyager 2", - }, - "ethnicity": { - 1: "White-British", - 2: "White-Irish", - 3: "White-Gypsy or Irish Traveller", - 4: "Any other white background", - 5: "Mixed-White & Black Caribbean", - 6: "Mixed-White & Black African", - 7: "Mixed-White & Asian", - 8: "Any other Mixed background", - 9: "Asian or Asian British-Indian", - 10: "Asian or Asian British-Pakistani", - 11: "Asian or Asian British-Bangladeshi", - 12: "Asian or Asian British-Chinese", - 13: "Any other Asian background", - 14: "Black,Caribbean,African-African", - 15: "Black,Caribbean,Afro-Caribbean", - 16: "Any other Black background", - 17: "Other ethnic group-Arab", - 18: "Any other ethnic group", - }, - "gor9d": { - 1: "E12000001", - 2: "E12000002", - 3: "E12000003", - 4: "E12000004", - 5: "E12000005", - 6: "E12000006", - 7: "E12000007", - 8: "E12000008", - 9: "E12000009", - 10: "N99999999", - 11: "S99999999", - 12: "W99999999", - }, - "health_conditions_impact": { - 0: "Not at all", - 1: "Yes, a little", - 2: "Yes, a lot", - }, - "hhsize": HH_SIZE_CODES, - "patient_facing_clean": { - 0: "No", - 1: "Yes", - 2: "Not working in health care", - }, - "result_combined": RESULTS_CODES, - "result_mk": RESULTS_CODES, - "result_tdi": RESULTS_CODES, - "self_isolating_v1": { - 0: "No", - 1: "Yes, you have/have had symptoms", - 2: "Yes, someone you live with had symptoms", - 3: "Yes, for other reasons (e.g. going into hospital, quarantining)", - }, - "sex": {1: "Male", 2: "Female"}, - "surge_flag": { - 0: "No", - 1: "boost1", - 2: "boost2", - }, - "tenure_group": { - 1: "Owner, missing", - 2: "Mortgage", - 3: "Rent, squatting etc", - }, - "work_location": { - -9: "<=15y", - -8: ">=75y", - 1: "Working from home", - 2: "Working somewhere else (not your home)", - 3: "Both (from home and somewhere else)", - 4: "Not applicable, not currently working", - }, - "work_sector": { - 1: "Teaching and education", - 2: "Health care", - 3: "Social care", - 4: "Transport (incl. storage, logistic)", - 5: "Retail sector (incl. wholesale)", - 6: "Hospitality (e.g. hotel, restaurant)", - 7: "Food production, agriculture, farming", - 8: "Personal services (e.g. hairdressers)", - 9: "Information technology and communication", - 10: "Financial services incl. insurance", - 11: "Manufacturing or construction", - 12: "Civil service or Local Government", - 13: "Armed forces", - 14: "Arts,Entertainment or Recreation", - 15: "Other occupation sector", - 99: "NA(Not currently working)", - }, - "work_social_distancing": { - 1: "Easy to maintain 2m", - 2: "Relatively easy to maintain 2m", - 3: "Difficult to maintain 2m, but can be 1m", - 4: "Very difficult to be more than 1m away", - 9: "N/A (not working/in education etc)", - }, - "work_status": { - 1: "Employed", - 2: "Self-employed", - 3: "Furloughed (temporarily not working)", - 4: "Not working (unemployed, retired, long-term sick etc.)", - 5: "Student", - }, - "work_status_clean": { - 0: "Not working", - 1: "Working", - 2: "Student", - }, - "work_status_v1": { - 1: "Employed and currently working", - 2: "Employed and currently not working", - 3: "Self-employed and currently working", - 4: "Self-employed and currently not working", - 5: "Looking for paid work and able to start", - 6: "Not working and not looking for work", - 7: "Retired", - 8: "Child under 5y not attending child care", - 9: "Child under 5y attending child care", - 10: "5y and older in full-time education", - }, - "work_travel": { - 1: "Underground, metro, light rail, tram", - 2: "Train", - 3: "Bus, minibus, coach", - 4: "Motorbike, scooter or moped", - 5: "Car or van", - 6: "Taxi/minicab", - 7: "Bicycle", - 8: "On foot", - 9: "Other method", - 99: "N/A (not working/in education etc)", - }, -} diff --git a/cohortextractor/patients.py b/cohortextractor/patients.py index bef0e563..7795c93b 100644 --- a/cohortextractor/patients.py +++ b/cohortextractor/patients.py @@ -2842,90 +2842,6 @@ def with_an_isaric_record( return "with_an_isaric_record", locals() -def with_an_ons_cis_record( - returning, - return_category_labels=True, - # Date filtering: column to filter - date_filter_column=None, - # Date filtering: date limits - on_or_before=None, - on_or_after=None, - between=None, - # Matching rule - find_first_match_in_period=None, - find_last_match_in_period=None, - include_date_of_match=False, - date_format=None, - return_expectations=None, -): - """ - Return whether patient has an ONS CIS record - - Args: - returning: string value; options are: - - * "binary_flag" - * "number_of_matches_in_period" - * the ONS CIS table column to return - return_category_labels: If the value of `returning` is a coded category, return the - the corresponding longform string labels - date_filter_column: the ONS CIS column to use with date limit args; options are: - - * "covid_date" - * "covid_test_blood_neg_last_date" - * "covid_test_blood_pos_first_date" - * "covid_test_swab_neg_last_date" - * "covid_test_swab_pos_first_date" - * "received_ox_date" - * "result_mk_date" - * "samples_taken_date" - * "sympt_now_date" - * "travel_abroad_date" - * "visit_date" - - `date_filter_column` is not required when returning "number_of_matches_in_period", with no - date limit arguments. It is required for all other `returning` options - - on_or_before: date of interest as a string with the format `YYYY-MM-DD`. Filters results to measurements - on or before the given date (as defined by `date_filter_column`). - on_or_after: date of interest as a string with the format `YYYY-MM-DD`. Filters results to measurements - on or after the given date (as defined by `date_filter_column`). - between: two dates of interest as a list with each date as a string with the format `YYYY-MM-DD`. - Filters results to measurements between the two dates (as defined by `date_filter_column`) - provided (inclusive). - The two dates must be in chronological order. - find_first_match_in_period: as described elsewhere - find_last_match_in_period: as described elsewhere - include_date_of_match: a boolean indicating if an extra column containing the date (from `date_filter_column`) - of the match should be returned. - date_format: a string detailing the format of dates to be returned. - It can be "YYYY-MM-DD", "YYYY-MM" or "YYYY" and wherever possible the least disclosive data should be - returned. i.e returning only year is less disclosive than a date with month and year. - return_expectations: as described elsewhere. - - Example: - Return cleaned employment status (as longform category labels) for patients with a positive covid blood test - after 01 Jan 2022, returning also the date of the positive covid blood test: - - employment_status = patients.with_an_ons_cis_record( - returning="work_status_clean", - return_category_labels=True, - date_filter_column="covid_test_blood_pos_first_date", - on_or_after="2022-01-01", - find_first_match_in_period=True, - include_date_of_match=True, - date_format="YYYY-MM-DD", - return_expectations={ - "rate": "universal", - "category": { - "ratios": {"Not working": 0.2, "Working": 0.6, "Student": 0.2}, - }, - }, - ) - """ - return "with_an_ons_cis_record", locals() - - def with_record_in_ukrr( # picks dataset held by UK Renal Registry (UKRR) from_dataset=None, diff --git a/cohortextractor/process_covariate_definitions.py b/cohortextractor/process_covariate_definitions.py index 45e51455..3c55f54c 100644 --- a/cohortextractor/process_covariate_definitions.py +++ b/cohortextractor/process_covariate_definitions.py @@ -2,8 +2,6 @@ import datetime import re -from .ons_cis_utils import ONS_CIS_CATEGORY_COLUMNS, ONS_CIS_COLUMN_MAPPINGS - # ISARIC data has a lot of columns that are all varchar in the db. ISARIC_COLUMN_MAPPINGS = { "abdopain_ceoccur_v2": "str", @@ -813,13 +811,6 @@ def type_of_with_covid_therapeutics(self, returning, **kwargs): def type_of_with_an_isaric_record(self, returning, **kwargs): return ISARIC_COLUMN_MAPPINGS[returning] - def type_of_with_an_ons_cis_record(self, returning, **kwargs): - if returning in ONS_CIS_CATEGORY_COLUMNS: - return "str" - elif returning in ONS_CIS_COLUMN_MAPPINGS: - return ONS_CIS_COLUMN_MAPPINGS[returning] - return self._type_from_return_value(returning) - def type_of_with_record_in_ukrr(self, returning, **kwargs): return self._type_from_return_value(returning) diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py index 97dc3cf0..bcfa781b 100644 --- a/cohortextractor/tpp_backend.py +++ b/cohortextractor/tpp_backend.py @@ -20,7 +20,6 @@ mssql_dbapi_connection_from_url, mssql_fetch_table, ) -from .ons_cis_utils import ONS_CIS_CATEGORY_COLUMNS, ONS_CIS_COLUMN_MAPPINGS from .pandas_utils import dataframe_from_rows, dataframe_to_file from .process_covariate_definitions import ISARIC_COLUMN_MAPPINGS from .therapeutics_utils import ALLOWED_RISK_GROUPS @@ -55,7 +54,6 @@ def __init__( self.dummy_data = dummy_data self.next_temp_table_id = 1 self._therapeutics_table_name = None - self._ons_cis_table_name = None self.truncate_sql_logs = False if self.covariate_definitions: self.queries = self.get_queries(self.covariate_definitions) @@ -3403,146 +3401,6 @@ def patients_with_an_isaric_record( queries.append(query) return queries - def create_ons_cis_table(self): - """ - Create a temporarary ons_cis table to use for `with_an_ons_cis_record` queries - Remove complete duplicate rows so we don't count them when returning `number_of_matches` - """ - if self._ons_cis_table_name is None: - self._ons_cis_table_name = self.get_temp_table_name("ons_cis") - queries = [ - f""" - -- Creating ons_cis temp table - SELECT DISTINCT Patient_ID, {', '.join(ONS_CIS_COLUMN_MAPPINGS)} - INTO {self._ons_cis_table_name} FROM ONS_CIS_New - """ - ] - else: - queries = [] - return self._ons_cis_table_name, queries - - def patients_with_an_ons_cis_record( - self, - returning="binary_flag", - return_category_labels=True, - date_filter_column=None, - between=None, - # Matching rule - find_first_match_in_period=None, - find_last_match_in_period=None, - include_date_of_match=False, - ): - table, table_queries = self.create_ons_cis_table() - - # Result ordering - if find_first_match_in_period: - ordering = "ASC" - else: - ordering = "DESC" - - # There can be multiple rows per patient in the ONS_CIS dataset - # Partition query is used for all return values except `number_of_matches_in_period` - use_partition_query = True - if returning == "binary_flag": - column_definition = "1" - elif returning == "number_of_matches_in_period": - column_definition = "COUNT(*)" - use_partition_query = False - else: - if returning not in ONS_CIS_COLUMN_MAPPINGS: - raise TypeError(f"returning={returning} is not a valid ONS_CIS column") - elif returning in ONS_CIS_CATEGORY_COLUMNS: - # Category columns are coded values with associated labels - # By default, we convert the coded values to their labels and return the longform strings - if return_category_labels: - mapping = ONS_CIS_CATEGORY_COLUMNS[returning] - case_definitions = "\n".join( - [ - f"WHEN {table}.{returning} = {key} THEN '{value}'" - for key, value in mapping.items() - ] - ) - column_definition = f""" - CASE - {case_definitions} - END - """ - else: - # When returning the codes rather than the string labels, we need to - # cast to varchar, otherwise any int-type codes will return missing - # values as 0, which is usually a valid category - column_definition = f"CAST({table}.{returning} AS VARCHAR)" - else: - column_definition = f"{table}.{returning}" - if date_filter_column: - # If we have a date_filter column, make sure it's valid - filter_type = ONS_CIS_COLUMN_MAPPINGS.get(date_filter_column) - if filter_type is None: - raise TypeError( - f"date_filter_column={date_filter_column} is not a valid ONS_CIS column" - ) - elif filter_type != "date": - raise TypeError( - f"date_filter_column={date_filter_column} is type {filter_type}, not a date" - ) - elif ( - between in [None, (None, None)] - and returning == "number_of_matches_in_period" - ): - # We don't need to filter by date if we're just counting matches and there's no - # date matching required; just set a default date_filter_column (which will be ignored) - date_filter_column = "visit_date" - else: - # We need a date_filter_column for all returning values except counts - # (i.e. number_of_matches_in_period) because we need to identify first or last value - # if there are multiple rows per patient - # For number_of_matches_in_period, we need still a date_filter_column if a - # date-matching arg is specified - raise ValueError("date_filter_column is required") - - date_condition, date_joins = self.get_date_condition( - table, f"{table}.{date_filter_column}", between - ) - - if use_partition_query: - # additionally ordering by pseudo_visit_id should be enough to ensure consistent return - # order in the event that there are duplicate values for the date_filter_column - # The raw dataset does have duplicate pseudo_visit_ids, but these are typically complete - # duplicate rows (which we've already filtered out) or duplicates between patients - # which are presumably an error - sql = f""" - SELECT - t.Patient_ID AS patient_id, - t.return_value as {returning}, - t.{date_filter_column} AS date - FROM ( - SELECT - {table}.Patient_ID, - {column_definition} as return_value, - {table}.{date_filter_column}, - ROW_NUMBER() OVER ( - PARTITION BY {table}.Patient_ID - ORDER BY {table}.{date_filter_column} {ordering}, pseudo_visit_id - ) AS rownum - FROM {table} - {date_joins} - WHERE {date_condition} - ) t - WHERE t.rownum = 1 - """ - else: - # number_of_matches_in_period only - sql = f""" - SELECT - {table}.Patient_ID AS patient_id, - {column_definition} AS {returning} - FROM {table} - {date_joins} - WHERE {date_condition} - GROUP BY {table}.Patient_ID - """ - return table_queries + [sql] - def patients_with_record_in_ukrr( self, # picks dataset held by UKRR diff --git a/tests/test_study_definition.py b/tests/test_study_definition.py index aefcdefa..1b12c8ee 100644 --- a/tests/test_study_definition.py +++ b/tests/test_study_definition.py @@ -393,82 +393,3 @@ def study(): study() else: study() - - -@pytest.mark.parametrize( - "returning,date_filter_column,on_or_after,error,error_msg", - [ - # invalid return value with no matching type - ("foo", None, None, ValueError, "No matching type for 'foo'"), - # invalid ONS_CIS column - ( - "primary_diagnosis", - None, - None, - TypeError, - "returning=primary_diagnosis is not a valid ONS_CIS column", - ), - # invalid date_filter_column - ( - "age_at_visit", - "foo", - None, - TypeError, - "date_filter_column=foo is not a valid ONS_CIS column", - ), - # invalid type of date_filter_column - ( - "age_at_visit", - "age_at_visit", - None, - TypeError, - "date_filter_column=age_at_visit is type int, not a date", - ), - # date_filter_column required - ( - "age_at_visit", - None, - None, - ValueError, - "date_filter_column is required", - ), - # date_filter_column required for number_of_matches_in_period when - # date arg is specified - ( - "number_of_matches_in_period", - None, - "2022-01-01", - ValueError, - "date_filter_column is required", - ), - # date_filter_column not required if returning is number_of_matches_in_period - # and no date arg - ( - "number_of_matches_in_period", - None, - None, - None, - None, - ), - ], -) -def test_ons_cis_study_definition_errors( - returning, date_filter_column, on_or_after, error, error_msg -): - def define_study(): - StudyDefinition( - population=patients.all(), - # by default returns last match in period, using visit date - value=patients.with_an_ons_cis_record( - returning=returning, - date_filter_column=date_filter_column, - on_or_after=on_or_after, - ), - ) - - if error is None: - # instantiating study definition raises no exceptions - define_study() - else: - with pytest.raises(error, match=error_msg): - define_study() diff --git a/tests/test_tpp_backend.py b/tests/test_tpp_backend.py index 960be6ad..482b22cd 100644 --- a/tests/test_tpp_backend.py +++ b/tests/test_tpp_backend.py @@ -29,7 +29,6 @@ CPNS, EC, ICNARC, - ONS_CIS, OPA, UKRR, APCS_Der, @@ -126,7 +125,6 @@ def setup_function(function): session.query(HealthCareWorker).delete() session.query(Therapeutics).delete() session.query(ISARICData).delete() - session.query(ONS_CIS).delete() session.query(UKRR).delete() session.query(Patient).delete() session.query(BuildProgress).delete() @@ -6387,281 +6385,6 @@ def get_cursor(self): assert attempts == 2 -def test_ons_cis(): - session = make_session() - session.add_all( - [ - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=20, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-08-01", - result_tdi=0, - country=0, - self_isolating=0, - ct_mean=12.345, - result_combined=0, - pseudo_visit_id=bytes([1]), - ), - ], - ), - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=30, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-07-01", - result_tdi=1, - country=1, - self_isolating=1, - result_combined=9, - pseudo_visit_id=bytes([2]), - ), - ], - ), - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=40, - visit_date="2020-10-01", - covid_test_blood_pos_first_date="2021-06-01", - result_tdi=10, - country=2, - self_isolating=0, - result_combined=12, - pseudo_visit_id=bytes([3]), - ), - ONS_CIS( - age_at_visit=41, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-06-01", - result_tdi=1, - country=2, - self_isolating=0, - pseudo_visit_id=bytes([4]), - ), - # duplicate record ignored in number_of_matches_in_period counts - ONS_CIS( - age_at_visit=41, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-06-01", - result_tdi=1, - country=2, - self_isolating=0, - pseudo_visit_id=bytes([4]), - ), - ], - ), - ] - ) - session.commit() - study = StudyDefinition( - population=patients.all(), - # by default returns last match in period, using visit date - age_at_visit=patients.with_an_ons_cis_record( - returning="age_at_visit", - date_filter_column="visit_date", - include_date_of_match=True, - date_format="YYYY-MM-DD", - ), - # specifiy first match in period - age_at_first_visit=patients.with_an_ons_cis_record( - returning="age_at_visit", - date_filter_column="visit_date", - find_first_match_in_period=True, - ), - # filter by a different date column; filters out patient 3 - age_filtered_by_covid_test_pos=patients.with_an_ons_cis_record( - returning="age_at_visit", - date_filter_column="covid_test_blood_pos_first_date", - on_or_after="2021-06-15", - include_date_of_match=True, - date_format="YYYY-MM-DD", - ), - # filter by the returning value - covid_test_pos_date=patients.with_an_ons_cis_record( - returning="covid_test_blood_pos_first_date", - date_filter_column="covid_test_blood_pos_first_date", - on_or_after="2021-06-15", - date_format="YYYY-MM-DD", - ), - # binary flag return value - has_visit_with_pos_test_before_2021_08=patients.with_an_ons_cis_record( - returning="binary_flag", - date_filter_column="covid_test_blood_pos_first_date", - on_or_before="2021-07-31", - ), - # number_of_matches_in_period return value - # date_filter_column required - num_visits_with_pos_test_before_2021_08=patients.with_an_ons_cis_record( - returning="number_of_matches_in_period", - date_filter_column="covid_test_blood_pos_first_date", - on_or_before="2021-07-31", - ), - # date_filter_column not required for number of matches with no date-matching - num_visits=patients.with_an_ons_cis_record( - returning="number_of_matches_in_period", - ), - # boolean value - self_isolating_at_last_visit=patients.with_an_ons_cis_record( - returning="self_isolating", - date_filter_column="visit_date", - ), - # Coded categories are converted to long-form string labels - # result_tdi is a coded category value; varchar type in the db - # raw values are stringified ints, return the long form category label - result_tdi=patients.with_an_ons_cis_record( - returning="result_tdi", - date_filter_column="visit_date", - find_first_match_in_period=True, - include_date_of_match=True, - date_format="YYYY-MM-DD", - ), - # country is a coded category value, int type in the db - # raw values are ints, return the long form category label - country=patients.with_an_ons_cis_record( - returning="country", - date_filter_column="visit_date", - find_first_match_in_period=True, - include_date_of_match=True, - date_format="YYYY-MM-DD", - ), - # return country as codes, with a date filter that filters out patient 3; - # Missing values returned as empty string - country_as_codes=patients.with_an_ons_cis_record( - returning="country", - return_category_labels=False, - find_first_match_in_period=True, - date_filter_column="covid_test_blood_pos_first_date", - on_or_after="2021-06-15", - ), - # float data type with no mapping - ct_mean=patients.with_an_ons_cis_record( - returning="ct_mean", - find_first_match_in_period=True, - date_filter_column="visit_date", - ), - # result_combined can take a value of 12 (new in ONS_CIS_New table), mapped to "Unassayed" - result_combined=patients.with_an_ons_cis_record( - returning="result_combined", - find_first_match_in_period=True, - date_filter_column="visit_date", - ), - pseudo_visit_id=patients.with_an_ons_cis_record( - returning="pseudo_visit_id", - find_first_match_in_period=True, - date_filter_column="visit_date", - ), - ) - - assert_results( - study.to_dicts(convert_to_strings=False), - age_at_visit=[20, 30, 41], - age_at_visit_date=["2021-10-01", "2021-10-01", "2021-10-01"], - age_at_first_visit=[20, 30, 40], - age_filtered_by_covid_test_pos=[20, 30, 0], - age_filtered_by_covid_test_pos_date=["2021-08-01", "2021-07-01", ""], - covid_test_pos_date=["2021-08-01", "2021-07-01", ""], - has_visit_with_pos_test_before_2021_08=[0, 1, 1], - num_visits_with_pos_test_before_2021_08=[0, 1, 2], - num_visits=[1, 1, 2], - self_isolating_at_last_visit=[0, 1, 0], - result_tdi=["Negative", "Positive", "Insufficient sample"], - result_tdi_date=["2021-10-01", "2021-10-01", "2020-10-01"], - country=["England", "Wales", "NI"], - country_date=["2021-10-01", "2021-10-01", "2020-10-01"], - country_as_codes=["0", "1", ""], - ct_mean=[12.345, 0, 0], - result_combined=["Negative", "Void", "Unassayed"], - pseudo_visit_id=[bytes([1]), bytes([2]), bytes([3])], - ) - - -def test_nested_ons_cis_variables(): - session = make_session() - session.add_all( - [ - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=20, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-08-01", - result_tdi=0, - country=0, - self_isolating=0, - ), - ], - ), - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=30, - visit_date="2021-11-01", - covid_test_blood_pos_first_date="2021-07-01", - result_tdi=1, - country=1, - self_isolating=1, - ), - ], - ), - Patient( - ONS_CIS=[ - ONS_CIS( - age_at_visit=40, - visit_date="2021-10-01", - covid_test_blood_pos_first_date="2021-06-01", - result_tdi=10, - country=2, - self_isolating=0, - ), - ONS_CIS( - age_at_visit=41, - visit_date="2021-11-01", - covid_test_blood_pos_first_date="2021-06-01", - result_tdi=1, - country=2, - self_isolating=0, - ), - ], - ), - ] - ) - session.commit() - study = StudyDefinition( - population=patients.all(), - # by default returns last match in period, using visit date - visit_date_1=patients.with_an_ons_cis_record( - returning="visit_date", - date_filter_column="visit_date", - include_date_of_match=True, - date_format="YYYY-MM-DD", - find_first_match_in_period=True, - ), - # specifiy first match in period - visit_date_2=patients.with_an_ons_cis_record( - returning="visit_date", - date_filter_column="visit_date", - on_or_after="visit_date_1 + 7 days", - date_format="YYYY-MM-DD", - find_first_match_in_period=True, - ), - num_visits_on_or_before_visit_date_1=patients.with_an_ons_cis_record( - returning="number_of_matches_in_period", - date_filter_column="visit_date", - on_or_before="visit_date_1", - ), - ) - res = study.to_dicts() - assert_results( - res, - visit_date_1=["2021-10-01", "2021-11-01", "2021-10-01"], - visit_date_2=["", "", "2021-11-01"], - num_visits_on_or_before_visit_date_1=["1", "1", "1"], - ) - - def test_ukrr(): "Test UK Renal Registry" session = make_session() diff --git a/tests/tpp_backend_setup.py b/tests/tpp_backend_setup.py index 81a7238a..1ef713ad 100644 --- a/tests/tpp_backend_setup.py +++ b/tests/tpp_backend_setup.py @@ -11,17 +11,13 @@ Float, ForeignKey, Integer, - LargeBinary, String, types, ) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, sessionmaker -from cohortextractor.process_covariate_definitions import ( - ISARIC_COLUMN_MAPPINGS, - ONS_CIS_COLUMN_MAPPINGS, -) +from cohortextractor.process_covariate_definitions import ISARIC_COLUMN_MAPPINGS from cohortextractor.tpp_backend import AppointmentStatus from tests.helpers import mssql_sqlalchemy_engine_from_url, wait_for_mssql_to_be_ready @@ -327,11 +323,6 @@ class Patient(Base): back_populates="Patient", cascade="all, delete, delete-orphan", ) - ONS_CIS = relationship( - "ONS_CIS", - back_populates="Patient", - cascade="all, delete, delete-orphan", - ) UKRR = relationship( "UKRR", back_populates="Patient", @@ -1025,30 +1016,6 @@ class ISARICData(Base): setattr(ISARICData, name, Column(String)) -class ONS_CIS(Base): - __tablename__ = "ONS_CIS_New" - - # fake pk to satisfy the ORM - id = Column(Integer, primary_key=True) - - Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID")) - Patient = relationship("Patient", back_populates="ONS_CIS") - - -# There are lots of columns for ONS_CIS, so we create the ORM columns -# dynamically -sqlalchemy_type_conversion = { - "int": Integer, - "bool": Boolean, - "float": Float, - "str": String, - "date": Date, - "bytes": LargeBinary, -} -for name, ons_cis_type in ONS_CIS_COLUMN_MAPPINGS.items(): - setattr(ONS_CIS, name, Column(sqlalchemy_type_conversion[ons_cis_type])) - - class UKRR(Base): __tablename__ = "UKRR"