Skip to content

Commit

Permalink
fix: Use HES archive tables
Browse files Browse the repository at this point in the history
See discussion in opensafely-core/ehrql#2047 for details

Note that archive tables contain data up to March 2024.
  • Loading branch information
inglesp committed Aug 22, 2024
1 parent 51c9e7e commit 96988ae
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 49 deletions.
88 changes: 45 additions & 43 deletions cohortextractor/tpp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2467,7 +2467,7 @@ def patients_attended_emergency_care(
assert False

date_condition, date_joins = self.get_date_condition(
"EC", "Arrival_Date", between
"EC_ARCHIVE", "Arrival_Date", between
)
conditions = [date_condition]

Expand Down Expand Up @@ -2499,14 +2499,14 @@ def patients_attended_emergency_care(
t.Patient_ID AS patient_id,
{column} AS {returning}
FROM (
SELECT EC.Patient_ID, {column},
SELECT EC_ARCHIVE.Patient_ID, {column},
ROW_NUMBER() OVER (
PARTITION BY EC.Patient_ID
ORDER BY Arrival_Date {ordering}, EC.EC_Ident
PARTITION BY EC_ARCHIVE.Patient_ID
ORDER BY Arrival_Date {ordering}, EC_ARCHIVE.EC_Ident
) AS rownum
FROM EC
FROM EC_ARCHIVE
INNER JOIN EC_Diagnosis
ON EC.EC_Ident = EC_Diagnosis.EC_Ident
ON EC_ARCHIVE.EC_Ident = EC_Diagnosis.EC_Ident
{date_joins}
WHERE {conditions}
) t
Expand All @@ -2515,14 +2515,14 @@ def patients_attended_emergency_care(
else:
sql = f"""
SELECT
EC.Patient_ID AS patient_id,
EC_ARCHIVE.Patient_ID AS patient_id,
{column} AS {returning}
FROM EC
FROM EC_ARCHIVE
INNER JOIN EC_Diagnosis
ON EC.EC_Ident = EC_Diagnosis.EC_Ident
ON EC_ARCHIVE.EC_Ident = EC_Diagnosis.EC_Ident
{date_joins}
WHERE {conditions}
GROUP BY EC.Patient_ID
GROUP BY EC_ARCHIVE.Patient_ID
"""
return sql

Expand All @@ -2544,7 +2544,7 @@ def patients_admitted_to_hospital(
"discharge_destination": "Discharge_Destination",
"patient_classification": "Patient_Classification",
"admission_treatment_function_code": "Der_Admit_Treatment_Function_Code",
"days_in_critical_care": "APCS_Der.Spell_PbR_CC_Day",
"days_in_critical_care": "APCS_Der_ARCHIVE.Spell_PbR_CC_Day",
"administrative_category": "Administrative_Category",
"duration_of_elective_wait": "Duration_of_Elective_Wait",
}
Expand Down Expand Up @@ -2605,7 +2605,7 @@ def patients_admitted_to_hospital(
elif returning == "total_critical_care_days_in_period":
# In case of duplicate spells that start on the same date, we take the
# max value by admission date
returning_column = "MAX(CAST(APCS_Der.Spell_PbR_CC_Day AS INTEGER))"
returning_column = "MAX(CAST(APCS_Der_ARCHIVE.Spell_PbR_CC_Day AS INTEGER))"
use_sum_query = True
sum_adjustment = ""
use_partition_query = False
Expand All @@ -2616,7 +2616,7 @@ def patients_admitted_to_hospital(
raise ValueError(f"Unsupported `returning` value: {returning}")

date_condition, date_joins = self.get_date_condition(
"APCS", "Admission_Date", between
"APCS_ARCHIVE", "Admission_Date", between
)
conditions = [date_condition]

Expand All @@ -2625,12 +2625,12 @@ def patients_admitted_to_hospital(
conditions.append(f"{supported_columns[column_name]} IN ({value_sql})")

if with_at_least_one_day_in_critical_care:
conditions.append("CAST(APCS_Der.Spell_PbR_CC_Day AS int) > 0")
conditions.append("CAST(APCS_Der_ARCHIVE.Spell_PbR_CC_Day AS int) > 0")

if with_these_primary_diagnoses:
assert with_these_primary_diagnoses.system == "icd10"
fragments = [
f"APCS_Der.Spell_Primary_Diagnosis LIKE {pattern} ESCAPE '!'"
f"APCS_Der_ARCHIVE.Spell_Primary_Diagnosis LIKE {pattern} ESCAPE '!'"
for pattern in codelist_to_like_patterns(
with_these_primary_diagnoses, prefix="", suffix="%"
)
Expand Down Expand Up @@ -2673,14 +2673,14 @@ def patients_admitted_to_hospital(
t.Patient_ID AS patient_id,
t.{returning} AS {returning}
FROM (
SELECT APCS.Patient_ID, {returning_column} AS {returning},
SELECT APCS_ARCHIVE.Patient_ID, {returning_column} AS {returning},
ROW_NUMBER() OVER (
PARTITION BY APCS.Patient_ID
ORDER BY APCS.Admission_Date {ordering}, APCS.APCS_Ident
PARTITION BY APCS_ARCHIVE.Patient_ID
ORDER BY APCS_ARCHIVE.Admission_Date {ordering}, APCS_ARCHIVE.APCS_Ident
) AS rownum
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVE
INNER JOIN APCS_Der_ARCHIVE
ON APCS_ARCHIVE.APCS_Ident = APCS_Der_ARCHIVE.APCS_Ident
{date_joins}
WHERE {conditions}
) t
Expand All @@ -2691,28 +2691,28 @@ def patients_admitted_to_hospital(
SELECT patient_id, SUM({returning}{sum_adjustment}) AS {returning}
FROM (
SELECT
APCS.Patient_ID AS patient_id,
APCS_ARCHIVE.Patient_ID AS patient_id,
{returning_column} AS {returning}
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVE
INNER JOIN APCS_Der_ARCHIVE
ON APCS_ARCHIVE.APCS_Ident = APCS_Der_ARCHIVE.APCS_Ident
{date_joins}
WHERE {conditions}
GROUP BY APCS.Patient_ID, APCS.Admission_Date
GROUP BY APCS_ARCHIVE.Patient_ID, APCS_ARCHIVE.Admission_Date
) t
GROUP BY patient_id
"""
else:
sql = f"""
SELECT
APCS.Patient_ID AS patient_id,
APCS_ARCHIVE.Patient_ID AS patient_id,
{returning_column} AS {returning}
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVE
INNER JOIN APCS_Der_ARCHIVE
ON APCS_ARCHIVE.APCS_Ident = APCS_Der_ARCHIVE.APCS_Ident
{date_joins}
WHERE {conditions}
GROUP BY APCS.Patient_ID
GROUP BY APCS_ARCHIVE.Patient_ID
"""
return sql

Expand Down Expand Up @@ -2872,19 +2872,19 @@ def patients_with_ethnicity_from_sus(
Patient_ID,
Ethnic_group AS ethnicity_code
FROM
APCS
APCS_ARCHIVE
UNION ALL
SELECT
Patient_ID,
Ethnic_Category AS ethnicity_code
FROM
EC
EC_ARCHIVE
UNION ALL
SELECT
Patient_ID,
Ethnic_Category AS ethnicity_code
FROM
OPA
OPA_ARCHIVE
) t
WHERE ethnicity_code IS NOT NULL
AND ethnicity_code != '99'
Expand Down Expand Up @@ -2920,7 +2920,7 @@ def patients_outpatient_appointment_date(
returning="binary_flag",
):
date_condition, date_joins = self.get_date_condition(
"OPA", "Appointment_Date", between
"OPA_ARCHIVE", "Appointment_Date", between
)

conditions = [date_condition]
Expand Down Expand Up @@ -2955,13 +2955,15 @@ def patients_outpatient_appointment_date(
if with_these_procedures:
assert with_these_procedures.system == "opcs4"
fragments = [
f"OPA_Proc.Primary_Procedure_Code LIKE {pattern} ESCAPE '!'"
f"OPA_Proc_ARCHIVE.Primary_Procedure_Code LIKE {pattern} ESCAPE '!'"
for pattern in codelist_to_like_patterns(
with_these_procedures, prefix="%", suffix="%"
)
]
conditions.append("(" + " OR ".join(fragments) + ")")
procedures_joins = "JOIN OPA_Proc ON OPA.OPA_Ident = OPA_Proc.OPA_Ident"
procedures_joins = (
"JOIN OPA_Proc_ARCHIVE ON OPA_ARCHIVE.OPA_Ident = OPA_Proc_ARCHIVE.OPA_Ident"
)

conditions = " AND ".join(conditions)

Expand Down Expand Up @@ -2994,13 +2996,13 @@ def patients_outpatient_appointment_date(
t.{column_definition} AS {returning}
FROM (
SELECT
OPA.Patient_ID,
OPA_ARCHIVE.Patient_ID,
{column_definition} AS {returning},
ROW_NUMBER() OVER (
PARTITION BY OPA.Patient_ID
PARTITION BY OPA_ARCHIVE.Patient_ID
ORDER BY Appointment_Date {ordering}
) AS rownum
FROM OPA
FROM OPA_ARCHIVE
{date_joins}
{procedures_joins}
WHERE {conditions}
Expand All @@ -3010,15 +3012,15 @@ def patients_outpatient_appointment_date(
else:
return f"""
SELECT
OPA.Patient_ID AS patient_id,
OPA_ARCHIVE.Patient_ID AS patient_id,
{column_definition} AS {returning}
FROM
OPA
OPA_ARCHIVE
{date_joins}
{procedures_joins}
WHERE {conditions}
GROUP BY
OPA.Patient_ID
OPA_ARCHIVE.Patient_ID
"""

@staticmethod
Expand Down
12 changes: 6 additions & 6 deletions tests/tpp_backend_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ class HouseholdMember(Base):


class EC(Base):
__tablename__ = "EC"
__tablename__ = "EC_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="ECEpisodes", cascade="all, delete"
Expand All @@ -714,7 +714,7 @@ class EC(Base):


class EC_Diagnosis(Base):
__tablename__ = "EC_Diagnosis"
__tablename__ = "EC_Diagnosis_ARCHIVED"

# This column isn't in the actual database but SQLAlchemy gets a bit upset
# if we don't give it a primary key
Expand Down Expand Up @@ -753,7 +753,7 @@ class EC_Diagnosis(Base):


class APCS(Base):
__tablename__ = "APCS"
__tablename__ = "APCS_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="APCSEpisodes", cascade="all, delete"
Expand All @@ -776,7 +776,7 @@ class APCS(Base):


class APCS_Der(Base):
__tablename__ = "APCS_Der"
__tablename__ = "APCS_Der_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="APCS_DerEpisodes", cascade="all, delete"
Expand Down Expand Up @@ -829,7 +829,7 @@ class HighCostDrugs(Base):


class OPA(Base):
__tablename__ = "OPA"
__tablename__ = "OPA_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="OPAEpisodes", cascade="all, delete"
Expand All @@ -848,7 +848,7 @@ class OPA(Base):


class OPA_Proc(Base):
__tablename__ = "OPA_Proc"
__tablename__ = "OPA_Proc_ARCHIVED"

Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship("Patient", back_populates="OPA_Proc")
Expand Down

0 comments on commit 96988ae

Please sign in to comment.