Skip to content

Commit

Permalink
Merge pull request #90 from opensafely/viv3ckj/move_validation_data
Browse files Browse the repository at this point in the history
Viv3ckj/move validation data
  • Loading branch information
viv3ckj authored Jan 8, 2025
2 parents fa12372 + 77f607b commit 2ac4e21
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 41 deletions.
6 changes: 5 additions & 1 deletion analysis/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@

# measures_definition_pf_medications.py
start_date_measure_medications = "2023-11-01"
monthly_intervals_measure_medications = 9
monthly_intervals_measure_medications = 9

# measures_definition_pf_consultation_med_counts.py
start_date_measure_med_counts = "2024-02-01"
monthly_intervals_measure_med_counts = 6
72 changes: 72 additions & 0 deletions analysis/measures_definition_pf_consultation_med_counts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from ehrql import INTERVAL, create_measures, months
from ehrql.tables.tpp import (
patients,
clinical_events,
practice_registrations,
)
from ehrql.tables.raw.tpp import medications

from config import start_date_measure_med_counts, monthly_intervals_measure_med_counts
from codelists import (
pharmacy_first_consultation_codelist,
pharmacy_first_med_codelist,
)
from pf_variables_library import select_events

# Script taken from Pharmacy First Data Development (for top 10 PF meds table)
#
# Defines one monthly measure, "pf_medication_count": among registered
# male/female patients with a Pharmacy First (PF) consultation in the interval,
# it counts those with a medication recorded under a PF consultation ID,
# grouped by the medication's dmd_code and by whether that code is in the
# PF medication codelist.

measures = create_measures()
measures.configure_dummy_data(population_size=1000)

start_date = start_date_measure_med_counts
monthly_intervals = monthly_intervals_measure_med_counts

registration = practice_registrations.for_patient_on(INTERVAL.end_date)

# Select Pharmacy First consultation events during interval date range
pharmacy_first_events = select_events(
    clinical_events,
    start_date=INTERVAL.start_date,
    end_date=INTERVAL.end_date,
).where(
    clinical_events.snomedct_code.is_in(pharmacy_first_consultation_codelist)
)

pharmacy_first_ids = pharmacy_first_events.consultation_id
has_pharmacy_first_consultation = pharmacy_first_events.exists_for_patient()

# Select medications during interval date range whose consultation ID matches
# one of the Pharmacy First consultation IDs selected above
selected_medications = select_events(
    medications,
    start_date=INTERVAL.start_date,
    end_date=INTERVAL.end_date,
).where(medications.consultation_id.is_in(pharmacy_first_ids))

# Earliest (by date) selected medication for each patient
first_selected_medication = (
    selected_medications.sort_by(selected_medications.date)
    .first_for_patient()
    .dmd_code
)
# Boolean variable that selected medication is part of pharmacy first med codelists
has_pharmacy_first_medication = first_selected_medication.is_in(
    pharmacy_first_med_codelist
)

# Numerator, patients with a medication linked to a PF consultation ID.
# The medication does not have to be in the PF medication codelist; that
# distinction is made by the "pharmacy_first_med" grouping below. This allows
# counting all (first) medications linked to a PF consultation.
numerator = first_selected_medication.is_not_null()

# Denominator, registered patients (f/m) with a PF consultation
denominator = (
    registration.exists_for_patient()
    & patients.sex.is_in(["male", "female"])
    & has_pharmacy_first_consultation
)

measures.define_measure(
    name="pf_medication_count",
    # Reuse the variable defined above instead of repeating the expression
    numerator=numerator,
    denominator=denominator,
    group_by={
        "dmd_code": first_selected_medication,
        "pharmacy_first_med": has_pharmacy_first_medication,
    },
    intervals=months(monthly_intervals).starting_on(start_date),
)
124 changes: 85 additions & 39 deletions analysis/measures_definition_pf_descriptive_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,72 +2,118 @@
from ehrql.tables.raw.tpp import medications
from ehrql.tables.tpp import practice_registrations, patients, clinical_events

from pf_variables_library import select_events_from_codelist, select_events_by_consultation_id
from pf_variables_library import select_events
from codelists import (
pharmacy_first_med_codelist,
pharmacy_first_consultation_codelist,
pharmacy_first_conditions_codelist,
)
from config import start_date_measure_descriptive_stats, monthly_intervals_measure_descriptive_stats
from config import (
start_date_measure_descriptive_stats,
monthly_intervals_measure_descriptive_stats,
)

measures = create_measures()
measures.configure_dummy_data(population_size=1000)
measures.configure_dummy_data(population_size=100)
measures.configure_disclosure_control(enabled=True)

start_date = start_date_measure_descriptive_stats
monthly_intervals = monthly_intervals_measure_descriptive_stats

registration = practice_registrations.for_patient_on(INTERVAL.end_date)

# Function to retrieve consultation ids from clinical events that are PF consultations
pharmacy_first_ids = select_events_from_codelist(
clinical_events, pharmacy_first_consultation_codelist
).consultation_id
# Select clinical events and medications for measures INTERVAL
selected_events = clinical_events.where(
clinical_events.date.is_on_or_between(
INTERVAL.start_date,
INTERVAL.end_date,
)
)
selected_medications = medications.where(
medications.date.is_on_or_between(
INTERVAL.start_date,
INTERVAL.end_date,
)
)

# Select all Pharmacy First consultation events
pf_consultation_events = select_events(
selected_events,
codelist=pharmacy_first_consultation_codelist,
)

# Extract Pharmacy First consultation IDs and dates
pf_ids = pf_consultation_events.consultation_id
pf_dates = pf_consultation_events.date

# Function to retrieve selected events using pharmacy first ids
selected_clinical_events = select_events_by_consultation_id(
clinical_events, pharmacy_first_ids
).where(clinical_events.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date))
has_pf_consultation = pf_consultation_events.exists_for_patient()

selected_med_events = select_events_by_consultation_id(medications, pharmacy_first_ids).where(
medications.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date)
# Select Pharmacy First conditions by ID and date
selected_pf_id_conditions = selected_events.where(
selected_events.consultation_id.is_in(pf_ids)
).where(selected_events.snomedct_code.is_in(pharmacy_first_conditions_codelist))

selected_pf_date_conditions = (
selected_events.where(selected_events.consultation_id.is_not_in(pf_ids))
.where(selected_events.date.is_in(pf_dates))
.where(selected_events.snomedct_code.is_in(pharmacy_first_conditions_codelist))
)

# Create variable which contains boolean values of whether pharmacy first event exists for patient
has_pf_consultation = select_events_from_codelist(selected_clinical_events, pharmacy_first_consultation_codelist).exists_for_patient()
has_pf_id_condition = selected_pf_id_conditions.exists_for_patient()
has_pf_date_condition = selected_pf_date_conditions.exists_for_patient()

# Select Pharmacy First Medications by ID and date
selected_pf_id_medications = selected_medications.where(
selected_medications.consultation_id.is_in(pf_ids)
).where(selected_medications.dmd_code.is_in(pharmacy_first_med_codelist))

# PF consultations with PF clinical condition
has_pf_condition = select_events_from_codelist(selected_clinical_events, pharmacy_first_conditions_codelist).exists_for_patient()
selected_pf_date_medications = (
selected_medications.where(selected_medications.consultation_id.is_not_in(pf_ids))
.where(selected_medications.date.is_in(pf_dates))
.where(selected_medications.dmd_code.is_in(pharmacy_first_med_codelist))
)

# PF consultations with prescribed PF medication
has_pf_medication = selected_med_events.where(
selected_med_events.dmd_code.is_in(pharmacy_first_med_codelist)
).exists_for_patient()
has_pf_id_medication = selected_pf_id_medications.exists_for_patient()
has_pf_date_medication = selected_pf_date_medications.exists_for_patient()

# Define the denominator as the number of patients registered
denominator = (
registration.exists_for_patient()
& patients.sex.is_in(["male", "female"])
& has_pf_consultation
# Define measures
measures.define_defaults(
denominator=(
registration.exists_for_patient()
& patients.sex.is_in(["male", "female"])
& has_pf_consultation
),
intervals=months(monthly_intervals).starting_on(start_date),
)
measures.define_defaults(denominator=denominator)

# Measures for PF consultations with PF medication
# Measures linked by Pharmacy First consultation ID
measures.define_measure(
name="pf_with_pfmed",
numerator=has_pf_medication,
intervals=months(monthly_intervals).starting_on(start_date),
name="pfmed_with_pfid",
numerator=has_pf_id_medication,
)
# Measures for PF consultations with PF condition

measures.define_measure(
name="pf_with_pfcondition",
numerator=has_pf_condition,
intervals=months(monthly_intervals).starting_on(start_date),
name="pfcondition_with_pfid",
numerator=has_pf_id_condition,
)

# Measures for PF consultations with both PF medication and condition
measures.define_measure(
name="pf_with_pfmed_and_pfcondition",
numerator=has_pf_condition & has_pf_medication,
intervals=months(monthly_intervals).starting_on(start_date),
name="pfmed_and_pfcondition_with_pfid",
numerator=has_pf_id_medication & has_pf_id_condition,
)

# Measures linked by Pharmacy First consultation date
measures.define_measure(
name="pfmed_on_pfdate",
numerator=has_pf_date_medication,
)

measures.define_measure(
name="pfcondition_on_pfdate",
numerator=has_pf_date_condition,
)

measures.define_measure(
name="pfmed_and_pfcondition_on_pfdate",
numerator=has_pf_date_medication & has_pf_date_condition,
)
39 changes: 39 additions & 0 deletions lib/functions/create_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,42 @@ create_clinical_conditions_codes_table <- function(title) {
heading.subtitle.font.size = "small"
)
}

# Create top 5 table grouped by pharmacy_first_med status
# Data needs to have the following columns:
# pharmacy_first_med
# term
# count
# ratio_by_group
#
# Arguments:
#   data:     data frame with the columns listed above
#   title:    table heading; the default is the heading this function
#             previously hard-coded
#   subtitle: table subheading; the default is the timeframe text this
#             function previously hard-coded, so pass a new value when the
#             data covers a different period
# Returns the table object produced by the gt pipeline below.
gt_top_meds <- function(
    data,
    title = "Top 5 medications linked to Pharmacy First consultations",
    subtitle = "Timeframe: 1st Feb 2024 to 31st July 2024") {
  data |>
    # Rows are grouped by pharmacy_first_med and labelled by term
    gt(
      groupname_col = "pharmacy_first_med",
      rowname_col = "term"
    ) |>
    tab_header(
      title = title,
      subtitle = subtitle
    ) |>
    cols_label(
      term = md("**Medication**"),
      count = md("**Count**"),
      ratio_by_group = md("**%**")
    ) |>
    fmt_number(
      columns = count,
      decimals = 0
    ) |>
    fmt_percent(
      columns = ratio_by_group,
      decimals = 1
    ) |>
    # Bold the row-group headings
    tab_style(
      style = cell_text(weight = "bold"),
      locations = cells_row_groups(groups = everything())
    ) |>
    tab_stub_indent(
      rows = everything(),
      indent = 3
    )
}
36 changes: 36 additions & 0 deletions lib/functions/eps_erd_prescribing_data.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
library(tidyverse)
library(janitor)
library(here)
library(httr)

# Function to download and read the xlsx files
#
# Downloads the workbook at `url_list` into a temporary file, reads it with
# readxl::read_xlsx() using janitor::make_clean_names for column names, and
# removes the temporary file again when the function exits.
#
# Arguments:
#   url_list: URL of the xlsx file; passed unchanged to httr::GET()
#   sheet:    sheet to read, forwarded to readxl::read_xlsx()
#   skip:     number of rows to skip before reading; NULL is treated as 0
#   ...:      further arguments forwarded to readxl::read_xlsx()
# Returns the value of readxl::read_xlsx().
# Stops with an error if the download responds with an HTTP error status.
read_xlsx_from_url <- function(url_list, sheet = NULL, skip = NULL, ...) {
  # readxl expects `skip` to be a single non-negative number, so the NULL
  # default of this wrapper is mapped to "skip nothing"
  if (is.null(skip)) {
    skip <- 0
  }

  temp_file <- tempfile(fileext = ".xlsx")
  # The data is fully read before returning, so the download can be deleted
  on.exit(unlink(temp_file), add = TRUE)

  response <- GET(
    url_list,
    write_disk(temp_file, overwrite = TRUE)
  )
  # Fail here with the HTTP status rather than later with an unreadable file
  stop_for_status(response)

  readxl::read_xlsx(
    temp_file,
    col_names = TRUE,
    .name_repair = janitor::make_clean_names,
    sheet = sheet,
    skip = skip,
    ...
  )
}

# Read the "Historical Data" sheet of the EPS and eRD prescribing dashboard
# workbook, skipping the first two rows
df <- read_xlsx_from_url(
  "https://github.com/user-attachments/files/17774058/EPS.and.eRD.Prescribing.Dashboard.July.2024.xlsx",
  sheet = "Historical Data",
  skip = 2
)

# Keep the rows for 202402 to 202407 and the columns needed for validation,
# then parse the year-month value in `month` with ym()
df_filtered <- df |>
  filter(month %in% c(202402, 202403, 202404, 202405, 202406, 202407)) |>
  select(month, region_code, practice_code, eps_items, erd_items) |>
  mutate(month = ym(month))

# Write the filtered data to the validation data folder
write_csv(
  df_filtered,
  here("lib", "validation", "data", "eps_erd_prescribing_2024-02-01_to_2024-07-01.csv")
)
11 changes: 10 additions & 1 deletion project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,13 @@ actions:
--output output/population/pf_population.csv.gz
outputs:
highly_sensitive:
cohort: output/population/pf_population.csv.gz
cohort: output/population/pf_population.csv.gz

generate_pf_med_counts_measures:
run: >
ehrql:v1 generate-measures analysis/measures_definition_pf_consultation_med_counts.py
--dummy-tables dummy_tables
--output output/measures/consultation_med_counts_measures.csv
outputs:
moderately_sensitive:
measure: output/measures/consultation_med_counts_measures.csv

0 comments on commit 2ac4e21

Please sign in to comment.