Merge pull request #70 from opensafely/viv3ckj/update-descriptive-stats-measures

Viv3ckj/update descriptive stats measures
opensafely · Nov 22, 2024 · deb5c6f · deb5c6f
2 parents 00ce545 + f21c4ac
commit deb5c6f
Show file tree

Hide file tree

Showing 7 changed files with 121 additions and 39 deletions.
diff --git a/analysis/codelists.py b/analysis/codelists.py
@@ -55,3 +55,16 @@
     "codelists/opensafely-pharmacy-first-urinary-tract-infection-treatment-dmd.csv",
     column="code",
 )
+
+pharmacy_first_med_codelist = (
+    acute_otitis_media_tx_cod
+    + impetigo_treatment_tx_cod
+    + infected_insect_bites_tx_cod
+    + shingles_treatment_tx_cod
+    + sinusitis_tx_cod
+    + sore_throat_tx_cod
+    + urinary_tract_infection_tx_cod
+)
+# Community Pharmacist Consultation Service for minor illness - 1577041000000109
+# Pharmacy First service - 983341000000102
+pharmacy_first_consultation_codelist = ["1577041000000109", "983341000000102"]
diff --git a/analysis/measures_definition_pf_descriptive_stats.py b/analysis/measures_definition_pf_descriptive_stats.py
@@ -1,66 +1,76 @@
 from ehrql import INTERVAL, create_measures, months
-from ehrql.tables.tpp import (
-    practice_registrations,
-    patients,
-)
+from ehrql.tables.raw.tpp import medications
+from ehrql.tables.tpp import practice_registrations, patients, clinical_events
 
-from pf_dataset import pharmacy_first_event_codes
-from measures_definition_pf_medications import pharmacy_first_med_codes
-from measures_definition_pf_breakdown import pharmacy_first_conditions_codes, selected_events
+from pf_variables_library import get_consultation_ids, get_consultationid_events
+from codelists import (
+    pharmacy_first_med_codelist,
+    pharmacy_first_consultation_codelist,
+    pharmacy_first_conditions_codelist,
+)
 
 measures = create_measures()
 measures.configure_dummy_data(population_size=1000)
+measures.configure_disclosure_control(enabled=True)
 
 start_date = "2024-02-01"
 monthly_intervals = 9
 
 registration = practice_registrations.for_patient_on(INTERVAL.end_date)
 
-# Loop through all codes in each sublist of the dictionary to flatten the list ready for is_in commands to be used and have a list of pf_condition codes
-pf_condition_codelist = [code for sublist in pharmacy_first_conditions_codes.values() for code in sublist]
+# Retrieve the consultation ids of clinical events coded as PF consultations
+pharmacy_first_ids = get_consultation_ids(
+    clinical_events, pharmacy_first_consultation_codelist
+)
+
+# Select clinical and medication events linked to the PF consultation ids, restricted to the measure interval
+selected_clinical_events = get_consultationid_events(
+    clinical_events, pharmacy_first_ids
+).where(clinical_events.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date))
+
+selected_med_events = get_consultationid_events(medications, pharmacy_first_ids).where(
+    medications.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date)
+)
 
 # Create variable which contains boolean values of whether pharmacy first event exists for patient
-has_pf_consultation = selected_events.where(
-    selected_events.snomedct_code.is_in(
-        pharmacy_first_event_codes["combined_pf_service"]
-    )
+has_pf_consultation = selected_clinical_events.where(
+    selected_clinical_events.snomedct_code.is_in(pharmacy_first_consultation_codelist)
 ).exists_for_patient()
 
 # PF consultations with PF clinical condition
-has_pf_condition = selected_events.where(
-    selected_events.snomedct_code.is_in(
-        pf_condition_codelist
-    )
+has_pf_condition = selected_clinical_events.where(
+    selected_clinical_events.snomedct_code.is_in(pharmacy_first_conditions_codelist)
 ).exists_for_patient()
 
 # PF consultations with prescribed PF medication
-has_pf_medication = selected_events.where(
-    selected_events.snomedct_code.is_in(
-        pharmacy_first_med_codes
-    )
+has_pf_medication = selected_med_events.where(
+    selected_med_events.dmd_code.is_in(pharmacy_first_med_codelist)
 ).exists_for_patient()
 
 # Define the denominator as the number of patients registered
-denominator = registration.exists_for_patient() & patients.sex.is_in(["male", "female"]) & has_pf_consultation
-measures.define_defaults(
-    denominator = denominator)
+denominator = (
+    registration.exists_for_patient()
+    & patients.sex.is_in(["male", "female"])
+    & has_pf_consultation
+)
+measures.define_defaults(denominator=denominator)
 
 # Measures for PF consultations with PF medication
 measures.define_measure(
-    name="count_pfmed_status",
+    name="pf_with_pfmed",
     numerator=has_pf_medication,
     intervals=months(monthly_intervals).starting_on(start_date),
 )
 # Measures for PF consultations with PF condition
 measures.define_measure(
-    name="count_pfcondition_status",
+    name="pf_with_pfcondition",
     numerator=has_pf_condition,
     intervals=months(monthly_intervals).starting_on(start_date),
 )
 
 # Measures for PF consultations with both PF medication and condition
 measures.define_measure(
-    name="count_pfmed_and_pfcondition_status",
+    name="pf_with_pfmed_and_pfcondition",
     numerator=has_pf_condition & has_pf_medication,
     intervals=months(monthly_intervals).starting_on(start_date),
 )
diff --git a/analysis/pf_codelist_functions.py b/analysis/pf_codelist_functions.py
@@ -0,0 +1,10 @@
+# Converts the conditions codelist (code -> term) into a dict mapping each normalised term (lowercase, underscores for spaces) to a single-code list
+# Unused in descriptive_stats.py, but will be used in later tickets (streamline breakdown.py)
+def get_pf_condition_codes(pharmacy_first_conditions_codelist):
+    pharmacy_first_conditions_codes = {}
+    for codes, term in pharmacy_first_conditions_codelist.items():
+        normalised_term = term.lower().replace(" ", "_")
+        codes = [codes]
+        pharmacy_first_conditions_codes[normalised_term] = codes
+
+    return pharmacy_first_conditions_codes
diff --git a/analysis/pf_variables_library.py b/analysis/pf_variables_library.py
@@ -23,3 +23,20 @@ def count_past_events(index_date, selected_events, codelist, num_months):
         )
         .count_for_patient()
     )
+
+
+# Function to get consultation IDs linked to a specified codelist
+def get_consultation_ids(clinical_events, codelist):
+    consultation_ids = clinical_events.where(
+        clinical_events.snomedct_code.is_in(codelist)
+    ).consultation_id
+
+    return consultation_ids
+
+
+# Function to get events with specific consultation IDs
+def get_consultationid_events(event_frame, consultation_ids):
+    selected_events = event_frame.where(
+        event_frame.consultation_id.is_in(consultation_ids)
+    )
+    return selected_events
diff --git a/dummy_tables/clinical_events.csv b/dummy_tables/clinical_events.csv
@@ -1,12 +1,12 @@
 patient_id,consultation_id,date,snomedct_code
-1,1,2023-12-01,1577041000000109
-1,1,2023-12-01,15805002
-2,2,2023-12-01,1577041000000109
-2,2,2023-12-01,100
-3,3,2023-12-01,1577041000000109
-3,3,2023-12-01,15805002
-4,4,2023-12-01,1577041000000109
-4,4,2023-12-01,15805002
-4,4,2023-12-01,39692111000001101
-5,5,2023-12-01,100000
-5,5,2023-12-01,15805002
+1,1,2024-03-01,1577041000000109
+1,1,2024-03-01,15805002
+2,2,2024-03-01,1577041000000109
+2,2,2024-03-01,100
+3,3,2024-03-01,1577041000000109
+3,3,2024-03-01,15805002
+4,4,2024-03-01,1577041000000109
+4,4,2024-03-01,15805002
+4,4,2024-03-01,39692111000001101
+5,5,2024-03-01,100000
+5,5,2024-03-01,15805002
diff --git a/dummy_tables/medications_raw.csv b/dummy_tables/medications_raw.csv
@@ -2,7 +2,7 @@ patient_id,consultation_id,date,dmd_code
 1,1,2023-12-01,10000000000000000
 2,2,2023-12-01,10000000000000000
 3,3,2023-12-01,10000000000000000
-4,4,2023-12-01,10000000000000000
+4,4,2024-03-01,39692111000001101
 5,5,2023-12-01,10000000000000000
 6,6,2023-12-01,10000000000000000
 7,7,2023-12-01,10000000000000000

diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
@@ -39,6 +39,8 @@ if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
 # Load validation data (NHS BSA)
 df_bsa_validation <- read_csv(here("lib", "validation", "data", "pf_consultation_validation_data.csv"))
 
+df_descriptive_stats <- read_csv(here("output", "measures", "pf_descriptive_stats_measures.csv"))
+
 # Define dictionaries with tidy names and mappings for measures
 pf_measures_name_dict <- list(
   consultation_service = "Consultation Service",
@@ -906,4 +908,34 @@ validation_total_counts_figure <- ggplot(df_long, aes(x = month, y = count, colo
 validation_total_counts_figure
 ```
 
+```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
+# Line graph of the proportion of PF consultations with a linked PF medication, PF condition, or both
+df_descriptive_stats <- df_descriptive_stats %>%
+  mutate(
+    measure = recode(measure,
+    "pf_with_pfmed" = "PF Med",
+    "pf_with_pfcondition" = "PF Condition",
+    "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition",
+    ))
+
+descriptive_stats_figure <- ggplot(df_descriptive_stats, aes(x = interval_start, y = ratio, color = measure, group = measure)) +
+  geom_point() +
+  geom_line(size = 0.5) +
+  # facet_wrap(~ measure, scales = "free_y") +
+  labs(title = "Breakdown of PF consultations with linked PF conditions and medications",
+      color = "PF consultation with:") +
+  theme(
+    plot.title = element_text(hjust = 0.5)) +
+  scale_x_date(
+      labels = scales::label_date_short()
+  ) +
+  scale_y_continuous(labels = scales::percent,
+  limits = c(0,1)) + 
+  theme(axis.title.x = element_blank(),
+    axis.title.y = element_blank())
+
+descriptive_stats_figure
+
+```
+
 # References