From aacb97e60d878e879095c3dc4630d752b99fcbbe Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Mon, 2 Dec 2024 09:09:31 +0000 Subject: [PATCH] Improve figures and save as `.png` files --- reports/pharmacy_first_report.Rmd | 503 +++++++++++------------------- 1 file changed, 187 insertions(+), 316 deletions(-) diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index 50c1e09..f53a0a0 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -16,116 +16,25 @@ library(tidyverse) library(here) library(readr) library(gt) +library(patchwork) ``` ```{r load-data, message=FALSE, warning=FALSE} -# Load plotting function -source(here::here("lib", "functions", "tidy_measures.R")) -source(here::here("lib", "functions", "plot_measures.R")) - -# Load data based on environment -if (Sys.getenv("OPENSAFELY_BACKEND") != "") { - # Load data from generate_pf_measures action - df_measures <- readr::read_csv( - here::here("output", "measures", "pf_codes_conditions_measures.csv") - ) -} else { - # Load data from released_output directory - df_measures <- readr::read_csv( - here::here("released_output", "measures", "pf_codes_conditions_measures.csv") - ) -} - -# Load validation data (NHS BSA) -df_bsa_validation <- read_csv(here("lib", "validation", "data", "pf_consultation_validation_data.csv")) -df_bsa_meds <- read_csv(here("lib", "validation", "data", "pf_medication_validation_data.csv")) - -df_descriptive_stats <- read_csv(here("released_output", "measures", "pf_descriptive_stats_measures.csv")) -df_pfmed <- read_csv(here("released_output", "measures", "pf_medications_measures.csv")) -df_condition_provider <- read_csv(here("released_output", "measures", "pf_condition_provider_measures.csv")) - -# Define dictionaries with tidy names and mappings for measures -pf_measures_name_dict <- list( - consultation_service = "Consultation Service", - pharmacy_first_service = "Pharmacy First Consultation", - combined_pf_service = "Pharmacy First Consultations (Combined)", - acute_otitis_media = "Acute Otitis Media", - herpes_zoster = "Herpes Zoster", - acute_sinusitis = "Acute Sinusitis", - impetigo = "Impetigo", - infected_insect_bite = "Infected Insect Bite", - acute_pharyngitis = "Acute Pharyngitis", - uncomplicated_urinary_tract_infection = "UTI" -) - -pf_measures_name_mapping <- list( - consultation_service = "clinical_service", - pharmacy_first_service = "clinical_service", - combined_pf_service = "pharmacy_first_services", - acute_otitis_media = "clinical_condition", - herpes_zoster = "clinical_condition", - acute_sinusitis = "clinical_condition", - impetigo = "clinical_condition", - infected_insect_bite = "clinical_condition", - acute_pharyngitis = "clinical_condition", - uncomplicated_urinary_tract_infection = "clinical_condition" -) - -pf_measures_groupby_dict <- list( - age_band = "Age band", - sex = "Sex", - imd = "IMD", - region = "Region", - ethnicity = "Ethnicity" -) - -df_measures <- tidy_measures( - data = df_measures, - pf_measures_name_dict = pf_measures_name_dict, - pf_measures_name_mapping = pf_measures_name_mapping, - pf_measures_groupby_dict = pf_measures_groupby_dict -) - -df_measures$ethnicity <- factor( - df_measures$ethnicity, - levels = c( - "White", "Mixed", "Asian or Asian British", - "Black or Black British", "Chinese or Other Ethnic Groups", - "Missing" - ), - ordered = TRUE -) - -df_measures$age_band <- factor( - df_measures$age_band, - levels = c( - "0-19", "20-39", "40-59", - "60-79", "80+", - "Missing" - ), - ordered = TRUE -) - -df_measures$region <- factor( - df_measures$region, - levels = c( - "East", "East Midlands", "London", - "North East", "North West", "South East", - "South West", "West Midlands", "Yorkshire and The Humber", - "Missing" - ), - ordered = TRUE -) - -df_measures <- df_measures %>% - mutate(sex = factor(sex, levels = c("female", "male"), labels = c("Female", "Male"))) - -df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" - -gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey") -region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey") -ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey") -sex_palette <- c("red", "blue") +# Load functions +source(here("lib", "functions", "tidy_measures.R")) +source(here("lib", "functions", "plot_measures.R")) + +# Load validation data: +# - df_bsa_medication_validation: date, pharmacy_advanced_service, bnf_paragraph, count +# - df_bsa_consultation_validation: date, consultation_type, source, count_method, count +source(here("lib", "functions", "load_validation_data.R")) + +# Load opensafely ouputs: +# - df_measures: measure, interval_start, interval_end, ratio numerator, denominator, age_band, sex,imd, region, ethnicity +# - df_descriptive_stats: measure, interval_start, interval_end, ratio numerator, denominator +# - df_pfmed: measure, interval_start, interval_end, ratio, numerator, denominator, dmd_code +# - df_condition_provider: measure, interval_start, interval_end, ratio, numerator, denominator, pf_status, imd +source(here("lib", "functions", "load_opensafely_outputs.R")) ``` # Background @@ -310,35 +219,6 @@ The [Pregnancy Codelist](https://www.opencodelists.org/codelist/nhsd-primary-car We used the [Ethnicity Codelist](https://www.opencodelists.org/codelist/opensafely/ethnicity-snomed-0removed/2e641f61/) identify ethnicity in Electronic Health Records. To ensure comprehensive ethnicity data, we supplemented missing ethnicity values with data from the Secondary Uses Service (SUS). -```{r echo=FALSE} -ethnicity_table <- data.frame( - Code = c(1, 2, 3, 4, 5), - Ethnicity = c( - "White", - "Mixed", - "Asian or Asian British", - "Black or Black British", - "Chinese or Other Ethnic Groups" - ) -) - -# ethnicity_table %>% -# gt() %>% -# tab_header( -# title = "Table 2. Ethnic Group Codes", -# subtitle = "Codes representing different ethnic groups in the study" -# ) %>% -# cols_label( -# Code = "Code", -# `Ethnicity` = "Ethnic Group" -# ) %>% -# tab_options( -# table.font.size = "medium", -# heading.title.font.size = "large", -# heading.subtitle.font.size = "small" -# ) -``` - # Results ### Total population @@ -772,226 +652,216 @@ plot_measures( ) + scale_color_manual(values = ethnicity_palette) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE} -# Multiply by 0.4 to get 40% of data -df_bsa_validation <- df_bsa_validation %>% - mutate(count_40pct = round(as.numeric(count * .4), digits = 0)) - # OpenSAFELY data for clinical conditions into a tidy df -df_opensafely <- df_measures %>% +df_opensafely_validation <- df_measures %>% filter(measure_desc == "clinical_condition") %>% - filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>% + # filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>% filter(is.na(group_by)) %>% - select(date = interval_start, consultation_type = measure, count = numerator) - -# Add a new column to each data frame to identify the source -df_opensafely <- df_opensafely %>% - mutate(source = "OS") -df_bsa_validation <- df_bsa_validation %>% - mutate(source = "BSA") - -# Drop the original 'count' column from the BSA data to allow for easy consistent grouping by 'count' -df_validation_condition_counts <- df_bsa_validation %>% - select(-count) %>% - rename(count = count_40pct) -# Combining rows from OS and BSA dataframes -df_validation_condition_counts <- bind_rows(df_opensafely, df_validation_condition_counts) - -# format both sources of data as MM-YYYY -df_validation_condition_counts <- df_validation_condition_counts %>% - mutate(month = format(as.Date(date), "%m-%Y")) - -# Pivot the data so that we get two sub columns per month for each source (OS and BSA) -df_pivoted <- df_validation_condition_counts %>% - pivot_wider(names_from = c(month, source), values_from = count) - -# Changing names to make column naming the same to ensure grouping -df_pivoted <- df_pivoted %>% + select(date = interval_start, consultation_type = measure, count = numerator) %>% mutate( - consultation_type = recode(consultation_type, - "sinusitis" = "Acute Sinusitis", - "infected_insect_bites" = "Infected Insect Bite", - "uncomplicated_uti" = "UTI", - "acute_otitis_media" = "Acute Otitis Media", - "acute_sore_throat" = "Acute Pharyngitis", - "shingles" = "Herpes Zoster", - "impetigo" = "Impetigo", - ) - ) -# Removing date column as this will prevent grouping (date is already pivot columns) -df_pivoted <- df_pivoted %>% - select(-date) + source = "opensafely", + count_method = "opensafely_tpp" + ) |> + filter(date >= "2024-01-01") %>% + relocate(date, consultation_type, source, count_method, count) -# Group by consultation type and summarise to get rid of NAs and multiple consultation_types of same name -df_pivoted <- df_pivoted %>% - group_by(consultation_type) %>% - summarise(across(everything(), sum, na.rm = TRUE), .groups = "drop") - -# Now create the gt table -tab_pf_conditions_validation <- df_pivoted %>% - gt() %>% - tab_header( - title = "Validation Data of Pharmacy First Clinical Condition Counts", - subtitle = "Timeframe: 1st Feb 2024 to 31st July 2024" - ) %>% - cols_label( - consultation_type = "Clinical Condition", - `02-2024_OS` = "OS", - `02-2024_BSA` = "BSA", - `03-2024_OS` = "OS", - `03-2024_BSA` = "BSA", - `04-2024_OS` = "OS", - `04-2024_BSA` = "BSA", - `05-2024_OS` = "OS", - `05-2024_BSA` = "BSA", - `06-2024_OS` = "OS", - `06-2024_BSA` = "BSA", - `07-2024_OS` = "OS", - `07-2024_BSA` = "BSA" - ) %>% - tab_spanner( - label = "February 2024", - columns = c(`02-2024_OS`, `02-2024_BSA`) - ) %>% - tab_spanner( - label = "March 2024", - columns = c(`03-2024_OS`, `03-2024_BSA`) - ) %>% - tab_spanner( - label = "April 2024", - columns = c(`04-2024_OS`, `04-2024_BSA`) - ) %>% - tab_spanner( - label = "May 2024", - columns = c(`05-2024_OS`, `05-2024_BSA`) - ) %>% - tab_spanner( - label = "June 2024", - columns = c(`06-2024_OS`, `06-2024_BSA`) - ) %>% - tab_spanner( - label = "July 2024", - columns = c(`07-2024_OS`, `07-2024_BSA`) - ) %>% - tab_footnote( - footnote = "OS - OpenSAFELY data, BSA - Validation data (NHS BSA)" - ) %>% - fmt_number( - columns = everything(), - decimals = 0 - ) - -tab_pf_conditions_validation -``` - -```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -df_long <- df_pivoted %>% pivot_longer( - cols = c("02-2024_OS", "02-2024_BSA", "03-2024_OS", "03-2024_BSA", "04-2024_OS", "04-2024_BSA", "05-2024_OS", "05-2024_BSA", "06-2024_OS", "06-2024_BSA", "07-2024_OS", "07-2024_BSA"), - names_to = c("month", "source"), - names_sep = "_", - values_to = "count" -) -# Changing format of date to use label_date_short to keep dates consistent for figures -df_long$month <- as.Date(paste0("01-", df_long$month), format = "%d-%m-%Y") +# Combining rows from OS and BSA validation dataframes +df_validation_condition_counts <- bind_rows(df_opensafely_validation, df_bsa_consultation_validation) # Line graph comparing clinical condition counts of BSA and OS data -validation_total_counts_figure <- ggplot(df_long, aes(x = month, y = count, color = consultation_type, group = consultation_type)) + - geom_point() + +plot_validation_condition_count <- df_validation_condition_counts %>% + filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% + mutate(source = factor(source, + levels = c("opensafely", "nhs_bsa"), + labels = c("OpenSAFELY-TPP", "NHS BSA (40%)") + )) %>% + ggplot( + aes( + x = date, + y = count, + shape = consultation_type, + color = consultation_type, + fill = consultation_type, + group = consultation_type + ) + ) + + geom_point(size = 2.5) + geom_line(size = 0.5) + facet_wrap(~source, scales = "free_y") + - labs( - title = "Clinical Conditions Count by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count", color = "Clinical Condition" + scale_x_date( + labels = scales::label_date_short() + ) + + labs(x = NULL, y = "Count", colour = NULL, shape = NULL, fill = NULL) + + scale_color_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_fill_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_shape_manual( + values = c( + "Acute Sinusitis" = 15, + "Infected Insect Bite" = 19, + "UTI" = 4, + "Acute Otitis Media" = 23, + "Acute Pharyngitis" = 3, + "Herpes Zoster" = 17, + "Impetigo" = 8 + ) ) + theme( - plot.title = element_text(hjust = 0.5) + text = element_text(size = 14) ) + - scale_x_date( - labels = scales::label_date_short() - ) - -validation_total_counts_figure -``` - -```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -df_consultation_totals <- df_long %>% - group_by(month, source) %>% - summarise(count = sum(count), - .groups = 'drop') - -# Line graph comparing clinical condition counts of BSA and OS data -validation_total_consultation_counts_figure <- ggplot(df_consultation_totals, aes(x = month, y = count)) + - geom_point() + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_y_continuous(labels = scales::number) + +# Another plot visualising the percentage +plot_validation_condition_pct <- df_validation_condition_counts %>% + filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% + pivot_wider(names_from = c(source, count_method), values_from = count) %>% + mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>% + ggplot( + aes( + x = date, + y = opensafely_opensafely_tpp / nhs_bsa_count_40pct, + shape = consultation_type, + color = consultation_type, + fill = consultation_type, + group = consultation_type + ) + ) + + geom_point(size = 2.5) + geom_line(size = 0.5) + facet_wrap(~source, scales = "free_y") + - labs( - title = "Consultations by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count" + scale_x_date( + labels = scales::label_date_short() + ) + + labs(x = NULL, y = "Percent", colour = NULL, shape = NULL, fill = NULL) + + scale_color_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_fill_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_shape_manual( + values = c( + "Acute Sinusitis" = 15, + "Infected Insect Bite" = 19, + "UTI" = 4, + "Acute Otitis Media" = 23, + "Acute Pharyngitis" = 3, + "Herpes Zoster" = 17, + "Impetigo" = 8 + ) ) + theme( - plot.title = element_text(hjust = 0.5), - axis.title.x = element_blank() + text = element_text(size = 14) ) + - scale_x_date( - labels = scales::label_date_short() - ) + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_y_continuous(labels = scales::percent) -validation_total_consultation_counts_figure +# Combining the plots with patchwork +plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_validation_condition_pct) + + plot_annotation(tag_levels = "A") + + plot_layout(guides = "collect", widths = c(2, 1)) & + theme( + legend.position = "bottom", + text = element_text(size = 15), + strip.background = element_rect(size = 0), + strip.text.x = element_text(size = 13, face = "bold") + ) +ggsave( + here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"), + plot_validation_condition_count_pct, + width = 15, height = 6 +) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} # Line graph comparing clinical condition counts of BSA and OS data -df_descriptive_stats <- df_descriptive_stats %>% +plot_pf_descriptive_stats <- df_descriptive_stats %>% mutate( - measure = recode(measure, - "pf_with_pfmed" = "PF Med", - "pf_with_pfcondition" = "PF Condition", - "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition", + measure = factor(measure, + levels = c("pf_with_pfmed", "pf_with_pfcondition", "pf_with_pfmed_and_pfcondition"), + labels = c("PF Med", "PF Condition", "PF Med & PF Condition") ) - ) - -descriptive_stats_figure <- ggplot(df_descriptive_stats, aes(x = interval_start, y = ratio, color = measure, group = measure)) + - geom_point() + + ) |> + ggplot(aes( + x = interval_start, + y = ratio, + colour = measure, + shape = measure, + )) + + geom_point(size = 2.5) + geom_line(size = 0.5) + - # facet_wrap(~ measure, scales = "free_y") + labs( - title = "Breakdown of PF consultations with linked PF conditions and medications", - color = "PF consultation with:" - ) + - theme( - plot.title = element_text(hjust = 0.5) + x = NULL, + y = NULL, + shape = "PF consultation with:", + colour = "PF consultation with:" ) + scale_x_date( - labels = scales::label_date_short() + labels = scales::label_date_short(), breaks = "month" ) + scale_y_continuous( labels = scales::percent, ) + theme( - axis.title.x = element_blank(), - axis.title.y = element_blank() - ) + text = element_text(size = 14) + ) + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_colour_brewer(palette = "Dark2") -descriptive_stats_figure + +plot_pf_descriptive_stats + +ggsave( + here("released_output", "results", "figures", "plot_pf_descriptive_stats.png"), + plot_pf_descriptive_stats, + width = 10, height = 6 +) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} - # Validation of pharmacy first medication counts figure # OS data - waiting on released output df_bsa_total_meds <- df_bsa_meds %>% group_by(date) %>% - summarise(total_meds = sum(count) * 0.4, - .groups = 'drop') %>% - mutate(source = "BSA") %>% - filter(date <= "2024-07-01") + summarise( + total_meds = sum(count) * 0.4, + .groups = "drop" + ) %>% + mutate(source = "BSA") %>% + filter(date <= "2024-07-01") df_pfmed_total <- df_pfmed %>% rename(date = interval_start) %>% - group_by(date) %>% - summarise(total_meds = sum(numerator), - .groups = 'drop') %>% + group_by(date) %>% + summarise( + total_meds = sum(numerator), + .groups = "drop" + ) %>% mutate(source = "OS") %>% filter(date >= "2024-02-01") @@ -1016,12 +886,14 @@ fig_validation_med_counts <- ggplot(df_validation_med_counts, aes(x = date, y = fig_validation_med_counts ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -# GP vs PF provider graph +# GP vs PF provider graph -df_condition_provider_grouped <- df_condition_provider %>% - group_by(measure, interval_start, pf_status) %>% - summarise(count = sum(numerator), - .groups = 'drop') %>% +df_condition_provider_grouped <- df_condition_provider %>% + group_by(measure, interval_start, pf_status) %>% + summarise( + count = sum(numerator), + .groups = "drop" + ) %>% mutate( measure = recode(measure, "count_acute_sinusitis_total" = "Acute Sinusitis", @@ -1056,7 +928,6 @@ figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start, ) figure_gp_vs_pf - ``` # References