From 70e8857de5324dcf6a70c40d6b7a56c8eca11c60 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Sun, 1 Dec 2024 17:51:16 +0000 Subject: [PATCH 1/7] Add more validation data --- .../get_pf_consultation_validation_data.R | 3 +- .../get_pf_medication_validation_data.R | 3 +- .../data/pf_consultation_validation_data.csv | 28 +++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/lib/functions/get_pf_consultation_validation_data.R b/lib/functions/get_pf_consultation_validation_data.R index 6d64c7a..f62b678 100644 --- a/lib/functions/get_pf_consultation_validation_data.R +++ b/lib/functions/get_pf_consultation_validation_data.R @@ -100,7 +100,7 @@ get_dispensing_data <- function(start_date = NULL, end_date = NULL) { } # Calculate summary of counts -df_dispensing_data <- get_dispensing_data(start_date = "2024-02-01") +df_dispensing_data <- get_dispensing_data(start_date = "2023-11-01") df_dispensing_data_summary <- df_dispensing_data |> group_by(date) |> @@ -132,4 +132,5 @@ df_dispensing_data_summary <- df_dispensing_data |> ) |> mutate(consultation_type = str_replace(consultation_type, "^n_pf_consultation_", "")) +range(df_dispensing_data_summary$date) write_csv(df_dispensing_data_summary, here("lib", "validation", "data", "pf_consultation_validation_data.csv")) diff --git a/lib/functions/get_pf_medication_validation_data.R b/lib/functions/get_pf_medication_validation_data.R index 7536ff5..4c4eef4 100644 --- a/lib/functions/get_pf_medication_validation_data.R +++ b/lib/functions/get_pf_medication_validation_data.R @@ -69,7 +69,7 @@ get_dataset_table_names <- function(dataset_id, start_date = NULL, end_date = NU dataset_tables } -get_dataset_table_names("prescription-cost-analysis-pca-monthly-data", "2024-09-01") +get_dataset_table_names("prescription-cost-analysis-pca-monthly-data", "2024-01-01") construct_sql_query <- function(table_name, sql_query) { gsub("{FROM_TABLE}", sprintf("FROM `%s`", table_name), sql_query, fixed = TRUE) @@ -108,6 +108,7 @@ df_validate <- get_nhsbsa_data("prescription-cost-analysis-pca-monthly-data", sq names(df_validate) unique(df_validate$pharmacy_advanced_service) +range(df_validate$date) pf_medication_validation_data <- df_validate |> select(date, snomed_code, pharmacy_advanced_service, bnf_section, bnf_paragraph, items) |> diff --git a/lib/validation/data/pf_consultation_validation_data.csv b/lib/validation/data/pf_consultation_validation_data.csv index 7bfc6e0..7a1c139 100644 --- a/lib/validation/data/pf_consultation_validation_data.csv +++ b/lib/validation/data/pf_consultation_validation_data.csv @@ -1,4 +1,25 @@ date,consultation_type,count +2023-11-01,acute_otitis_media,0 +2023-11-01,acute_sore_throat,0 +2023-11-01,impetigo,0 +2023-11-01,infected_insect_bites,0 +2023-11-01,shingles,0 +2023-11-01,sinusitis,0 +2023-11-01,uncomplicated_uti,0 +2023-12-01,acute_otitis_media,0 +2023-12-01,acute_sore_throat,0 +2023-12-01,impetigo,0 +2023-12-01,infected_insect_bites,0 +2023-12-01,shingles,0 +2023-12-01,sinusitis,0 +2023-12-01,uncomplicated_uti,0 +2024-01-01,acute_otitis_media,0 +2024-01-01,acute_sore_throat,0 +2024-01-01,impetigo,0 +2024-01-01,infected_insect_bites,0 +2024-01-01,shingles,0 +2024-01-01,sinusitis,0 +2024-01-01,uncomplicated_uti,0 2024-02-01,acute_otitis_media,18893 2024-02-01,acute_sore_throat,41014 2024-02-01,impetigo,6816 @@ -41,3 +62,10 @@ date,consultation_type,count 2024-07-01,shingles,5103 2024-07-01,sinusitis,14115 2024-07-01,uncomplicated_uti,51321 +2024-08-01,acute_otitis_media,17915 +2024-08-01,acute_sore_throat,40771 +2024-08-01,impetigo,7448 +2024-08-01,infected_insect_bites,44391 +2024-08-01,shingles,5151 +2024-08-01,sinusitis,10970 +2024-08-01,uncomplicated_uti,53389 From d89d6e316f65451d0a1ca457984f84ada8ee8aca Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Mon, 2 Dec 2024 09:07:08 +0000 Subject: [PATCH 2/7] Refactor data loading and tidying into separate scripts This will improve readability and modularity and make it easier for us to make further changes later on. It's only a start, there's much more we could do. --- lib/functions/load_opensafely_outputs.R | 88 +++++++++++++++++++++++++ lib/functions/load_validation_data.R | 42 ++++++++++++ lib/functions/plot_measures.R | 6 ++ lib/functions/tidy_measures.R | 35 ++++++++++ 4 files changed, 171 insertions(+) create mode 100644 lib/functions/load_opensafely_outputs.R create mode 100644 lib/functions/load_validation_data.R diff --git a/lib/functions/load_opensafely_outputs.R b/lib/functions/load_opensafely_outputs.R new file mode 100644 index 0000000..46204bd --- /dev/null +++ b/lib/functions/load_opensafely_outputs.R @@ -0,0 +1,88 @@ +# Load data based on execution environment +if (Sys.getenv("OPENSAFELY_BACKEND") != "") { + # Load data from generate_pf_measures action + df_measures <- readr::read_csv( + here("output", "measures", "pf_codes_conditions_measures.csv") + ) + df_descriptive_stats <- read_csv( + here("output", "measures", "pf_descriptive_stats_measures.csv") + ) + df_pfmed <- read_csv( + here("output", "measures", "pf_medications_measures.csv") + ) + df_condition_provider <- read_csv( + here("output", "measures", "pf_condition_provider_measures.csv") + ) +} else { + # Load data from released_output directory + df_measures <- readr::read_csv( + here("released_output", "measures", "pf_codes_conditions_measures.csv") + ) + df_descriptive_stats <- read_csv( + here("released_output", "measures", "pf_descriptive_stats_measures.csv") + ) + df_pfmed <- read_csv( + here("released_output", "measures", "pf_medications_measures.csv") + ) + df_condition_provider <- read_csv( + here("released_output", "measures", "pf_condition_provider_measures.csv") + ) +} + +df_measures <- tidy_measures( + data = df_measures, + pf_measures_name_dict = pf_measures_name_dict, + pf_measures_name_mapping = pf_measures_name_mapping, + pf_measures_groupby_dict = pf_measures_groupby_dict +) + +df_measures$ethnicity <- factor( + df_measures$ethnicity, + levels = c( + "White", + "Mixed", + "Asian or Asian British", + "Black or Black British", + "Chinese or Other Ethnic Groups", + "Missing" + ), + ordered = TRUE +) + +df_measures$age_band <- factor( + df_measures$age_band, + levels = c( + "0-19", + "20-39", + "40-59", + "60-79", + "80+", + "Missing" + ), + ordered = TRUE +) + +df_measures$region <- factor( + df_measures$region, + levels = c( + "East", + "East Midlands", + "London", + "North East", + "North West", + "South East", + "South West", + "West Midlands", + "Yorkshire and The Humber", + "Missing" + ), + ordered = TRUE +) + +df_measures <- df_measures %>% + mutate(sex = factor(sex, + levels = c("female", "male"), + labels = c("Female", "Male") + )) + +df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" \ No newline at end of file diff --git a/lib/functions/load_validation_data.R b/lib/functions/load_validation_data.R new file mode 100644 index 0000000..539dc41 --- /dev/null +++ b/lib/functions/load_validation_data.R @@ -0,0 +1,42 @@ +library(readr) +library(tidyr) +library(dplyr) +library(here) + +df_bsa_consultation_validation <- read_csv( + here("lib", "validation", "data", "pf_consultation_validation_data.csv") +) %>% + rename(count_100pct = count) |> + mutate(count_40pct = round(as.numeric(count_100pct * .4), digits = 0)) %>% + mutate(source = "nhs_bsa") |> + pivot_longer( + cols = c(count_100pct, count_40pct), + names_to = "count_method", + values_to = "count" + ) + +df_bsa_consultation_validation <- df_bsa_consultation_validation %>% + mutate(consultation_type = factor(consultation_type, + levels = c( + "sinusitis", + "infected_insect_bites", + "uncomplicated_uti", + "acute_otitis_media", + "acute_sore_throat", + "shingles", + "impetigo" + ), + labels = c( + "Acute Sinusitis", + "Infected Insect Bite", + "UTI", + "Acute Otitis Media", + "Acute Pharyngitis", + "Herpes Zoster", + "Impetigo" + ) + )) + +df_bsa_medication_validation <- read_csv( + here("lib", "validation", "data", "pf_medication_validation_data.csv") +) diff --git a/lib/functions/plot_measures.R b/lib/functions/plot_measures.R index 9525dfa..045ebcb 100644 --- a/lib/functions/plot_measures.R +++ b/lib/functions/plot_measures.R @@ -97,3 +97,9 @@ plot_measures <- function( plot_tmp } + +# Colour palettes +gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey") +region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey") +ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey") +sex_palette <- c("red", "blue") \ No newline at end of file diff --git a/lib/functions/tidy_measures.R b/lib/functions/tidy_measures.R index 2689ac4..44ab4e6 100644 --- a/lib/functions/tidy_measures.R +++ b/lib/functions/tidy_measures.R @@ -1,3 +1,38 @@ +# Define dictionaries with tidy names and mappings for measures +pf_measures_name_dict <- list( + consultation_service = "Consultation Service", + pharmacy_first_service = "Pharmacy First Consultation", + combined_pf_service = "Pharmacy First Consultations (Combined)", + acute_otitis_media = "Acute Otitis Media", + herpes_zoster = "Herpes Zoster", + acute_sinusitis = "Acute Sinusitis", + impetigo = "Impetigo", + infected_insect_bite = "Infected Insect Bite", + acute_pharyngitis = "Acute Pharyngitis", + uncomplicated_urinary_tract_infection = "UTI" +) + +pf_measures_name_mapping <- list( + consultation_service = "clinical_service", + pharmacy_first_service = "clinical_service", + combined_pf_service = "pharmacy_first_services", + acute_otitis_media = "clinical_condition", + herpes_zoster = "clinical_condition", + acute_sinusitis = "clinical_condition", + impetigo = "clinical_condition", + infected_insect_bite = "clinical_condition", + acute_pharyngitis = "clinical_condition", + uncomplicated_urinary_tract_infection = "clinical_condition" +) + +pf_measures_groupby_dict <- list( + age_band = "Age band", + sex = "Sex", + imd = "IMD", + region = "Region", + ethnicity = "Ethnicity" +) + #' Tidy measures data #' #' Creates a tidier dataframe of measures data. From aacb97e60d878e879095c3dc4630d752b99fcbbe Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Mon, 2 Dec 2024 09:09:31 +0000 Subject: [PATCH 3/7] Improve figures and save as `.png` files --- reports/pharmacy_first_report.Rmd | 503 +++++++++++------------------- 1 file changed, 187 insertions(+), 316 deletions(-) diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index 50c1e09..f53a0a0 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -16,116 +16,25 @@ library(tidyverse) library(here) library(readr) library(gt) +library(patchwork) ``` ```{r load-data, message=FALSE, warning=FALSE} -# Load plotting function -source(here::here("lib", "functions", "tidy_measures.R")) -source(here::here("lib", "functions", "plot_measures.R")) - -# Load data based on environment -if (Sys.getenv("OPENSAFELY_BACKEND") != "") { - # Load data from generate_pf_measures action - df_measures <- readr::read_csv( - here::here("output", "measures", "pf_codes_conditions_measures.csv") - ) -} else { - # Load data from released_output directory - df_measures <- readr::read_csv( - here::here("released_output", "measures", "pf_codes_conditions_measures.csv") - ) -} - -# Load validation data (NHS BSA) -df_bsa_validation <- read_csv(here("lib", "validation", "data", "pf_consultation_validation_data.csv")) -df_bsa_meds <- read_csv(here("lib", "validation", "data", "pf_medication_validation_data.csv")) - -df_descriptive_stats <- read_csv(here("released_output", "measures", "pf_descriptive_stats_measures.csv")) -df_pfmed <- read_csv(here("released_output", "measures", "pf_medications_measures.csv")) -df_condition_provider <- read_csv(here("released_output", "measures", "pf_condition_provider_measures.csv")) - -# Define dictionaries with tidy names and mappings for measures -pf_measures_name_dict <- list( - consultation_service = "Consultation Service", - pharmacy_first_service = "Pharmacy First Consultation", - combined_pf_service = "Pharmacy First Consultations (Combined)", - acute_otitis_media = "Acute Otitis Media", - herpes_zoster = "Herpes Zoster", - acute_sinusitis = "Acute Sinusitis", - impetigo = "Impetigo", - infected_insect_bite = "Infected Insect Bite", - acute_pharyngitis = "Acute Pharyngitis", - uncomplicated_urinary_tract_infection = "UTI" -) - -pf_measures_name_mapping <- list( - consultation_service = "clinical_service", - pharmacy_first_service = "clinical_service", - combined_pf_service = "pharmacy_first_services", - acute_otitis_media = "clinical_condition", - herpes_zoster = "clinical_condition", - acute_sinusitis = "clinical_condition", - impetigo = "clinical_condition", - infected_insect_bite = "clinical_condition", - acute_pharyngitis = "clinical_condition", - uncomplicated_urinary_tract_infection = "clinical_condition" -) - -pf_measures_groupby_dict <- list( - age_band = "Age band", - sex = "Sex", - imd = "IMD", - region = "Region", - ethnicity = "Ethnicity" -) - -df_measures <- tidy_measures( - data = df_measures, - pf_measures_name_dict = pf_measures_name_dict, - pf_measures_name_mapping = pf_measures_name_mapping, - pf_measures_groupby_dict = pf_measures_groupby_dict -) - -df_measures$ethnicity <- factor( - df_measures$ethnicity, - levels = c( - "White", "Mixed", "Asian or Asian British", - "Black or Black British", "Chinese or Other Ethnic Groups", - "Missing" - ), - ordered = TRUE -) - -df_measures$age_band <- factor( - df_measures$age_band, - levels = c( - "0-19", "20-39", "40-59", - "60-79", "80+", - "Missing" - ), - ordered = TRUE -) - -df_measures$region <- factor( - df_measures$region, - levels = c( - "East", "East Midlands", "London", - "North East", "North West", "South East", - "South West", "West Midlands", "Yorkshire and The Humber", - "Missing" - ), - ordered = TRUE -) - -df_measures <- df_measures %>% - mutate(sex = factor(sex, levels = c("female", "male"), labels = c("Female", "Male"))) - -df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" - -gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey") -region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey") -ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey") -sex_palette <- c("red", "blue") +# Load functions +source(here("lib", "functions", "tidy_measures.R")) +source(here("lib", "functions", "plot_measures.R")) + +# Load validation data: +# - df_bsa_medication_validation: date, pharmacy_advanced_service, bnf_paragraph, count +# - df_bsa_consultation_validation: date, consultation_type, source, count_method, count +source(here("lib", "functions", "load_validation_data.R")) + +# Load opensafely ouputs: +# - df_measures: measure, interval_start, interval_end, ratio numerator, denominator, age_band, sex,imd, region, ethnicity +# - df_descriptive_stats: measure, interval_start, interval_end, ratio numerator, denominator +# - df_pfmed: measure, interval_start, interval_end, ratio, numerator, denominator, dmd_code +# - df_condition_provider: measure, interval_start, interval_end, ratio, numerator, denominator, pf_status, imd +source(here("lib", "functions", "load_opensafely_outputs.R")) ``` # Background @@ -310,35 +219,6 @@ The [Pregnancy Codelist](https://www.opencodelists.org/codelist/nhsd-primary-car We used the [Ethnicity Codelist](https://www.opencodelists.org/codelist/opensafely/ethnicity-snomed-0removed/2e641f61/) identify ethnicity in Electronic Health Records. To ensure comprehensive ethnicity data, we supplemented missing ethnicity values with data from the Secondary Uses Service (SUS). -```{r echo=FALSE} -ethnicity_table <- data.frame( - Code = c(1, 2, 3, 4, 5), - Ethnicity = c( - "White", - "Mixed", - "Asian or Asian British", - "Black or Black British", - "Chinese or Other Ethnic Groups" - ) -) - -# ethnicity_table %>% -# gt() %>% -# tab_header( -# title = "Table 2. Ethnic Group Codes", -# subtitle = "Codes representing different ethnic groups in the study" -# ) %>% -# cols_label( -# Code = "Code", -# `Ethnicity` = "Ethnic Group" -# ) %>% -# tab_options( -# table.font.size = "medium", -# heading.title.font.size = "large", -# heading.subtitle.font.size = "small" -# ) -``` - # Results ### Total population @@ -772,226 +652,216 @@ plot_measures( ) + scale_color_manual(values = ethnicity_palette) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE} -# Multiply by 0.4 to get 40% of data -df_bsa_validation <- df_bsa_validation %>% - mutate(count_40pct = round(as.numeric(count * .4), digits = 0)) - # OpenSAFELY data for clinical conditions into a tidy df -df_opensafely <- df_measures %>% +df_opensafely_validation <- df_measures %>% filter(measure_desc == "clinical_condition") %>% - filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>% + # filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>% filter(is.na(group_by)) %>% - select(date = interval_start, consultation_type = measure, count = numerator) - -# Add a new column to each data frame to identify the source -df_opensafely <- df_opensafely %>% - mutate(source = "OS") -df_bsa_validation <- df_bsa_validation %>% - mutate(source = "BSA") - -# Drop the original 'count' column from the BSA data to allow for easy consistent grouping by 'count' -df_validation_condition_counts <- df_bsa_validation %>% - select(-count) %>% - rename(count = count_40pct) -# Combining rows from OS and BSA dataframes -df_validation_condition_counts <- bind_rows(df_opensafely, df_validation_condition_counts) - -# format both sources of data as MM-YYYY -df_validation_condition_counts <- df_validation_condition_counts %>% - mutate(month = format(as.Date(date), "%m-%Y")) - -# Pivot the data so that we get two sub columns per month for each source (OS and BSA) -df_pivoted <- df_validation_condition_counts %>% - pivot_wider(names_from = c(month, source), values_from = count) - -# Changing names to make column naming the same to ensure grouping -df_pivoted <- df_pivoted %>% + select(date = interval_start, consultation_type = measure, count = numerator) %>% mutate( - consultation_type = recode(consultation_type, - "sinusitis" = "Acute Sinusitis", - "infected_insect_bites" = "Infected Insect Bite", - "uncomplicated_uti" = "UTI", - "acute_otitis_media" = "Acute Otitis Media", - "acute_sore_throat" = "Acute Pharyngitis", - "shingles" = "Herpes Zoster", - "impetigo" = "Impetigo", - ) - ) -# Removing date column as this will prevent grouping (date is already pivot columns) -df_pivoted <- df_pivoted %>% - select(-date) + source = "opensafely", + count_method = "opensafely_tpp" + ) |> + filter(date >= "2024-01-01") %>% + relocate(date, consultation_type, source, count_method, count) -# Group by consultation type and summarise to get rid of NAs and multiple consultation_types of same name -df_pivoted <- df_pivoted %>% - group_by(consultation_type) %>% - summarise(across(everything(), sum, na.rm = TRUE), .groups = "drop") - -# Now create the gt table -tab_pf_conditions_validation <- df_pivoted %>% - gt() %>% - tab_header( - title = "Validation Data of Pharmacy First Clinical Condition Counts", - subtitle = "Timeframe: 1st Feb 2024 to 31st July 2024" - ) %>% - cols_label( - consultation_type = "Clinical Condition", - `02-2024_OS` = "OS", - `02-2024_BSA` = "BSA", - `03-2024_OS` = "OS", - `03-2024_BSA` = "BSA", - `04-2024_OS` = "OS", - `04-2024_BSA` = "BSA", - `05-2024_OS` = "OS", - `05-2024_BSA` = "BSA", - `06-2024_OS` = "OS", - `06-2024_BSA` = "BSA", - `07-2024_OS` = "OS", - `07-2024_BSA` = "BSA" - ) %>% - tab_spanner( - label = "February 2024", - columns = c(`02-2024_OS`, `02-2024_BSA`) - ) %>% - tab_spanner( - label = "March 2024", - columns = c(`03-2024_OS`, `03-2024_BSA`) - ) %>% - tab_spanner( - label = "April 2024", - columns = c(`04-2024_OS`, `04-2024_BSA`) - ) %>% - tab_spanner( - label = "May 2024", - columns = c(`05-2024_OS`, `05-2024_BSA`) - ) %>% - tab_spanner( - label = "June 2024", - columns = c(`06-2024_OS`, `06-2024_BSA`) - ) %>% - tab_spanner( - label = "July 2024", - columns = c(`07-2024_OS`, `07-2024_BSA`) - ) %>% - tab_footnote( - footnote = "OS - OpenSAFELY data, BSA - Validation data (NHS BSA)" - ) %>% - fmt_number( - columns = everything(), - decimals = 0 - ) - -tab_pf_conditions_validation -``` - -```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -df_long <- df_pivoted %>% pivot_longer( - cols = c("02-2024_OS", "02-2024_BSA", "03-2024_OS", "03-2024_BSA", "04-2024_OS", "04-2024_BSA", "05-2024_OS", "05-2024_BSA", "06-2024_OS", "06-2024_BSA", "07-2024_OS", "07-2024_BSA"), - names_to = c("month", "source"), - names_sep = "_", - values_to = "count" -) -# Changing format of date to use label_date_short to keep dates consistent for figures -df_long$month <- as.Date(paste0("01-", df_long$month), format = "%d-%m-%Y") +# Combining rows from OS and BSA validation dataframes +df_validation_condition_counts <- bind_rows(df_opensafely_validation, df_bsa_consultation_validation) # Line graph comparing clinical condition counts of BSA and OS data -validation_total_counts_figure <- ggplot(df_long, aes(x = month, y = count, color = consultation_type, group = consultation_type)) + - geom_point() + +plot_validation_condition_count <- df_validation_condition_counts %>% + filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% + mutate(source = factor(source, + levels = c("opensafely", "nhs_bsa"), + labels = c("OpenSAFELY-TPP", "NHS BSA (40%)") + )) %>% + ggplot( + aes( + x = date, + y = count, + shape = consultation_type, + color = consultation_type, + fill = consultation_type, + group = consultation_type + ) + ) + + geom_point(size = 2.5) + geom_line(size = 0.5) + facet_wrap(~source, scales = "free_y") + - labs( - title = "Clinical Conditions Count by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count", color = "Clinical Condition" + scale_x_date( + labels = scales::label_date_short() + ) + + labs(x = NULL, y = "Count", colour = NULL, shape = NULL, fill = NULL) + + scale_color_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_fill_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_shape_manual( + values = c( + "Acute Sinusitis" = 15, + "Infected Insect Bite" = 19, + "UTI" = 4, + "Acute Otitis Media" = 23, + "Acute Pharyngitis" = 3, + "Herpes Zoster" = 17, + "Impetigo" = 8 + ) ) + theme( - plot.title = element_text(hjust = 0.5) + text = element_text(size = 14) ) + - scale_x_date( - labels = scales::label_date_short() - ) - -validation_total_counts_figure -``` - -```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -df_consultation_totals <- df_long %>% - group_by(month, source) %>% - summarise(count = sum(count), - .groups = 'drop') - -# Line graph comparing clinical condition counts of BSA and OS data -validation_total_consultation_counts_figure <- ggplot(df_consultation_totals, aes(x = month, y = count)) + - geom_point() + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_y_continuous(labels = scales::number) + +# Another plot visualising the percentage +plot_validation_condition_pct <- df_validation_condition_counts %>% + filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% + pivot_wider(names_from = c(source, count_method), values_from = count) %>% + mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>% + ggplot( + aes( + x = date, + y = opensafely_opensafely_tpp / nhs_bsa_count_40pct, + shape = consultation_type, + color = consultation_type, + fill = consultation_type, + group = consultation_type + ) + ) + + geom_point(size = 2.5) + geom_line(size = 0.5) + facet_wrap(~source, scales = "free_y") + - labs( - title = "Consultations by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count" + scale_x_date( + labels = scales::label_date_short() + ) + + labs(x = NULL, y = "Percent", colour = NULL, shape = NULL, fill = NULL) + + scale_color_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_fill_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_shape_manual( + values = c( + "Acute Sinusitis" = 15, + "Infected Insect Bite" = 19, + "UTI" = 4, + "Acute Otitis Media" = 23, + "Acute Pharyngitis" = 3, + "Herpes Zoster" = 17, + "Impetigo" = 8 + ) ) + theme( - plot.title = element_text(hjust = 0.5), - axis.title.x = element_blank() + text = element_text(size = 14) ) + - scale_x_date( - labels = scales::label_date_short() - ) + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_y_continuous(labels = scales::percent) -validation_total_consultation_counts_figure +# Combining the plots with patchwork +plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_validation_condition_pct) + + plot_annotation(tag_levels = "A") + + plot_layout(guides = "collect", widths = c(2, 1)) & + theme( + legend.position = "bottom", + text = element_text(size = 15), + strip.background = element_rect(size = 0), + strip.text.x = element_text(size = 13, face = "bold") + ) +ggsave( + here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"), + plot_validation_condition_count_pct, + width = 15, height = 6 +) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} # Line graph comparing clinical condition counts of BSA and OS data -df_descriptive_stats <- df_descriptive_stats %>% +plot_pf_descriptive_stats <- df_descriptive_stats %>% mutate( - measure = recode(measure, - "pf_with_pfmed" = "PF Med", - "pf_with_pfcondition" = "PF Condition", - "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition", + measure = factor(measure, + levels = c("pf_with_pfmed", "pf_with_pfcondition", "pf_with_pfmed_and_pfcondition"), + labels = c("PF Med", "PF Condition", "PF Med & PF Condition") ) - ) - -descriptive_stats_figure <- ggplot(df_descriptive_stats, aes(x = interval_start, y = ratio, color = measure, group = measure)) + - geom_point() + + ) |> + ggplot(aes( + x = interval_start, + y = ratio, + colour = measure, + shape = measure, + )) + + geom_point(size = 2.5) + geom_line(size = 0.5) + - # facet_wrap(~ measure, scales = "free_y") + labs( - title = "Breakdown of PF consultations with linked PF conditions and medications", - color = "PF consultation with:" - ) + - theme( - plot.title = element_text(hjust = 0.5) + x = NULL, + y = NULL, + shape = "PF consultation with:", + colour = "PF consultation with:" ) + scale_x_date( - labels = scales::label_date_short() + labels = scales::label_date_short(), breaks = "month" ) + scale_y_continuous( labels = scales::percent, ) + theme( - axis.title.x = element_blank(), - axis.title.y = element_blank() - ) + text = element_text(size = 14) + ) + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_colour_brewer(palette = "Dark2") -descriptive_stats_figure + +plot_pf_descriptive_stats + +ggsave( + here("released_output", "results", "figures", "plot_pf_descriptive_stats.png"), + plot_pf_descriptive_stats, + width = 10, height = 6 +) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} - # Validation of pharmacy first medication counts figure # OS data - waiting on released output df_bsa_total_meds <- df_bsa_meds %>% group_by(date) %>% - summarise(total_meds = sum(count) * 0.4, - .groups = 'drop') %>% - mutate(source = "BSA") %>% - filter(date <= "2024-07-01") + summarise( + total_meds = sum(count) * 0.4, + .groups = "drop" + ) %>% + mutate(source = "BSA") %>% + filter(date <= "2024-07-01") df_pfmed_total <- df_pfmed %>% rename(date = interval_start) %>% - group_by(date) %>% - summarise(total_meds = sum(numerator), - .groups = 'drop') %>% + group_by(date) %>% + summarise( + total_meds = sum(numerator), + .groups = "drop" + ) %>% mutate(source = "OS") %>% filter(date >= "2024-02-01") @@ -1016,12 +886,14 @@ fig_validation_med_counts <- ggplot(df_validation_med_counts, aes(x = date, y = fig_validation_med_counts ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -# GP vs PF provider graph +# GP vs PF provider graph -df_condition_provider_grouped <- df_condition_provider %>% - group_by(measure, interval_start, pf_status) %>% - summarise(count = sum(numerator), - .groups = 'drop') %>% +df_condition_provider_grouped <- df_condition_provider %>% + group_by(measure, interval_start, pf_status) %>% + summarise( + count = sum(numerator), + .groups = "drop" + ) %>% mutate( measure = recode(measure, "count_acute_sinusitis_total" = "Acute Sinusitis", @@ -1056,7 +928,6 @@ figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start, ) figure_gp_vs_pf - ``` # References From 84a498a1019423e1d325431831615a7546254143 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Mon, 2 Dec 2024 13:24:17 +0000 Subject: [PATCH 4/7] Update meds validation plot --- reports/pharmacy_first_report.Rmd | 131 ++++++++++++++++++++++++------ 1 file changed, 105 insertions(+), 26 deletions(-) diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index f53a0a0..75e36ba 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -785,6 +785,8 @@ plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_v strip.text.x = element_text(size = 13, face = "bold") ) +plot_validation_condition_count_pct + ggsave( here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"), plot_validation_condition_count_pct, @@ -842,48 +844,125 @@ ggsave( ) ``` -```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} +```{r, message=FALSE, warning=FALSE, echo = FALSE} # Validation of pharmacy first medication counts figure # OS data - waiting on released output -df_bsa_total_meds <- df_bsa_meds %>% +df_bsa_medication_validation_sum <- df_bsa_medication_validation %>% group_by(date) %>% - summarise( - total_meds = sum(count) * 0.4, - .groups = "drop" - ) %>% - mutate(source = "BSA") %>% - filter(date <= "2024-07-01") - -df_pfmed_total <- df_pfmed %>% + summarise(count = sum(count) * 0.4) %>% + mutate( + source = "nhs_bsa", + count_method = "count_40pct" + ) +range(df_pfmed$interval_start) +df_opensafely_pfmed_sum <- df_pfmed %>% rename(date = interval_start) %>% group_by(date) %>% - summarise( - total_meds = sum(numerator), - .groups = "drop" - ) %>% - mutate(source = "OS") %>% - filter(date >= "2024-02-01") + summarise(count = sum(numerator)) %>% + mutate( + source = "opensafely_tpp", + count_method = "opensafely_tpp" + ) -df_validation_med_counts <- bind_rows(df_pfmed_total, df_bsa_total_meds) +df_validation_med_counts <- bind_rows(df_opensafely_pfmed_sum, df_bsa_medication_validation_sum) |> + filter(date >= "2024-01-01" & date <= "2024-07-01") -fig_validation_med_counts <- ggplot(df_validation_med_counts, aes(x = date, y = total_meds)) + - geom_point() + +plot_validation_med_count <- df_validation_med_counts |> + mutate( + source = factor(source, levels = c("opensafely_tpp", "nhs_bsa"), labels = c("OpenSAFELY-TPP", "NHS BSA")), + count_method = factor(count_method, levels = c("opensafely_tpp", "count_40pct"), labels = c("OpenSAFELY-TPP", "NHS BSA (40%)")) + ) |> + ggplot(aes( + x = date, + y = count, + colour = count_method, + shape = count_method)) + + geom_point(size = 2) + + facet_wrap(~count_method, scales = "free_y") + geom_line(size = 0.5) + - facet_wrap(~source, scales = "free_y") + labs( - title = "Pharmacy First Medications by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count" + x = NULL, + y = "Count", + colour = NULL, + shape = NULL, + ) + + scale_x_date( + labels = scales::label_date_short(), breaks = "month" ) + theme( - plot.title = element_text(hjust = 0.5), - axis.title.x = element_blank() + text = element_text(size = 14) + ) + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_colour_brewer(palette = "Dark2") + +# Another plot visualising the percentage +plot_validation_med_pct <- df_validation_med_counts %>% + filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% + pivot_wider(names_from = c(source, count_method), values_from = count) %>% + mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>% + ggplot( + aes( + x = date, + y = opensafely_tpp_opensafely_tpp / nhs_bsa_count_40pct, + shape = source, + color = source, + fill = source, + group = source + ) ) + + geom_point(size = 2.5) + + geom_line(size = 0.5) + + facet_wrap(~source, scales = "free_y") + scale_x_date( - labels = scales::label_date_short() + labels = scales::label_date_short(), breaks = "month" + ) + + labs(x = NULL, y = "Percent", colour = NULL, shape = NULL, fill = NULL) + + scale_color_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_fill_viridis_d( + option = "plasma", + end = 0.9 + ) + + scale_shape_manual( + values = c("Percentage of NHS BSA (40%) in OpenSAFELY" = 15) + ) + + theme( + text = element_text(size = 14) + ) + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_y_continuous(labels = scales::percent) + +# Combining the plots with patchwork +plot_validation_medication_count_pct <- (plot_validation_med_count + plot_validation_med_pct) + + plot_annotation(tag_levels = "A") + + plot_layout(guides = "collect", widths = c(2, 1)) & + theme( + legend.position = "bottom", + text = element_text(size = 15), + strip.background = element_rect(size = 0), + strip.text.x = element_text(size = 13, face = "bold") ) -fig_validation_med_counts +plot_validation_medication_count_pct + +ggsave( + here("released_output", "results", "figures", "plot_validation_medication_count_pct.png"), + plot_validation_medication_count_pct, + width = 15, height = 6 +) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} # GP vs PF provider graph From da8ee79dc411913173fa7b9e4a79e2c644aad9a0 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Mon, 2 Dec 2024 16:14:35 +0000 Subject: [PATCH 5/7] Improve consultation and condition provider plots --- reports/pharmacy_first_report.Rmd | 208 +++++++++++++++++++++++++----- 1 file changed, 174 insertions(+), 34 deletions(-) diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index 75e36ba..dd801de 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -227,21 +227,121 @@ To ensure comprehensive ethnicity data, we supplemented missing ethnicity values # Select measures and breakdown df_measures_selected <- df_measures %>% filter(measure_desc == "clinical_service") %>% - filter(is.na(group_by)) + filter(is.na(group_by)) |> + select(measure, interval_start, numerator) |> + mutate(measure = factor(measure, + levels = c("Consultation Service", "Pharmacy First Consultation"), + labels = c( + "Consultation Service for minor illness (1577041000000109)", + "Pharmacy First service (983341000000102)" + ) + )) -# Create visualisation -plot_measures( - df_measures_selected, - select_value = numerator, - select_interval_date = interval_end, - colour_var = NULL, - guide_nrow = 1, - facet_wrap = TRUE, - facet_var = measure, - title = "Pharmacy First Consultations", - y_label = "Number of codes for FP consultations", + + + +plot_pf_individual_consultations_count <- df_measures_selected |> + select(measure, interval_start, numerator) |> + ggplot(aes( + x = interval_start, + y = numerator, + colour = measure, + shape = measure, + )) + + geom_point(size = 2) + + geom_line(alpha = .3) + + labs( + title = NULL, + x = NULL, + y = "Total count", + colour = NULL, + shape = NULL + ) + + scale_y_continuous( + labels = scales::label_number(), + ) + + theme(legend.position = "bottom") + + guides( + colour = guide_legend(ncol = 2), + shape = guide_legend(ncol = 2) + ) + + scale_x_date( + date_breaks = "1 month", + labels = scales::label_date_short() + ) + + geom_vline( + xintercept = lubridate::as_date(c( + "2024-01-31" + )), + linetype = "dotted", + colour = "orange", + size = .7 + ) + + scale_colour_viridis_d(end = .75) + + theme( + text = element_text(size = 14) + ) + + +ggsave( + here("released_output", "results", "figures", "plot_pf_individual_consultations_count.png"), + plot_pf_individual_consultations_count, + width = 10, height = 6 ) -``` + + +plot_pf_grouped_consultations_count <- df_measures_selected |> + group_by(interval_start) |> + mutate( + pf_consultation_total = sum(numerator, na.rm = TRUE), + data_desc = "Pharmacy First Consultation") |> + ggplot(aes( + x = interval_start, + y = pf_consultation_total, + colour = data_desc, + shape = data_desc, + )) + + geom_point(size = 2) + + geom_line(alpha = .3) + + labs( + title = NULL, + x = NULL, + y = "Total count", + colour = NULL, + shape = NULL + ) + + scale_y_continuous( + labels = scales::label_number(), + ) + + theme(legend.position = "bottom") + + guides( + colour = guide_legend(ncol = 2), + shape = guide_legend(ncol = 2) + ) + + scale_x_date( + date_breaks = "1 month", + labels = scales::label_date_short() + ) + + geom_vline( + xintercept = lubridate::as_date(c( + "2024-01-31" + )), + linetype = "dotted", + colour = "orange", + size = .7 + ) + + scale_colour_viridis_d(end = .75) + + theme( + text = element_text(size = 14) + ) + + +ggsave( + here("released_output", "results", "figures", "plot_pf_consultation_count.png"), + plot_pf_consultation_count, + width = 10, height = 6 +) + ``` ```{r, message=FALSE, warning=FALSE, fig.height=10, fig.width=8} # Select measures and breakdown @@ -814,8 +914,8 @@ plot_pf_descriptive_stats <- df_descriptive_stats %>% labs( x = NULL, y = NULL, - shape = "PF consultation with:", - colour = "PF consultation with:" + shape = "PF consultation linked to:", + colour = "PF consultation linked to:" ) + scale_x_date( labels = scales::label_date_short(), breaks = "month" @@ -877,7 +977,8 @@ plot_validation_med_count <- df_validation_med_counts |> x = date, y = count, colour = count_method, - shape = count_method)) + + shape = count_method + )) + geom_point(size = 2) + facet_wrap(~count_method, scales = "free_y") + geom_line(size = 0.5) + @@ -970,32 +1071,49 @@ ggsave( df_condition_provider_grouped <- df_condition_provider %>% group_by(measure, interval_start, pf_status) %>% summarise( - count = sum(numerator), - .groups = "drop" + count = sum(numerator) ) %>% mutate( - measure = recode(measure, - "count_acute_sinusitis_total" = "Acute Sinusitis", - "count_infected_insect_bite_total" = "Infected Insect Bite", - "count_uncomplicated_urinary_tract_infection_total" = "Uncomplicated UTI", - "count_acute_otitis_media_total" = "Acute Otitis Media", - "count_acute_pharyngitis_total" = "Acute Pharyngitis", - "count_herpes_zoster_total" = "Herpes Zoster", - "count_impetigo_total" = "Impetigo", + measure = factor(measure, + levels = c( + "count_acute_sinusitis_total", + "count_infected_insect_bite_total", + "count_uncomplicated_urinary_tract_infection_total", + "count_acute_otitis_media_total", + "count_acute_pharyngitis_total", + "count_herpes_zoster_total", + "count_impetigo_total" + ), + labels = c( + "Acute Sinusitis", + "Infected Insect Bite", + "UTI", + "Acute Otitis Media", + "Acute Pharyngitis", + "Herpes Zoster", + "Impetigo" + ) ), - pf_status = case_when( - pf_status == TRUE ~ "PF", - pf_status == FALSE ~ "GP" + pf_status = factor(pf_status, + levels = c(TRUE, FALSE), + labels = c("Linked to Pharmacy First consultation", "Not linked to Pharmacy First consultation") ) ) -figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start, y = count, color = pf_status)) + - geom_point() + +plot_pf_condition_provider_count <- ggplot( + df_condition_provider_grouped, + aes( + x = interval_start, + y = count, + colour = pf_status, + shape = pf_status + ) +) + + geom_point(size = 1.5) + geom_line(size = 0.5) + facet_wrap(~measure, scales = "free_y") + labs( - title = "Count of clinical events", - x = "Month", y = "Count", color = "Provider:" + x = NULL, y = "Count", color = NULL, shape = NULL ) + theme( plot.title = element_text(hjust = 0.5), @@ -1004,9 +1122,31 @@ figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start, ) + scale_x_date( labels = scales::label_date_short() + ) + + geom_vline( + xintercept = lubridate::as_date("2024-02-01"), + linetype = "dotted", + colour = "orange", + linewidth = .7 + ) + + scale_color_viridis_d( + option = "plasma", + end = 0.75 + ) + + theme( + legend.position = "bottom", + text = element_text(size = 14), + strip.background = element_rect(size = 0), + # strip.text.x = element_text(size = 13, face = "bold") ) -figure_gp_vs_pf +plot_pf_condition_provider_count + +ggsave( + here("released_output", "results", "figures", "plot_pf_condition_provider_count.png"), + plot_pf_condition_provider_count, + width = 13, height = 8 +) ``` # References From 2aee9aa72141f2e830151083533560212cb03695 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Tue, 3 Dec 2024 10:57:51 +0000 Subject: [PATCH 6/7] Fix `.Rmd` and run `styler::style_dir()` --- lib/functions/load_opensafely_outputs.R | 2 +- lib/functions/plot_measures.R | 2 +- reports/pharmacy_first_report.Rmd | 14 ++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/functions/load_opensafely_outputs.R b/lib/functions/load_opensafely_outputs.R index 46204bd..432ce74 100644 --- a/lib/functions/load_opensafely_outputs.R +++ b/lib/functions/load_opensafely_outputs.R @@ -85,4 +85,4 @@ df_measures <- df_measures %>% labels = c("Female", "Male") )) -df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" \ No newline at end of file +df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" diff --git a/lib/functions/plot_measures.R b/lib/functions/plot_measures.R index 045ebcb..4171c0c 100644 --- a/lib/functions/plot_measures.R +++ b/lib/functions/plot_measures.R @@ -102,4 +102,4 @@ plot_measures <- function( gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey") region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey") ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey") -sex_palette <- c("red", "blue") \ No newline at end of file +sex_palette <- c("red", "blue") diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index dd801de..4645590 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -237,9 +237,6 @@ df_measures_selected <- df_measures %>% ) )) - - - plot_pf_individual_consultations_count <- df_measures_selected |> select(measure, interval_start, numerator) |> ggplot(aes( @@ -294,8 +291,9 @@ plot_pf_grouped_consultations_count <- df_measures_selected |> group_by(interval_start) |> mutate( pf_consultation_total = sum(numerator, na.rm = TRUE), - data_desc = "Pharmacy First Consultation") |> - ggplot(aes( + data_desc = "Pharmacy First Consultation" + ) |> + ggplot(aes( x = interval_start, y = pf_consultation_total, colour = data_desc, @@ -337,11 +335,11 @@ plot_pf_grouped_consultations_count <- df_measures_selected |> ggsave( - here("released_output", "results", "figures", "plot_pf_consultation_count.png"), - plot_pf_consultation_count, + here("released_output", "results", "figures", "plot_pf_grouped_consultations_count.png"), + plot_pf_grouped_consultations_count, width = 10, height = 6 ) - ``` +``` ```{r, message=FALSE, warning=FALSE, fig.height=10, fig.width=8} # Select measures and breakdown From 0700e55367a8c7c3a8a28d7eb303d9b2acd1c5a3 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Tue, 3 Dec 2024 14:43:11 +0000 Subject: [PATCH 7/7] Change `plot_*` to `fig_*` prefix for figure objects and files --- reports/pharmacy_first_report.Rmd | 55 ++++++++++++++++--------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index 4645590..da3e951 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -223,7 +223,7 @@ To ensure comprehensive ethnicity data, we supplemented missing ethnicity values ### Total population -```{r, message=FALSE, warning=FALSE, fig.height=3, fig.width=8} +```{r, message=FALSE, warning=FALSE, fig.height=6, fig.width=10} # Select measures and breakdown df_measures_selected <- df_measures %>% filter(measure_desc == "clinical_service") %>% @@ -237,7 +237,7 @@ df_measures_selected <- df_measures %>% ) )) -plot_pf_individual_consultations_count <- df_measures_selected |> +fig_pf_individual_consultations_count <- df_measures_selected |> select(measure, interval_start, numerator) |> ggplot(aes( x = interval_start, @@ -281,13 +281,14 @@ plot_pf_individual_consultations_count <- df_measures_selected |> ggsave( - here("released_output", "results", "figures", "plot_pf_individual_consultations_count.png"), - plot_pf_individual_consultations_count, + here("released_output", "results", "figures", "fig_pf_individual_consultations_count.png"), + fig_pf_individual_consultations_count, width = 10, height = 6 ) +fig_pf_individual_consultations_count -plot_pf_grouped_consultations_count <- df_measures_selected |> +fig_pf_grouped_consultations_count <- df_measures_selected |> group_by(interval_start) |> mutate( pf_consultation_total = sum(numerator, na.rm = TRUE), @@ -335,8 +336,8 @@ plot_pf_grouped_consultations_count <- df_measures_selected |> ggsave( - here("released_output", "results", "figures", "plot_pf_grouped_consultations_count.png"), - plot_pf_grouped_consultations_count, + here("released_output", "results", "figures", "fig_pf_grouped_consultations_count.png"), + fig_pf_grouped_consultations_count, width = 10, height = 6 ) ``` @@ -767,7 +768,7 @@ df_opensafely_validation <- df_measures %>% df_validation_condition_counts <- bind_rows(df_opensafely_validation, df_bsa_consultation_validation) # Line graph comparing clinical condition counts of BSA and OS data -plot_validation_condition_count <- df_validation_condition_counts %>% +fig_validation_condition_count <- df_validation_condition_counts %>% filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% mutate(source = factor(source, levels = c("opensafely", "nhs_bsa"), @@ -821,7 +822,7 @@ plot_validation_condition_count <- df_validation_condition_counts %>% scale_y_continuous(labels = scales::number) # Another plot visualising the percentage -plot_validation_condition_pct <- df_validation_condition_counts %>% +fig_validation_condition_pct <- df_validation_condition_counts %>% filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% pivot_wider(names_from = c(source, count_method), values_from = count) %>% mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>% @@ -873,7 +874,7 @@ plot_validation_condition_pct <- df_validation_condition_counts %>% scale_y_continuous(labels = scales::percent) # Combining the plots with patchwork -plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_validation_condition_pct) + +fig_validation_condition_count_pct <- (fig_validation_condition_count + fig_validation_condition_pct) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect", widths = c(2, 1)) & theme( @@ -883,18 +884,18 @@ plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_v strip.text.x = element_text(size = 13, face = "bold") ) -plot_validation_condition_count_pct +fig_validation_condition_count_pct ggsave( - here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"), - plot_validation_condition_count_pct, + here("released_output", "results", "figures", "fig_validation_condition_count_pct.png"), + fig_validation_condition_count_pct, width = 15, height = 6 ) ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} # Line graph comparing clinical condition counts of BSA and OS data -plot_pf_descriptive_stats <- df_descriptive_stats %>% +fig_pf_descriptive_stats <- df_descriptive_stats %>% mutate( measure = factor(measure, levels = c("pf_with_pfmed", "pf_with_pfcondition", "pf_with_pfmed_and_pfcondition"), @@ -933,11 +934,11 @@ plot_pf_descriptive_stats <- df_descriptive_stats %>% scale_colour_brewer(palette = "Dark2") -plot_pf_descriptive_stats +fig_pf_descriptive_stats ggsave( - here("released_output", "results", "figures", "plot_pf_descriptive_stats.png"), - plot_pf_descriptive_stats, + here("released_output", "results", "figures", "fig_pf_descriptive_stats.png"), + fig_pf_descriptive_stats, width = 10, height = 6 ) ``` @@ -966,7 +967,7 @@ df_opensafely_pfmed_sum <- df_pfmed %>% df_validation_med_counts <- bind_rows(df_opensafely_pfmed_sum, df_bsa_medication_validation_sum) |> filter(date >= "2024-01-01" & date <= "2024-07-01") -plot_validation_med_count <- df_validation_med_counts |> +fig_validation_med_count <- df_validation_med_counts |> mutate( source = factor(source, levels = c("opensafely_tpp", "nhs_bsa"), labels = c("OpenSAFELY-TPP", "NHS BSA")), count_method = factor(count_method, levels = c("opensafely_tpp", "count_40pct"), labels = c("OpenSAFELY-TPP", "NHS BSA (40%)")) @@ -1001,7 +1002,7 @@ plot_validation_med_count <- df_validation_med_counts |> scale_colour_brewer(palette = "Dark2") # Another plot visualising the percentage -plot_validation_med_pct <- df_validation_med_counts %>% +fig_validation_med_pct <- df_validation_med_counts %>% filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>% pivot_wider(names_from = c(source, count_method), values_from = count) %>% mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>% @@ -1045,7 +1046,7 @@ plot_validation_med_pct <- df_validation_med_counts %>% scale_y_continuous(labels = scales::percent) # Combining the plots with patchwork -plot_validation_medication_count_pct <- (plot_validation_med_count + plot_validation_med_pct) + +fig_validation_medication_count_pct <- (fig_validation_med_count + fig_validation_med_pct) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect", widths = c(2, 1)) & theme( @@ -1055,11 +1056,11 @@ plot_validation_medication_count_pct <- (plot_validation_med_count + plot_valida strip.text.x = element_text(size = 13, face = "bold") ) -plot_validation_medication_count_pct +fig_validation_medication_count_pct ggsave( - here("released_output", "results", "figures", "plot_validation_medication_count_pct.png"), - plot_validation_medication_count_pct, + here("released_output", "results", "figures", "fig_validation_medication_count_pct.png"), + fig_validation_medication_count_pct, width = 15, height = 6 ) ``` @@ -1098,7 +1099,7 @@ df_condition_provider_grouped <- df_condition_provider %>% ) ) -plot_pf_condition_provider_count <- ggplot( +fig_pf_condition_provider_count <- ggplot( df_condition_provider_grouped, aes( x = interval_start, @@ -1138,11 +1139,11 @@ plot_pf_condition_provider_count <- ggplot( # strip.text.x = element_text(size = 13, face = "bold") ) -plot_pf_condition_provider_count +fig_pf_condition_provider_count ggsave( - here("released_output", "results", "figures", "plot_pf_condition_provider_count.png"), - plot_pf_condition_provider_count, + here("released_output", "results", "figures", "fig_pf_condition_provider_count.png"), + fig_pf_condition_provider_count, width = 13, height = 8 ) ```