Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add manuscript figure comparing OpenSAFELY-TPP and NHS BSA data #113

Merged
merged 6 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions lib/functions/combine_os_nhsbsa_validation_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# NHS BSA validation data: total count per date for each data description
#   1. pf_consultation
#   2. pf_medication
# The per-group rows (count_group, count) are collapsed into one count_total
# per date, data_desc and count_method. Only the "count_40pct" rows are kept.
df_bsa_validation_total <- df_bsa_validation |>
  # count_method is one of the grouping keys below, so filtering up front
  # leaves the totals of the remaining groups unchanged
  filter(count_method == "count_40pct") |>
  group_by(date, data_desc, count_method) |>
  mutate(count_total = sum(count, na.rm = TRUE)) |>
  select(-c(count_group, count)) |>
  distinct() |>
  ungroup()

# Transform OpenSAFELY data into the same format as the NHS BSA validation data
# 1. pf_consultation from:
# 1.1. clinical condition (not every PF consult. has a linked clin. condition)
#      Uses the ungrouped "clinical_condition" measures from 2024-02-01 onwards;
#      each measure becomes one count_group.
df_os_consultation_from_condition_validation <- df_measures |>
  filter(
    measure_desc == "clinical_condition",
    is.na(group_by),
    interval_start >= "2024-02-01"
  ) |>
  mutate(
    data_source = "opensafely",
    data_desc = "pf_consultation",
    count_desc = "consultation_type",
    count_method = "opensafely_tpp"
  ) |>
  # Keep only the shared validation columns, in the shared column order
  select(
    date = interval_start,
    data_source,
    data_desc,
    count_desc,
    count_group = measure,
    count_method,
    count = numerator
  )

# 1.2. consultation id (better description of total PF consult. in OS)
#      Takes the totals from df_pf_consultations_total as they are; no date
#      filter is applied here.
#      NOTE(review): presumably that table is already restricted to
#      2024-02-01 onwards where it is created - confirm.
df_os_consultation_from_id_validation <- df_pf_consultations_total |>
  select(
    date = interval_start,
    count = pf_consultation_total
  ) |>
  mutate(
    data_source = "opensafely",
    data_desc = "pf_consultation",
    count_desc = "consultation_id",
    count_group = "consultation_id",
    count_method = "opensafely_tpp"
  ) |>
  relocate(
    date, data_source, data_desc,
    count_desc, count_group, count_method, count
  )

# 2. pf_medication from linked medication
#    One row per date and dm+d code from 2024-02-01 onwards; the dm+d code is
#    used as the count_group and the measure numerator as the count.
df_os_medication_validation <- df_pfmed |>
  filter(interval_start >= "2024-02-01") |>
  mutate(
    data_source = "opensafely",
    data_desc = "pf_medication",
    count_desc = "dmd_code",
    count_method = "opensafely_tpp"
  ) |>
  # Rename while selecting so the output has the shared validation columns
  select(
    date = interval_start,
    data_source,
    data_desc,
    count_desc,
    count_group = dmd_code,
    count_method,
    count = numerator
  )

# Stack the three OpenSAFELY tables and collapse the per-group rows into one
# count_total per date, data_desc, count_desc and count_method
df_os_validation_total <- list(
  df_os_consultation_from_condition_validation,
  df_os_consultation_from_id_validation,
  df_os_medication_validation
) |>
  bind_rows() |>
  group_by(date, data_desc, count_desc, count_method) |>
  mutate(count_total = sum(count, na.rm = TRUE)) |>
  select(-c(count_group, count)) |>
  distinct() |>
  ungroup()

# Combine NHS BSA and OpenSAFELY data (NHS BSA rows first)
pf_validation <- df_bsa_validation_total |>
  bind_rows(df_os_validation_total)
6 changes: 4 additions & 2 deletions lib/functions/load_opensafely_outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
here("output", "measures", "pf_descriptive_stats_measures.csv")
)
df_pfmed <- read_csv(
here("output", "measures", "pf_medications_measures.csv")
here("output", "measures", "pf_medications_measures.csv"),
col_types = list(dmd_code = col_character())
)
df_condition_provider <- read_csv(
here("output", "measures", "pf_condition_provider_measures.csv")
Expand All @@ -22,7 +23,8 @@ if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
here("released_output", "measures", "pf_descriptive_stats_measures.csv")
)
df_pfmed <- read_csv(
here("released_output", "measures", "pf_medications_measures.csv")
here("released_output", "measures", "pf_medications_measures.csv"),
col_types = list(dmd_code = col_character())
)
df_condition_provider <- read_csv(
here("released_output", "measures", "pf_condition_provider_measures.csv")
Expand Down
35 changes: 27 additions & 8 deletions lib/functions/load_validation_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@ df_bsa_consultation_validation <- read_csv(
) %>%
rename(count_100pct = count) |>
mutate(count_40pct = round(as.numeric(count_100pct * .4), digits = 0)) %>%
mutate(source = "nhs_bsa") |>
mutate(data_source = "nhs_bsa") |>
pivot_longer(
cols = c(count_100pct, count_40pct),
names_to = "count_method",
values_to = "count"
)

df_bsa_consultation_validation <- df_bsa_consultation_validation %>%
mutate(consultation_type = factor(consultation_type,
) |>
mutate(
data_desc = "pf_consultation",
count_desc = "consultation_type",
) |>
select(date, data_source, data_desc, count_desc, count_group = consultation_type, count_method, count) |>
mutate(count_group = factor(count_group,
levels = c(
"sinusitis",
"infected_insect_bites",
Expand All @@ -37,6 +40,22 @@ df_bsa_consultation_validation <- df_bsa_consultation_validation %>%
)
))

df_bsa_medication_validation <- read_csv(
here("lib", "validation", "data", "pf_medication_validation_data.csv")
)
# NHS BSA medication counts in the shared validation format.
# Each published count is kept as "count_100pct" and a rounded 40% version is
# added as "count_40pct"; both end up in long format (count_method, count).
# NOTE(review): the 40% factor presumably approximates OpenSAFELY-TPP
# coverage - confirm.
df_bsa_medication_validation <- here(
  "lib", "validation", "data", "pf_medication_validation_data.csv"
) |>
  read_csv() |>
  rename(count_100pct = count) |>
  mutate(
    count_40pct = round(as.numeric(count_100pct * .4), digits = 0),
    data_source = "nhs_bsa",
    data_desc = "pf_medication",
    count_desc = "bnf_paragraph"
  ) |>
  pivot_longer(
    cols = c(count_100pct, count_40pct),
    names_to = "count_method",
    values_to = "count"
  ) |>
  # The BNF paragraph is the count_group; other input columns are dropped
  select(
    date, data_source, data_desc, count_desc,
    count_group = bnf_paragraph,
    count_method, count
  )

# Stack the consultation and medication validation tables and keep rows from
# 2024-02-01 onwards.
# NOTE(review): count_group is a factor in the consultation table and
# presumably character in the medication table - confirm the combined column
# type is what downstream code expects.
df_bsa_validation <- df_bsa_consultation_validation |>
  bind_rows(df_bsa_medication_validation) |>
  filter(date >= "2024-02-01")

# The two intermediate tables are removed once they have been combined
rm(df_bsa_consultation_validation, df_bsa_medication_validation)
7 changes: 7 additions & 0 deletions lib/validation/data/pf_consultation_validation_data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,10 @@ date,consultation_type,count
2024-09-01,shingles,4867
2024-09-01,sinusitis,15059
2024-09-01,uncomplicated_uti,56235
2024-10-01,acute_otitis_media,19971
2024-10-01,acute_sore_throat,64617
2024-10-01,impetigo,9394
2024-10-01,infected_insect_bites,14715
2024-10-01,shingles,5344
2024-10-01,sinusitis,22456
2024-10-01,uncomplicated_uti,62297
10 changes: 10 additions & 0 deletions lib/validation/data/pf_medication_validation_data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,13 @@ date,pharmacy_advanced_service,bnf_paragraph,count
2024-10-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,2349
2024-10-01,Pharmacy First Clinical Pathways,Tetracyclines,1182
2024-10-01,Pharmacy First Clinical Pathways,Urinary-tract infections,52648
2024-11-01,Pharmacy First Clinical Pathways,Antibacterial preparations,5131
2024-11-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,9477
2024-11-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4335
2024-11-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,20
2024-11-01,Pharmacy First Clinical Pathways,Macrolides,8229
2024-11-01,Pharmacy First Clinical Pathways,Otitis externa,8651
2024-11-01,Pharmacy First Clinical Pathways,Penicillins,70362
2024-11-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,2426
2024-11-01,Pharmacy First Clinical Pathways,Tetracyclines,1492
2024-11-01,Pharmacy First Clinical Pathways,Urinary-tract infections,51574
129 changes: 123 additions & 6 deletions reports/create_results_manuscript.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ source(here("lib", "functions", "load_opensafely_outputs.R"))

```{r, message=FALSE, warning=FALSE}
# Create figure for total count of Pharmacy First consultations for each code (3 codes)
df_measures_selected <- df_measures %>%
df_pf_consultations <- df_measures %>%
filter(measure_desc == "clinical_service") %>%
filter(is.na(group_by)) |>
select(measure, interval_start, numerator) |>
Expand All @@ -57,16 +57,19 @@ df_measures_selected <- df_measures %>%
)
))

df_measures_selected <- df_measures_selected |>
df_pf_consultations_total <- df_pf_consultations |>
group_by(interval_start) |>
mutate(
pf_consultation_total = sum(numerator, na.rm = TRUE),
data_desc = "Pharmacy First Consultation"
) %>%
filter(interval_start >= "2024-02-01")
filter(interval_start >= "2024-02-01") %>%
select(interval_start, pf_consultation_total, data_desc) |>
ungroup() |>
distinct()

fig_pf_grouped_consultations_count <- plot_measures(
df_measures_selected,
df_pf_consultations_total,
select_value = pf_consultation_total,
select_interval_date = interval_start,
legend_position = "bottom",
Expand Down Expand Up @@ -102,7 +105,7 @@ ggsave(
### Results for Figure 1 description

```{r, message=FALSE, warning=FALSE}
df_results_pf_total_counts <- df_measures %>%
df_results_pf_total_counts <- df_measures %>%
filter(measure_desc == "clinical_service") %>%
filter(is.na(group_by)) |>
group_by(interval_start) |>
Expand All @@ -119,7 +122,6 @@ df_results_pf_total_counts %>%
filter(interval_start %in% c("2024-02-01", "2024-12-01", "2024-08-01", "2024-09-01", "2024-10-01"))
```


## OpenSAFELY Linkage

### Figure 2
Expand Down Expand Up @@ -199,3 +201,118 @@ df_results_pf_linkage <- df_pf_descriptive_stats %>%
df_results_pf_linkage %>%
filter(interval_start %in% c("2024-02-01", "2024-12-01", "2024-08-01", "2024-09-01", "2024-10-01"))
```

## Data comparison

### Figure 3

```{r, message=FALSE, warning=FALSE, echo = FALSE}
# Ideally this script would be sourced at the top of this document, but it
# uses datasets created in earlier chunks (e.g. df_pf_consultations_total),
# so it is easiest to source it here.
# It creates pf_validation, which is used below.
source(here("lib", "functions", "combine_os_nhsbsa_validation_data.R"))

# Pharmacy First consultations: OpenSAFELY-TPP vs NHS BSA
# For OpenSAFELY, all PF consultations with a consultation id are used instead
# of only those with a linked clinical condition. For NHS BSA, the totals by
# consultation type are used.
df_pf_consultation_validatio_plot <- pf_validation |>
  filter(
    data_desc == "pf_consultation" &
      (
        (data_source == "opensafely" & count_desc == "consultation_id") |
          (data_source == "nhs_bsa" & count_desc == "consultation_type")
      )
  ) |>
  # One row per date with one column per source/method, so that the ratio of
  # OpenSAFELY-TPP to NHS BSA (40%) counts can be computed column-wise
  select(date, data_source, count_method, count_total) |>
  pivot_wider(
    names_from = c(data_source, count_method),
    values_from = count_total
  ) |>
  mutate(ratio = opensafely_opensafely_tpp / nhs_bsa_count_40pct)

# Select colours for groups from a 10-step "viridis::inferno" palette
# Group 1 (used for NHS BSA below): #CF4446FF
colour_group_1 <- paletteer_c("viridis::inferno", n = 10)[6]
# Group 2 (used for OpenSAFELY-TPP below): #1B0C42FF
colour_group_2 <- paletteer_c("viridis::inferno", n = 10)[2]

# Create consultation validation plot
# Monthly Pharmacy First consultation counts from NHS BSA (40%) and
# OpenSAFELY-TPP, drawn as points joined by faint lines. Each OpenSAFELY-TPP
# point is labelled with the ratio of OpenSAFELY-TPP to NHS BSA (40%) counts,
# shown as a percentage.
# Line width is set with `linewidth`; using `size` for lines is deprecated
# since ggplot2 3.4.0.
pf_validatio_consultation_plot <- df_pf_consultation_validatio_plot |>
  ggplot(aes(x = date)) +
  # NHS BSA (40%)
  geom_point(
    aes(
      y = nhs_bsa_count_40pct,
      colour = "NHS BSA (40%)",
      shape = "NHS BSA (40%)"
    ),
    size = 2.6
  ) +
  geom_line(
    aes(y = nhs_bsa_count_40pct, colour = "NHS BSA (40%)"),
    alpha = .3,
    linewidth = .5
  ) +
  # OpenSAFELY-TPP
  geom_point(
    aes(
      y = opensafely_opensafely_tpp,
      colour = "OpenSAFELY-TPP",
      shape = "OpenSAFELY-TPP"
    ),
    size = 2.6
  ) +
  geom_line(
    aes(
      y = opensafely_opensafely_tpp,
      colour = "OpenSAFELY-TPP"
    ),
    alpha = .3,
    linewidth = .5
  ) +
  # Ratio label placed just above each OpenSAFELY-TPP point
  geom_label(
    aes(
      y = opensafely_opensafely_tpp,
      label = scales::percent(ratio, accuracy = .1)
    ),
    colour = colour_group_2,
    vjust = -.5,
    size = 3.5
  ) +
  scale_y_continuous(labels = scales::number) +
  scale_x_date(
    date_breaks = "1 month",
    labels = scales::label_date_short()
  ) +
  # The colour and shape scales share the same keys and both have no title
  scale_colour_manual(
    values = c(
      "NHS BSA (40%)" = colour_group_1,
      "OpenSAFELY-TPP" = colour_group_2
    ),
    name = NULL
  ) +
  scale_shape_manual(
    values = c(
      "NHS BSA (40%)" = 15,
      "OpenSAFELY-TPP" = 16
    ),
    name = NULL
  ) +
  labs(
    x = NULL,
    y = NULL,
    colour = NULL,
    shape = NULL
  ) +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "grey50"),
    legend.position = "bottom",
    text = element_text(size = 14)
  )

# Write Figure 3 (OpenSAFELY-TPP vs NHS BSA consultation counts) to the
# manuscript results folder
ggsave(
  filename = here(
    "released_output", "results", "manuscript",
    "fig3_pf_os_nhsbsa_comparison.png"
  ),
  plot = pf_validatio_consultation_plot,
  width = 8,
  height = 4
)
```