Skip to content

Commit

Permalink
Merge pull request #113 from opensafely/milanwiedemann/add-comparison…
Browse files Browse the repository at this point in the history
…-figure

Add manuscript figure comparing OpenSAFELY-TPP and NHS BSA data
  • Loading branch information
viv3ckj authored Jan 30, 2025
2 parents 39b101b + 16c9a2b commit 921304d
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 16 deletions.
79 changes: 79 additions & 0 deletions lib/functions/combine_os_nhsbsa_validation_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Summarise the NHS BSA validation data (df_bsa_validation) into a total count
# per date for each data_desc:
# 1. pf_consultation
# 2. pf_medication
# Only the "count_40pct" rows are kept. count_method is one of the grouping
# keys, so filtering before the grouped sum gives the same totals as filtering
# afterwards.
df_bsa_validation_total <- df_bsa_validation |>
  filter(count_method == "count_40pct") |>
  group_by(date, data_desc, count_method) |>
  mutate(count_total = sum(count, na.rm = TRUE)) |>
  ungroup() |>
  # Drop the per-group detail so distinct() leaves one row per total
  select(-c(count_group, count)) |>
  distinct()

# Transform OpenSAFELY data into the same format as the NHS BSA validation data
# 1. pf_consultation from:
# 1.1. clinical condition (not every PF consult. has a linked clin. condition)
# Keeps the ungrouped "clinical_condition" measures from 2024-02-01 onwards and
# labels each row with the descriptor columns used for validation.
df_os_consultation_from_condition_validation <- df_measures |>
  filter(
    measure_desc == "clinical_condition",
    is.na(group_by),
    interval_start >= "2024-02-01"
  ) |>
  mutate(
    data_source = "opensafely",
    data_desc = "pf_consultation",
    count_desc = "consultation_type",
    count_method = "opensafely_tpp"
  ) |>
  # Rename and order the columns in a single step
  select(
    date = interval_start,
    data_source,
    data_desc,
    count_desc,
    count_group = measure,
    count_method,
    count = numerator
  )

# 1.2. consultation id (better description of total PF consult. in OS)
# Uses df_pf_consultations_total, which must already exist in the calling
# environment when this file is sourced.
df_os_consultation_from_id_validation <- df_pf_consultations_total |>
  select(
    date = interval_start,
    count = pf_consultation_total
  ) |>
  # Insert the descriptor columns between date and count
  mutate(
    data_source = "opensafely",
    data_desc = "pf_consultation",
    count_desc = "consultation_id",
    count_group = "consultation_id",
    count_method = "opensafely_tpp",
    .before = count
  )

# 2. pf_medication from linked medication
# One row per date and dmd_code from 2024-02-01 onwards, labelled with the
# same descriptor columns as the consultation data.
df_os_medication_validation <- df_pfmed |>
  filter(interval_start >= "2024-02-01") |>
  mutate(
    data_source = "opensafely",
    data_desc = "pf_medication",
    count_desc = "dmd_code",
    count_method = "opensafely_tpp"
  ) |>
  # Rename and order the columns in a single step
  select(
    date = interval_start,
    data_source,
    data_desc,
    count_desc,
    count_group = dmd_code,
    count_method,
    count = numerator
  )

# Stack the three OpenSAFELY tables and total the counts per date, data_desc,
# count_desc and count_method
df_os_validation_total <- list(
  df_os_consultation_from_condition_validation,
  df_os_consultation_from_id_validation,
  df_os_medication_validation
) |>
  bind_rows() |>
  group_by(date, data_desc, count_desc, count_method) |>
  mutate(count_total = sum(count, na.rm = TRUE)) |>
  ungroup() |>
  # Drop the per-group detail so distinct() leaves one row per total
  select(-c(count_group, count)) |>
  distinct()

# Combine NHS BSA and OpenSAFELY data
pf_validation <- bind_rows(
  df_bsa_validation_total,
  df_os_validation_total
)
6 changes: 4 additions & 2 deletions lib/functions/load_opensafely_outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
here("output", "measures", "pf_descriptive_stats_measures.csv")
)
df_pfmed <- read_csv(
here("output", "measures", "pf_medications_measures.csv")
here("output", "measures", "pf_medications_measures.csv"),
col_types = list(dmd_code = col_character())
)
df_condition_provider <- read_csv(
here("output", "measures", "pf_condition_provider_measures.csv")
Expand All @@ -22,7 +23,8 @@ if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
here("released_output", "measures", "pf_descriptive_stats_measures.csv")
)
df_pfmed <- read_csv(
here("released_output", "measures", "pf_medications_measures.csv")
here("released_output", "measures", "pf_medications_measures.csv"),
col_types = list(dmd_code = col_character())
)
df_condition_provider <- read_csv(
here("released_output", "measures", "pf_condition_provider_measures.csv")
Expand Down
35 changes: 27 additions & 8 deletions lib/functions/load_validation_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@ df_bsa_consultation_validation <- read_csv(
) %>%
rename(count_100pct = count) |>
mutate(count_40pct = round(as.numeric(count_100pct * .4), digits = 0)) %>%
mutate(source = "nhs_bsa") |>
mutate(data_source = "nhs_bsa") |>
pivot_longer(
cols = c(count_100pct, count_40pct),
names_to = "count_method",
values_to = "count"
)

df_bsa_consultation_validation <- df_bsa_consultation_validation %>%
mutate(consultation_type = factor(consultation_type,
) |>
mutate(
data_desc = "pf_consultation",
count_desc = "consultation_type",
) |>
select(date, data_source, data_desc, count_desc, count_group = consultation_type, count_method, count) |>
mutate(count_group = factor(count_group,
levels = c(
"sinusitis",
"infected_insect_bites",
Expand All @@ -37,6 +40,22 @@ df_bsa_consultation_validation <- df_bsa_consultation_validation %>%
)
))

df_bsa_medication_validation <- read_csv(
here("lib", "validation", "data", "pf_medication_validation_data.csv")
)
df_bsa_medication_validation <- read_csv(here("lib", "validation", "data", "pf_medication_validation_data.csv")) %>%
rename(count_100pct = count) |>
mutate(count_40pct = round(as.numeric(count_100pct * .4), digits = 0)) %>%
mutate(data_source = "nhs_bsa") |>
pivot_longer(
cols = c(count_100pct, count_40pct),
names_to = "count_method",
values_to = "count"
) |>
mutate(
data_desc = "pf_medication",
count_desc = "bnf_paragraph",
) |>
select(date, data_source, data_desc, count_desc, count_group = bnf_paragraph, count_method, count)

df_bsa_validation <- bind_rows(df_bsa_consultation_validation, df_bsa_medication_validation) %>%
filter(date >= "2024-02-01")

rm(df_bsa_consultation_validation, df_bsa_medication_validation)
7 changes: 7 additions & 0 deletions lib/validation/data/pf_consultation_validation_data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,10 @@ date,consultation_type,count
2024-09-01,shingles,4867
2024-09-01,sinusitis,15059
2024-09-01,uncomplicated_uti,56235
2024-10-01,acute_otitis_media,19971
2024-10-01,acute_sore_throat,64617
2024-10-01,impetigo,9394
2024-10-01,infected_insect_bites,14715
2024-10-01,shingles,5344
2024-10-01,sinusitis,22456
2024-10-01,uncomplicated_uti,62297
10 changes: 10 additions & 0 deletions lib/validation/data/pf_medication_validation_data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,13 @@ date,pharmacy_advanced_service,bnf_paragraph,count
2024-10-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,2349
2024-10-01,Pharmacy First Clinical Pathways,Tetracyclines,1182
2024-10-01,Pharmacy First Clinical Pathways,Urinary-tract infections,52648
2024-11-01,Pharmacy First Clinical Pathways,Antibacterial preparations,5131
2024-11-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,9477
2024-11-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4335
2024-11-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,20
2024-11-01,Pharmacy First Clinical Pathways,Macrolides,8229
2024-11-01,Pharmacy First Clinical Pathways,Otitis externa,8651
2024-11-01,Pharmacy First Clinical Pathways,Penicillins,70362
2024-11-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,2426
2024-11-01,Pharmacy First Clinical Pathways,Tetracyclines,1492
2024-11-01,Pharmacy First Clinical Pathways,Urinary-tract infections,51574
129 changes: 123 additions & 6 deletions reports/create_results_manuscript.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ source(here("lib", "functions", "load_opensafely_outputs.R"))

```{r, message=FALSE, warning=FALSE}
# Create figure for total count of Pharmacy First consultations for each code (3 codes)
df_measures_selected <- df_measures %>%
df_pf_consultations <- df_measures %>%
filter(measure_desc == "clinical_service") %>%
filter(is.na(group_by)) |>
select(measure, interval_start, numerator) |>
Expand All @@ -57,16 +57,19 @@ df_measures_selected <- df_measures %>%
)
))
df_measures_selected <- df_measures_selected |>
df_pf_consultations_total <- df_pf_consultations |>
group_by(interval_start) |>
mutate(
pf_consultation_total = sum(numerator, na.rm = TRUE),
data_desc = "Pharmacy First Consultation"
) %>%
filter(interval_start >= "2024-02-01")
filter(interval_start >= "2024-02-01") %>%
select(interval_start, pf_consultation_total, data_desc) |>
ungroup() |>
distinct()
fig_pf_grouped_consultations_count <- plot_measures(
df_measures_selected,
df_pf_consultations_total,
select_value = pf_consultation_total,
select_interval_date = interval_start,
legend_position = "bottom",
Expand Down Expand Up @@ -102,7 +105,7 @@ ggsave(
### Results for Figure 1 description

```{r, message=FALSE, warning=FALSE}
df_results_pf_total_counts <- df_measures %>%
df_results_pf_total_counts <- df_measures %>%
filter(measure_desc == "clinical_service") %>%
filter(is.na(group_by)) |>
group_by(interval_start) |>
Expand All @@ -123,7 +126,6 @@ df_results_pf_total_counts %>%
filter(interval_start %in% c("2024-02-01", "2024-12-01", "2024-08-01", "2024-09-01", "2024-10-01"))
```


## OpenSAFELY Linkage

### Figure 2
Expand Down Expand Up @@ -204,3 +206,118 @@ df_results_pf_linkage <- df_pf_descriptive_stats %>%
df_results_pf_linkage %>%
filter(interval_start %in% c("2024-02-01", "2024-12-01", "2024-08-01", "2024-09-01", "2024-10-01"))
```

## Data comparison

### Figure 3

```{r, message=FALSE, warning=FALSE, echo = FALSE}
# Ideally this file would be sourced at the top of this script, but it uses
# datasets that are created further down, so it is easiest to source it here
source(here("lib", "functions", "combine_os_nhsbsa_validation_data.R"))
# Pharmacy First Consultations
# For OpenSAFELY we use all PF consultations with a consultation id rather than
# only those with a clinical condition; for NHS BSA we use the consultation
# type totals. The totals are spread into one column per data source and count
# method so that the OpenSAFELY / NHS BSA ratio can be calculated per date.
df_pf_consultation_validatio_plot <- pf_validation |>
  filter(
    data_desc == "pf_consultation",
    (data_source == "opensafely" & count_desc == "consultation_id") |
      (data_source == "nhs_bsa" & count_desc == "consultation_type")
  ) |>
  select(date, data_source, count_method, count_total) |>
  pivot_wider(
    names_from = c(data_source, count_method),
    values_from = count_total
  ) |>
  mutate(ratio = opensafely_opensafely_tpp / nhs_bsa_count_40pct)
# Select colours for groups from the 10-step viridis "inferno" palette
# Group 1: #CF4446FF
colour_group_1 <- paletteer_c("viridis::inferno", n = 10)[6]
# Group 2: #1B0C42FF
colour_group_2 <- paletteer_c("viridis::inferno", n = 10)[2]
# Create consultation validation plot
# Plots the NHS BSA (40%) and OpenSAFELY-TPP consultation totals by date as
# points joined by faint lines. Each OpenSAFELY-TPP point is labelled with the
# OpenSAFELY-TPP / NHS BSA (40%) ratio formatted as a percentage.
# Colour and shape are mapped to the same constant labels so that both scales
# share a single legend.
pf_validatio_consultation_plot <- df_pf_consultation_validatio_plot |>
  ggplot(aes(x = date)) +
  geom_point(
    aes(
      y = nhs_bsa_count_40pct,
      colour = "NHS BSA (40%)",
      shape = "NHS BSA (40%)"
    ),
    size = 2.6
  ) +
  # `linewidth` replaces `size` for lines (`size` for lines was deprecated in
  # ggplot2 3.4.0)
  geom_line(
    aes(y = nhs_bsa_count_40pct, colour = "NHS BSA (40%)"),
    alpha = .3,
    linewidth = .5
  ) +
  geom_point(
    aes(
      y = opensafely_opensafely_tpp,
      colour = "OpenSAFELY-TPP",
      shape = "OpenSAFELY-TPP"
    ),
    size = 2.6
  ) +
  geom_line(
    aes(
      y = opensafely_opensafely_tpp,
      colour = "OpenSAFELY-TPP"
    ),
    alpha = .3,
    linewidth = .5
  ) +
  # Ratio label, nudged above each OpenSAFELY-TPP point
  geom_label(
    aes(
      y = opensafely_opensafely_tpp,
      label = scales::percent(ratio, accuracy = .1)
    ),
    colour = colour_group_2,
    vjust = -.5,
    size = 3.5
  ) +
  scale_y_continuous(labels = scales::number) +
  scale_x_date(
    date_breaks = "1 month",
    labels = scales::label_date_short()
  ) +
  scale_colour_manual(
    values = c(
      "NHS BSA (40%)" = colour_group_1,
      "OpenSAFELY-TPP" = colour_group_2
    ),
    name = NULL
  ) +
  scale_shape_manual(
    values = c(
      "NHS BSA (40%)" = 15,
      "OpenSAFELY-TPP" = 16
    ),
    name = NULL
  ) +
  labs(
    x = NULL,
    y = NULL,
    colour = NULL,
    shape = NULL
  ) +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "grey50"),
    legend.position = "bottom",
    text = element_text(size = 14)
  )
# Save Figure 3 to the manuscript results folder (width 8, height 4)
ggsave(
  plot = pf_validatio_consultation_plot,
  filename = here(
    "released_output", "results", "manuscript",
    "fig3_pf_os_nhsbsa_comparison.png"
  ),
  width = 8,
  height = 4
)
```

0 comments on commit 921304d

Please sign in to comment.