From 70e8857de5324dcf6a70c40d6b7a56c8eca11c60 Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Sun, 1 Dec 2024 17:51:16 +0000
Subject: [PATCH 1/7] Add more validation data

---
 .../get_pf_consultation_validation_data.R     |  3 +-
 .../get_pf_medication_validation_data.R       |  3 +-
 .../data/pf_consultation_validation_data.csv  | 28 +++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/lib/functions/get_pf_consultation_validation_data.R b/lib/functions/get_pf_consultation_validation_data.R
index 6d64c7a..f62b678 100644
--- a/lib/functions/get_pf_consultation_validation_data.R
+++ b/lib/functions/get_pf_consultation_validation_data.R
@@ -100,7 +100,7 @@ get_dispensing_data <- function(start_date = NULL, end_date = NULL) {
 }
 
 # Calculate summary of counts
-df_dispensing_data <- get_dispensing_data(start_date = "2024-02-01")
+df_dispensing_data <- get_dispensing_data(start_date = "2023-11-01")
 
 df_dispensing_data_summary <- df_dispensing_data |>
   group_by(date) |>
@@ -132,4 +132,5 @@ df_dispensing_data_summary <- df_dispensing_data |>
   ) |>
   mutate(consultation_type = str_replace(consultation_type, "^n_pf_consultation_", ""))
 
+range(df_dispensing_data_summary$date)
 write_csv(df_dispensing_data_summary, here("lib", "validation", "data", "pf_consultation_validation_data.csv"))
diff --git a/lib/functions/get_pf_medication_validation_data.R b/lib/functions/get_pf_medication_validation_data.R
index 7536ff5..4c4eef4 100644
--- a/lib/functions/get_pf_medication_validation_data.R
+++ b/lib/functions/get_pf_medication_validation_data.R
@@ -69,7 +69,7 @@ get_dataset_table_names <- function(dataset_id, start_date = NULL, end_date = NU
   dataset_tables
 }
 
-get_dataset_table_names("prescription-cost-analysis-pca-monthly-data", "2024-09-01")
+get_dataset_table_names("prescription-cost-analysis-pca-monthly-data", "2024-01-01")
 
 construct_sql_query <- function(table_name, sql_query) {
   gsub("{FROM_TABLE}", sprintf("FROM `%s`", table_name), sql_query, fixed = TRUE)
@@ -108,6 +108,7 @@ df_validate <- get_nhsbsa_data("prescription-cost-analysis-pca-monthly-data", sq
 
 names(df_validate)
 unique(df_validate$pharmacy_advanced_service)
+range(df_validate$date)
 
 pf_medication_validation_data <- df_validate |>
   select(date, snomed_code, pharmacy_advanced_service, bnf_section, bnf_paragraph, items) |>
diff --git a/lib/validation/data/pf_consultation_validation_data.csv b/lib/validation/data/pf_consultation_validation_data.csv
index 7bfc6e0..7a1c139 100644
--- a/lib/validation/data/pf_consultation_validation_data.csv
+++ b/lib/validation/data/pf_consultation_validation_data.csv
@@ -1,4 +1,25 @@
 date,consultation_type,count
+2023-11-01,acute_otitis_media,0
+2023-11-01,acute_sore_throat,0
+2023-11-01,impetigo,0
+2023-11-01,infected_insect_bites,0
+2023-11-01,shingles,0
+2023-11-01,sinusitis,0
+2023-11-01,uncomplicated_uti,0
+2023-12-01,acute_otitis_media,0
+2023-12-01,acute_sore_throat,0
+2023-12-01,impetigo,0
+2023-12-01,infected_insect_bites,0
+2023-12-01,shingles,0
+2023-12-01,sinusitis,0
+2023-12-01,uncomplicated_uti,0
+2024-01-01,acute_otitis_media,0
+2024-01-01,acute_sore_throat,0
+2024-01-01,impetigo,0
+2024-01-01,infected_insect_bites,0
+2024-01-01,shingles,0
+2024-01-01,sinusitis,0
+2024-01-01,uncomplicated_uti,0
 2024-02-01,acute_otitis_media,18893
 2024-02-01,acute_sore_throat,41014
 2024-02-01,impetigo,6816
@@ -41,3 +62,10 @@ date,consultation_type,count
 2024-07-01,shingles,5103
 2024-07-01,sinusitis,14115
 2024-07-01,uncomplicated_uti,51321
+2024-08-01,acute_otitis_media,17915
+2024-08-01,acute_sore_throat,40771
+2024-08-01,impetigo,7448
+2024-08-01,infected_insect_bites,44391
+2024-08-01,shingles,5151
+2024-08-01,sinusitis,10970
+2024-08-01,uncomplicated_uti,53389

From d89d6e316f65451d0a1ca457984f84ada8ee8aca Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Mon, 2 Dec 2024 09:07:08 +0000
Subject: [PATCH 2/7] Refactor data loading and tidying into separate scripts

This will improve readability and modularity and make it easier for us to make further changes later on. It's only a start, there's much more we could do.
---
 lib/functions/load_opensafely_outputs.R | 88 +++++++++++++++++++++++++
 lib/functions/load_validation_data.R    | 42 ++++++++++++
 lib/functions/plot_measures.R           |  6 ++
 lib/functions/tidy_measures.R           | 35 ++++++++++
 4 files changed, 171 insertions(+)
 create mode 100644 lib/functions/load_opensafely_outputs.R
 create mode 100644 lib/functions/load_validation_data.R

diff --git a/lib/functions/load_opensafely_outputs.R b/lib/functions/load_opensafely_outputs.R
new file mode 100644
index 0000000..46204bd
--- /dev/null
+++ b/lib/functions/load_opensafely_outputs.R
@@ -0,0 +1,88 @@
+# Load data based on execution environment
+if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
+  # Load data from generate_pf_measures action
+  df_measures <- readr::read_csv(
+    here("output", "measures", "pf_codes_conditions_measures.csv")
+  )
+  df_descriptive_stats <- read_csv(
+    here("output", "measures", "pf_descriptive_stats_measures.csv")
+  )
+  df_pfmed <- read_csv(
+    here("output", "measures", "pf_medications_measures.csv")
+  )
+  df_condition_provider <- read_csv(
+    here("output", "measures", "pf_condition_provider_measures.csv")
+  )
+} else {
+  # Load data from released_output directory
+  df_measures <- readr::read_csv(
+    here("released_output", "measures", "pf_codes_conditions_measures.csv")
+  )
+  df_descriptive_stats <- read_csv(
+    here("released_output", "measures", "pf_descriptive_stats_measures.csv")
+  )
+  df_pfmed <- read_csv(
+    here("released_output", "measures", "pf_medications_measures.csv")
+  )
+  df_condition_provider <- read_csv(
+    here("released_output", "measures", "pf_condition_provider_measures.csv")
+  )
+}
+
+df_measures <- tidy_measures(
+  data = df_measures,
+  pf_measures_name_dict = pf_measures_name_dict,
+  pf_measures_name_mapping = pf_measures_name_mapping,
+  pf_measures_groupby_dict = pf_measures_groupby_dict
+)
+
+df_measures$ethnicity <- factor(
+  df_measures$ethnicity,
+  levels = c(
+    "White",
+    "Mixed",
+    "Asian or Asian British",
+    "Black or Black British",
+    "Chinese or Other Ethnic Groups",
+    "Missing"
+  ),
+  ordered = TRUE
+)
+
+df_measures$age_band <- factor(
+  df_measures$age_band,
+  levels = c(
+    "0-19",
+    "20-39",
+    "40-59",
+    "60-79",
+    "80+",
+    "Missing"
+  ),
+  ordered = TRUE
+)
+
+df_measures$region <- factor(
+  df_measures$region,
+  levels = c(
+    "East",
+    "East Midlands",
+    "London",
+    "North East",
+    "North West",
+    "South East",
+    "South West",
+    "West Midlands",
+    "Yorkshire and The Humber",
+    "Missing"
+  ),
+  ordered = TRUE
+)
+
+df_measures <- df_measures %>%
+  mutate(sex = factor(sex,
+    levels = c("female", "male"),
+    labels = c("Female", "Male")
+  ))
+
+df_measures$age_band[is.na(df_measures$age_band)] <- "Missing"
\ No newline at end of file
diff --git a/lib/functions/load_validation_data.R b/lib/functions/load_validation_data.R
new file mode 100644
index 0000000..539dc41
--- /dev/null
+++ b/lib/functions/load_validation_data.R
@@ -0,0 +1,42 @@
+library(readr)
+library(tidyr)
+library(dplyr)
+library(here)
+
+df_bsa_consultation_validation <- read_csv(
+  here("lib", "validation", "data", "pf_consultation_validation_data.csv")
+) %>%
+  rename(count_100pct = count) |>
+  mutate(count_40pct = round(as.numeric(count_100pct * .4), digits = 0)) %>%
+  mutate(source = "nhs_bsa") |>
+  pivot_longer(
+    cols = c(count_100pct, count_40pct),
+    names_to = "count_method",
+    values_to = "count"
+  )
+
+df_bsa_consultation_validation <- df_bsa_consultation_validation %>%
+  mutate(consultation_type = factor(consultation_type,
+    levels = c(
+      "sinusitis",
+      "infected_insect_bites",
+      "uncomplicated_uti",
+      "acute_otitis_media",
+      "acute_sore_throat",
+      "shingles",
+      "impetigo"
+    ),
+    labels = c(
+      "Acute Sinusitis",
+      "Infected Insect Bite",
+      "UTI",
+      "Acute Otitis Media",
+      "Acute Pharyngitis",
+      "Herpes Zoster",
+      "Impetigo"
+    )
+  ))
+
+df_bsa_medication_validation <- read_csv(
+  here("lib", "validation", "data", "pf_medication_validation_data.csv")
+)
diff --git a/lib/functions/plot_measures.R b/lib/functions/plot_measures.R
index 9525dfa..045ebcb 100644
--- a/lib/functions/plot_measures.R
+++ b/lib/functions/plot_measures.R
@@ -97,3 +97,9 @@ plot_measures <- function(
 
   plot_tmp
 }
+
+# Colour palettes
+gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey")
+region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey")
+ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey")
+sex_palette <- c("red", "blue")
\ No newline at end of file
diff --git a/lib/functions/tidy_measures.R b/lib/functions/tidy_measures.R
index 2689ac4..44ab4e6 100644
--- a/lib/functions/tidy_measures.R
+++ b/lib/functions/tidy_measures.R
@@ -1,3 +1,38 @@
+# Define dictionaries with tidy names and mappings for measures
+pf_measures_name_dict <- list(
+  consultation_service = "Consultation Service",
+  pharmacy_first_service = "Pharmacy First Consultation",
+  combined_pf_service = "Pharmacy First Consultations (Combined)",
+  acute_otitis_media = "Acute Otitis Media",
+  herpes_zoster = "Herpes Zoster",
+  acute_sinusitis = "Acute Sinusitis",
+  impetigo = "Impetigo",
+  infected_insect_bite = "Infected Insect Bite",
+  acute_pharyngitis = "Acute Pharyngitis",
+  uncomplicated_urinary_tract_infection = "UTI"
+)
+
+pf_measures_name_mapping <- list(
+  consultation_service = "clinical_service",
+  pharmacy_first_service = "clinical_service",
+  combined_pf_service = "pharmacy_first_services",
+  acute_otitis_media = "clinical_condition",
+  herpes_zoster = "clinical_condition",
+  acute_sinusitis = "clinical_condition",
+  impetigo = "clinical_condition",
+  infected_insect_bite = "clinical_condition",
+  acute_pharyngitis = "clinical_condition",
+  uncomplicated_urinary_tract_infection = "clinical_condition"
+)
+
+pf_measures_groupby_dict <- list(
+  age_band = "Age band",
+  sex = "Sex",
+  imd = "IMD",
+  region = "Region",
+  ethnicity = "Ethnicity"
+)
+
 #' Tidy measures data
 #'
 #' Creates a tidier dataframe of measures data.

From aacb97e60d878e879095c3dc4630d752b99fcbbe Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Mon, 2 Dec 2024 09:09:31 +0000
Subject: [PATCH 3/7] Improve figures and save as `.png` files

---
 reports/pharmacy_first_report.Rmd | 503 +++++++++++-------------------
 1 file changed, 187 insertions(+), 316 deletions(-)

diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
index 50c1e09..f53a0a0 100644
--- a/reports/pharmacy_first_report.Rmd
+++ b/reports/pharmacy_first_report.Rmd
@@ -16,116 +16,25 @@ library(tidyverse)
 library(here)
 library(readr)
 library(gt)
+library(patchwork)
 ```
 
 ```{r load-data, message=FALSE, warning=FALSE}
-# Load plotting function
-source(here::here("lib", "functions", "tidy_measures.R"))
-source(here::here("lib", "functions", "plot_measures.R"))
-
-# Load data based on environment
-if (Sys.getenv("OPENSAFELY_BACKEND") != "") {
-  # Load data from generate_pf_measures action
-  df_measures <- readr::read_csv(
-    here::here("output", "measures", "pf_codes_conditions_measures.csv")
-  )
-} else {
-  # Load data from released_output directory
-  df_measures <- readr::read_csv(
-    here::here("released_output", "measures", "pf_codes_conditions_measures.csv")
-  )
-}
-
-# Load validation data (NHS BSA)
-df_bsa_validation <- read_csv(here("lib", "validation", "data", "pf_consultation_validation_data.csv"))
-df_bsa_meds <- read_csv(here("lib", "validation", "data", "pf_medication_validation_data.csv"))
-
-df_descriptive_stats <- read_csv(here("released_output", "measures", "pf_descriptive_stats_measures.csv"))
-df_pfmed <- read_csv(here("released_output", "measures", "pf_medications_measures.csv"))
-df_condition_provider <- read_csv(here("released_output", "measures", "pf_condition_provider_measures.csv"))
-
-# Define dictionaries with tidy names and mappings for measures
-pf_measures_name_dict <- list(
-  consultation_service = "Consultation Service",
-  pharmacy_first_service = "Pharmacy First Consultation",
-  combined_pf_service = "Pharmacy First Consultations (Combined)",
-  acute_otitis_media = "Acute Otitis Media",
-  herpes_zoster = "Herpes Zoster",
-  acute_sinusitis = "Acute Sinusitis",
-  impetigo = "Impetigo",
-  infected_insect_bite = "Infected Insect Bite",
-  acute_pharyngitis = "Acute Pharyngitis",
-  uncomplicated_urinary_tract_infection = "UTI"
-)
-
-pf_measures_name_mapping <- list(
-  consultation_service = "clinical_service",
-  pharmacy_first_service = "clinical_service",
-  combined_pf_service = "pharmacy_first_services",
-  acute_otitis_media = "clinical_condition",
-  herpes_zoster = "clinical_condition",
-  acute_sinusitis = "clinical_condition",
-  impetigo = "clinical_condition",
-  infected_insect_bite = "clinical_condition",
-  acute_pharyngitis = "clinical_condition",
-  uncomplicated_urinary_tract_infection = "clinical_condition"
-)
-
-pf_measures_groupby_dict <- list(
-  age_band = "Age band",
-  sex = "Sex",
-  imd = "IMD",
-  region = "Region",
-  ethnicity = "Ethnicity"
-)
-
-df_measures <- tidy_measures(
-  data = df_measures,
-  pf_measures_name_dict = pf_measures_name_dict,
-  pf_measures_name_mapping = pf_measures_name_mapping,
-  pf_measures_groupby_dict = pf_measures_groupby_dict
-)
-
-df_measures$ethnicity <- factor(
-  df_measures$ethnicity,
-  levels = c(
-    "White", "Mixed", "Asian or Asian British",
-    "Black or Black British", "Chinese or Other Ethnic Groups",
-    "Missing"
-  ),
-  ordered = TRUE
-)
-
-df_measures$age_band <- factor(
-  df_measures$age_band,
-  levels = c(
-    "0-19", "20-39", "40-59",
-    "60-79", "80+",
-    "Missing"
-  ),
-  ordered = TRUE
-)
-
-df_measures$region <- factor(
-  df_measures$region,
-  levels = c(
-    "East", "East Midlands", "London",
-    "North East", "North West", "South East",
-    "South West", "West Midlands", "Yorkshire and The Humber",
-    "Missing"
-  ),
-  ordered = TRUE
-)
-
-df_measures <- df_measures %>%
-  mutate(sex = factor(sex, levels = c("female", "male"), labels = c("Female", "Male")))
-
-df_measures$age_band[is.na(df_measures$age_band)] <- "Missing"
-
-gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey")
-region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey")
-ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey")
-sex_palette <- c("red", "blue")
+# Load functions
+source(here("lib", "functions", "tidy_measures.R"))
+source(here("lib", "functions", "plot_measures.R"))
+
+# Load validation data:
+# - df_bsa_medication_validation: date, pharmacy_advanced_service, bnf_paragraph, count
+# - df_bsa_consultation_validation: date, consultation_type, source, count_method, count
+source(here("lib", "functions", "load_validation_data.R"))
+
+# Load opensafely ouputs:
+# - df_measures: measure, interval_start, interval_end, ratio numerator, denominator, age_band, sex,imd, region, ethnicity
+# - df_descriptive_stats: measure, interval_start, interval_end, ratio numerator, denominator
+# - df_pfmed: measure, interval_start, interval_end, ratio, numerator, denominator, dmd_code
+# - df_condition_provider: measure, interval_start, interval_end, ratio, numerator, denominator, pf_status, imd
+source(here("lib", "functions", "load_opensafely_outputs.R"))
 ```
 
 # Background
@@ -310,35 +219,6 @@ The [Pregnancy Codelist](https://www.opencodelists.org/codelist/nhsd-primary-car
 We used the [Ethnicity Codelist](https://www.opencodelists.org/codelist/opensafely/ethnicity-snomed-0removed/2e641f61/) identify ethnicity in Electronic Health Records.
 To ensure comprehensive ethnicity data, we supplemented missing ethnicity values with data from the Secondary Uses Service (SUS).
 
-```{r echo=FALSE}
-ethnicity_table <- data.frame(
-  Code = c(1, 2, 3, 4, 5),
-  Ethnicity = c(
-    "White",
-    "Mixed",
-    "Asian or Asian British",
-    "Black or Black British",
-    "Chinese or Other Ethnic Groups"
-  )
-)
-
-# ethnicity_table %>%
-#   gt() %>%
-#   tab_header(
-#     title = "Table 2. Ethnic Group Codes",
-#     subtitle = "Codes representing different ethnic groups in the study"
-#   ) %>%
-#   cols_label(
-#     Code = "Code",
-#     `Ethnicity` = "Ethnic Group"
-#   ) %>%
-#   tab_options(
-#     table.font.size = "medium",
-#     heading.title.font.size = "large",
-#     heading.subtitle.font.size = "small"
-#   )
-```
-
 # Results
 
 ### Total population
@@ -772,226 +652,216 @@ plot_measures(
 ) + scale_color_manual(values = ethnicity_palette)
 ```
 ```{r, message=FALSE, warning=FALSE, echo = FALSE}
-# Multiply by 0.4 to get 40% of data
-df_bsa_validation <- df_bsa_validation %>%
-  mutate(count_40pct = round(as.numeric(count * .4), digits = 0))
-
 # OpenSAFELY data for clinical conditions into a tidy df
-df_opensafely <- df_measures %>%
+df_opensafely_validation <- df_measures %>%
   filter(measure_desc == "clinical_condition") %>%
-  filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>%
+  # filter(interval_start >= as.Date("2024-02-01") & interval_start <= as.Date("2024-07-30")) %>%
   filter(is.na(group_by)) %>%
-  select(date = interval_start, consultation_type = measure, count = numerator)
-
-# Add a new column to each data frame to identify the source
-df_opensafely <- df_opensafely %>%
-  mutate(source = "OS")
-df_bsa_validation <- df_bsa_validation %>%
-  mutate(source = "BSA")
-
-# Drop the original 'count' column from the BSA data to allow for easy consistent grouping by 'count'
-df_validation_condition_counts <- df_bsa_validation %>%
-  select(-count) %>%
-  rename(count = count_40pct)
-# Combining rows from OS and BSA dataframes
-df_validation_condition_counts <- bind_rows(df_opensafely, df_validation_condition_counts)
-
-# format both sources of data as MM-YYYY
-df_validation_condition_counts <- df_validation_condition_counts %>%
-  mutate(month = format(as.Date(date), "%m-%Y"))
-
-# Pivot the data so that we get two sub columns per month for each source (OS and BSA)
-df_pivoted <- df_validation_condition_counts %>%
-  pivot_wider(names_from = c(month, source), values_from = count)
-
-# Changing names to make column naming the same to ensure grouping
-df_pivoted <- df_pivoted %>%
+  select(date = interval_start, consultation_type = measure, count = numerator) %>%
   mutate(
-    consultation_type = recode(consultation_type,
-      "sinusitis" = "Acute Sinusitis",
-      "infected_insect_bites" = "Infected Insect Bite",
-      "uncomplicated_uti" = "UTI",
-      "acute_otitis_media" = "Acute Otitis Media",
-      "acute_sore_throat" = "Acute Pharyngitis",
-      "shingles" = "Herpes Zoster",
-      "impetigo" = "Impetigo",
-    )
-  )
-# Removing date column as this will prevent grouping (date is already pivot columns)
-df_pivoted <- df_pivoted %>%
-  select(-date)
+    source = "opensafely",
+    count_method = "opensafely_tpp"
+  ) |>
+  filter(date >= "2024-01-01") %>%
+  relocate(date, consultation_type, source, count_method, count)
 
-# Group by consultation type and summarise to get rid of NAs and multiple consultation_types of same name
-df_pivoted <- df_pivoted %>%
-  group_by(consultation_type) %>%
-  summarise(across(everything(), sum, na.rm = TRUE), .groups = "drop")
-
-# Now create the gt table
-tab_pf_conditions_validation <- df_pivoted %>%
-  gt() %>%
-  tab_header(
-    title = "Validation Data of Pharmacy First Clinical Condition Counts",
-    subtitle = "Timeframe: 1st Feb 2024 to 31st July 2024"
-  ) %>%
-  cols_label(
-    consultation_type = "Clinical Condition",
-    `02-2024_OS` = "OS",
-    `02-2024_BSA` = "BSA",
-    `03-2024_OS` = "OS",
-    `03-2024_BSA` = "BSA",
-    `04-2024_OS` = "OS",
-    `04-2024_BSA` = "BSA",
-    `05-2024_OS` = "OS",
-    `05-2024_BSA` = "BSA",
-    `06-2024_OS` = "OS",
-    `06-2024_BSA` = "BSA",
-    `07-2024_OS` = "OS",
-    `07-2024_BSA` = "BSA"
-  ) %>%
-  tab_spanner(
-    label = "February 2024",
-    columns = c(`02-2024_OS`, `02-2024_BSA`)
-  ) %>%
-  tab_spanner(
-    label = "March 2024",
-    columns = c(`03-2024_OS`, `03-2024_BSA`)
-  ) %>%
-  tab_spanner(
-    label = "April 2024",
-    columns = c(`04-2024_OS`, `04-2024_BSA`)
-  ) %>%
-  tab_spanner(
-    label = "May 2024",
-    columns = c(`05-2024_OS`, `05-2024_BSA`)
-  ) %>%
-  tab_spanner(
-    label = "June 2024",
-    columns = c(`06-2024_OS`, `06-2024_BSA`)
-  ) %>%
-  tab_spanner(
-    label = "July 2024",
-    columns = c(`07-2024_OS`, `07-2024_BSA`)
-  ) %>%
-  tab_footnote(
-    footnote = "OS - OpenSAFELY data, BSA - Validation data (NHS BSA)"
-  ) %>%
-  fmt_number(
-    columns = everything(),
-    decimals = 0
-  )
-
-tab_pf_conditions_validation
-```
-
-```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
-df_long <- df_pivoted %>% pivot_longer(
-  cols = c("02-2024_OS", "02-2024_BSA", "03-2024_OS", "03-2024_BSA", "04-2024_OS", "04-2024_BSA", "05-2024_OS", "05-2024_BSA", "06-2024_OS", "06-2024_BSA", "07-2024_OS", "07-2024_BSA"),
-  names_to = c("month", "source"),
-  names_sep = "_",
-  values_to = "count"
-)
-# Changing format of date to use label_date_short to keep dates consistent for figures
-df_long$month <- as.Date(paste0("01-", df_long$month), format = "%d-%m-%Y")
+# Combining rows from OS and BSA validation dataframes
+df_validation_condition_counts <- bind_rows(df_opensafely_validation, df_bsa_consultation_validation)
 
 # Line graph comparing clinical condition counts of BSA and OS data
-validation_total_counts_figure <- ggplot(df_long, aes(x = month, y = count, color = consultation_type, group = consultation_type)) +
-  geom_point() +
+plot_validation_condition_count <- df_validation_condition_counts %>%
+  filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
+  mutate(source = factor(source,
+    levels = c("opensafely", "nhs_bsa"),
+    labels = c("OpenSAFELY-TPP", "NHS BSA (40%)")
+  )) %>%
+  ggplot(
+    aes(
+      x = date,
+      y = count,
+      shape = consultation_type,
+      color = consultation_type,
+      fill = consultation_type,
+      group = consultation_type
+    )
+  ) +
+  geom_point(size = 2.5) +
   geom_line(size = 0.5) +
   facet_wrap(~source, scales = "free_y") +
-  labs(
-    title = "Clinical Conditions Count by Month (NHS BSA vs OpenSAFELY Data)",
-    x = "Month", y = "Count", color = "Clinical Condition"
+  scale_x_date(
+    labels = scales::label_date_short()
+  ) +
+  labs(x = NULL, y = "Count", colour = NULL, shape = NULL, fill = NULL) +
+  scale_color_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_fill_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_shape_manual(
+    values = c(
+      "Acute Sinusitis" = 15,
+      "Infected Insect Bite" = 19,
+      "UTI" = 4,
+      "Acute Otitis Media" = 23,
+      "Acute Pharyngitis" = 3,
+      "Herpes Zoster" = 17,
+      "Impetigo" = 8
+    )
   ) +
   theme(
-    plot.title = element_text(hjust = 0.5)
+    text = element_text(size = 14)
   ) +
-  scale_x_date(
-    labels = scales::label_date_short()
-  )
-
-validation_total_counts_figure
-```
-
-```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
-df_consultation_totals <- df_long %>%
-  group_by(month, source) %>% 
-  summarise(count = sum(count),
-  .groups = 'drop')
-
-# Line graph comparing clinical condition counts of BSA and OS data
-validation_total_consultation_counts_figure <- ggplot(df_consultation_totals, aes(x = month, y = count)) +
-  geom_point() +
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_y_continuous(labels = scales::number)
+
+# Another plot visualising the percentage
+plot_validation_condition_pct <- df_validation_condition_counts %>%
+  filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
+  pivot_wider(names_from = c(source, count_method), values_from = count) %>%
+  mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>%
+  ggplot(
+    aes(
+      x = date,
+      y = opensafely_opensafely_tpp / nhs_bsa_count_40pct,
+      shape = consultation_type,
+      color = consultation_type,
+      fill = consultation_type,
+      group = consultation_type
+    )
+  ) +
+  geom_point(size = 2.5) +
   geom_line(size = 0.5) +
   facet_wrap(~source, scales = "free_y") +
-  labs(
-    title = "Consultations by Month (NHS BSA vs OpenSAFELY Data)",
-    x = "Month", y = "Count"
+  scale_x_date(
+    labels = scales::label_date_short()
+  ) +
+  labs(x = NULL, y = "Percent", colour = NULL, shape = NULL, fill = NULL) +
+  scale_color_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_fill_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_shape_manual(
+    values = c(
+      "Acute Sinusitis" = 15,
+      "Infected Insect Bite" = 19,
+      "UTI" = 4,
+      "Acute Otitis Media" = 23,
+      "Acute Pharyngitis" = 3,
+      "Herpes Zoster" = 17,
+      "Impetigo" = 8
+    )
   ) +
   theme(
-    plot.title = element_text(hjust = 0.5),
-    axis.title.x = element_blank()
+    text = element_text(size = 14)
   ) +
-  scale_x_date(
-    labels = scales::label_date_short()
-  )
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_y_continuous(labels = scales::percent)
 
-validation_total_consultation_counts_figure
+# Combining the plots with patchwork
+plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_validation_condition_pct) +
+  plot_annotation(tag_levels = "A") +
+  plot_layout(guides = "collect", widths = c(2, 1)) &
+  theme(
+    legend.position = "bottom",
+    text = element_text(size = 15),
+    strip.background = element_rect(size = 0),
+    strip.text.x = element_text(size = 13, face = "bold")
+  )
 
+ggsave(
+  here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"),
+  plot_validation_condition_count_pct,
+  width = 15, height = 6
+)
 ```
 
 ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
 # Line graph comparing clinical condition counts of BSA and OS data
-df_descriptive_stats <- df_descriptive_stats %>%
+plot_pf_descriptive_stats <- df_descriptive_stats %>%
   mutate(
-    measure = recode(measure,
-      "pf_with_pfmed" = "PF Med",
-      "pf_with_pfcondition" = "PF Condition",
-      "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition",
+    measure = factor(measure,
+      levels = c("pf_with_pfmed", "pf_with_pfcondition", "pf_with_pfmed_and_pfcondition"),
+      labels = c("PF Med", "PF Condition", "PF Med & PF Condition")
     )
-  )
-
-descriptive_stats_figure <- ggplot(df_descriptive_stats, aes(x = interval_start, y = ratio, color = measure, group = measure)) +
-  geom_point() +
+  ) |>
+  ggplot(aes(
+    x = interval_start,
+    y = ratio,
+    colour = measure,
+    shape = measure,
+  )) +
+  geom_point(size = 2.5) +
   geom_line(size = 0.5) +
-  # facet_wrap(~ measure, scales = "free_y") +
   labs(
-    title = "Breakdown of PF consultations with linked PF conditions and medications",
-    color = "PF consultation with:"
-  ) +
-  theme(
-    plot.title = element_text(hjust = 0.5)
+    x = NULL,
+    y = NULL,
+    shape = "PF consultation with:",
+    colour = "PF consultation with:"
   ) +
   scale_x_date(
-    labels = scales::label_date_short()
+    labels = scales::label_date_short(), breaks = "month"
   ) +
   scale_y_continuous(
     labels = scales::percent,
   ) +
   theme(
-    axis.title.x = element_blank(),
-    axis.title.y = element_blank()
-  )
+    text = element_text(size = 14)
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_colour_brewer(palette = "Dark2")
 
-descriptive_stats_figure
+
+plot_pf_descriptive_stats
+
+ggsave(
+  here("released_output", "results", "figures", "plot_pf_descriptive_stats.png"),
+  plot_pf_descriptive_stats,
+  width = 10, height = 6
+)
 ```
 
 ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
-
 # Validation of pharmacy first medication counts figure
 # OS data - waiting on released output
 
 df_bsa_total_meds <- df_bsa_meds %>%
   group_by(date) %>%
-    summarise(total_meds = sum(count) * 0.4,
-    .groups = 'drop') %>% 
-    mutate(source = "BSA") %>%
-    filter(date <= "2024-07-01")
+  summarise(
+    total_meds = sum(count) * 0.4,
+    .groups = "drop"
+  ) %>%
+  mutate(source = "BSA") %>%
+  filter(date <= "2024-07-01")
 
 df_pfmed_total <- df_pfmed %>%
   rename(date = interval_start) %>%
-  group_by(date) %>% 
-  summarise(total_meds = sum(numerator),
-  .groups = 'drop') %>%
+  group_by(date) %>%
+  summarise(
+    total_meds = sum(numerator),
+    .groups = "drop"
+  ) %>%
   mutate(source = "OS") %>%
   filter(date >= "2024-02-01")
 
@@ -1016,12 +886,14 @@ fig_validation_med_counts <- ggplot(df_validation_med_counts, aes(x = date, y =
 fig_validation_med_counts
 ```
 ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
-# GP vs PF provider graph 
+# GP vs PF provider graph
 
-df_condition_provider_grouped <- df_condition_provider %>% 
-  group_by(measure, interval_start, pf_status) %>% 
-  summarise(count = sum(numerator),
-  .groups = 'drop') %>%
+df_condition_provider_grouped <- df_condition_provider %>%
+  group_by(measure, interval_start, pf_status) %>%
+  summarise(
+    count = sum(numerator),
+    .groups = "drop"
+  ) %>%
   mutate(
     measure = recode(measure,
       "count_acute_sinusitis_total" = "Acute Sinusitis",
@@ -1056,7 +928,6 @@ figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start,
   )
 
 figure_gp_vs_pf
-
 ```
 
 # References

From 84a498a1019423e1d325431831615a7546254143 Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Mon, 2 Dec 2024 13:24:17 +0000
Subject: [PATCH 4/7] Update meds validation plot

---
 reports/pharmacy_first_report.Rmd | 131 ++++++++++++++++++++++++------
 1 file changed, 105 insertions(+), 26 deletions(-)

diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
index f53a0a0..75e36ba 100644
--- a/reports/pharmacy_first_report.Rmd
+++ b/reports/pharmacy_first_report.Rmd
@@ -785,6 +785,8 @@ plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_v
     strip.text.x = element_text(size = 13, face = "bold")
   )
 
+plot_validation_condition_count_pct
+
 ggsave(
   here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"),
   plot_validation_condition_count_pct,
@@ -842,48 +844,125 @@ ggsave(
 )
 ```
 
-```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
+```{r, message=FALSE, warning=FALSE, echo = FALSE}
 # Validation of pharmacy first medication counts figure
 # OS data - waiting on released output
 
-df_bsa_total_meds <- df_bsa_meds %>%
+df_bsa_medication_validation_sum <- df_bsa_medication_validation %>%
   group_by(date) %>%
-  summarise(
-    total_meds = sum(count) * 0.4,
-    .groups = "drop"
-  ) %>%
-  mutate(source = "BSA") %>%
-  filter(date <= "2024-07-01")
-
-df_pfmed_total <- df_pfmed %>%
+  summarise(count = sum(count) * 0.4) %>%
+  mutate(
+    source = "nhs_bsa",
+    count_method = "count_40pct"
+  )
+range(df_pfmed$interval_start)
+df_opensafely_pfmed_sum <- df_pfmed %>%
   rename(date = interval_start) %>%
   group_by(date) %>%
-  summarise(
-    total_meds = sum(numerator),
-    .groups = "drop"
-  ) %>%
-  mutate(source = "OS") %>%
-  filter(date >= "2024-02-01")
+  summarise(count = sum(numerator)) %>%
+  mutate(
+    source = "opensafely_tpp",
+    count_method = "opensafely_tpp"
+  )
 
-df_validation_med_counts <- bind_rows(df_pfmed_total, df_bsa_total_meds)
+df_validation_med_counts <- bind_rows(df_opensafely_pfmed_sum, df_bsa_medication_validation_sum) |>
+  filter(date >= "2024-01-01" & date <= "2024-07-01")
 
-fig_validation_med_counts <- ggplot(df_validation_med_counts, aes(x = date, y = total_meds)) +
-  geom_point() +
+plot_validation_med_count <- df_validation_med_counts |>
+  mutate(
+    source = factor(source, levels = c("opensafely_tpp", "nhs_bsa"), labels = c("OpenSAFELY-TPP", "NHS BSA")),
+    count_method = factor(count_method, levels = c("opensafely_tpp", "count_40pct"), labels = c("OpenSAFELY-TPP", "NHS BSA (40%)"))
+  ) |>
+  ggplot(aes(
+    x = date,
+    y = count,
+    colour = count_method,
+    shape = count_method)) +
+  geom_point(size = 2) +
+  facet_wrap(~count_method, scales = "free_y") +
   geom_line(size = 0.5) +
-  facet_wrap(~source, scales = "free_y") +
   labs(
-    title = "Pharmacy First Medications by Month (NHS BSA vs OpenSAFELY Data)",
-    x = "Month", y = "Count"
+    x = NULL,
+    y = "Count",
+    colour = NULL,
+    shape = NULL,
+  ) +
+  scale_x_date(
+    labels = scales::label_date_short(), breaks = "month"
   ) +
   theme(
-    plot.title = element_text(hjust = 0.5),
-    axis.title.x = element_blank()
+    text = element_text(size = 14)
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_colour_brewer(palette = "Dark2")
+
+# Another plot visualising the percentage
+plot_validation_med_pct <- df_validation_med_counts %>%
+  filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
+  pivot_wider(names_from = c(source, count_method), values_from = count) %>%
+  mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>%
+  ggplot(
+    aes(
+      x = date,
+      y = opensafely_tpp_opensafely_tpp / nhs_bsa_count_40pct,
+      shape = source,
+      color = source,
+      fill = source,
+      group = source
+    )
   ) +
+  geom_point(size = 2.5) +
+  geom_line(size = 0.5) +
+  facet_wrap(~source, scales = "free_y") +
   scale_x_date(
-    labels = scales::label_date_short()
+    labels = scales::label_date_short(), breaks = "month"
+  ) +
+  labs(x = NULL, y = "Percent", colour = NULL, shape = NULL, fill = NULL) +
+  scale_color_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_fill_viridis_d(
+    option = "plasma",
+    end = 0.9
+  ) +
+  scale_shape_manual(
+    values = c("Percentage of NHS BSA (40%) in OpenSAFELY" = 15)
+  ) +
+  theme(
+    text = element_text(size = 14)
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_y_continuous(labels = scales::percent)
+
+# Combining the plots with patchwork
+plot_validation_medication_count_pct <- (plot_validation_med_count + plot_validation_med_pct) +
+  plot_annotation(tag_levels = "A") +
+  plot_layout(guides = "collect", widths = c(2, 1)) &
+  theme(
+    legend.position = "bottom",
+    text = element_text(size = 15),
+    strip.background = element_rect(size = 0),
+    strip.text.x = element_text(size = 13, face = "bold")
   )
 
-fig_validation_med_counts
+plot_validation_medication_count_pct
+
+ggsave(
+  here("released_output", "results", "figures", "plot_validation_medication_count_pct.png"),
+  plot_validation_medication_count_pct,
+  width = 15, height = 6
+)
 ```
 ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
 # GP vs PF provider graph

From da8ee79dc411913173fa7b9e4a79e2c644aad9a0 Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Mon, 2 Dec 2024 16:14:35 +0000
Subject: [PATCH 5/7] Improve consultation and condition provider plots

---
 reports/pharmacy_first_report.Rmd | 208 +++++++++++++++++++++++++-----
 1 file changed, 174 insertions(+), 34 deletions(-)

diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
index 75e36ba..dd801de 100644
--- a/reports/pharmacy_first_report.Rmd
+++ b/reports/pharmacy_first_report.Rmd
@@ -227,21 +227,121 @@ To ensure comprehensive ethnicity data, we supplemented missing ethnicity values
 # Select measures and breakdown
 df_measures_selected <- df_measures %>%
   filter(measure_desc == "clinical_service") %>%
-  filter(is.na(group_by))
+  filter(is.na(group_by)) |>
+  select(measure, interval_start, numerator) |>
+  mutate(measure = factor(measure,
+    levels = c("Consultation Service", "Pharmacy First Consultation"),
+    labels = c(
+      "Consultation Service for minor illness (1577041000000109)",
+      "Pharmacy First service (983341000000102)"
+    )
+  ))
 
-# Create visualisation
-plot_measures(
-  df_measures_selected,
-  select_value = numerator,
-  select_interval_date = interval_end,
-  colour_var = NULL,
-  guide_nrow = 1,
-  facet_wrap = TRUE,
-  facet_var = measure,
-  title = "Pharmacy First Consultations",
-  y_label = "Number of codes for FP consultations",
+
+
+
+plot_pf_individual_consultations_count <- df_measures_selected |>
+  select(measure, interval_start, numerator) |>
+  ggplot(aes(
+    x = interval_start,
+    y = numerator,
+    colour = measure,
+    shape = measure,
+  )) +
+  geom_point(size = 2) +
+  geom_line(alpha = .3) +
+  labs(
+    title = NULL,
+    x = NULL,
+    y = "Total count",
+    colour = NULL,
+    shape = NULL
+  ) +
+  scale_y_continuous(
+    labels = scales::label_number(),
+  ) +
+  theme(legend.position = "bottom") +
+  guides(
+    colour = guide_legend(ncol = 2),
+    shape = guide_legend(ncol = 2)
+  ) +
+  scale_x_date(
+    date_breaks = "1 month",
+    labels = scales::label_date_short()
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date(c(
+      "2024-01-31"
+    )),
+    linetype = "dotted",
+    colour = "orange",
+    size = .7
+  ) +
+  scale_colour_viridis_d(end = .75) +
+  theme(
+    text = element_text(size = 14)
+  )
+
+
+ggsave(
+  here("released_output", "results", "figures", "plot_pf_individual_consultations_count.png"),
+  plot_pf_individual_consultations_count,
+  width = 10, height = 6
 )
-```
+
+
+plot_pf_grouped_consultations_count <- df_measures_selected |>
+  group_by(interval_start) |>
+  mutate(
+    pf_consultation_total = sum(numerator, na.rm = TRUE),
+    data_desc = "Pharmacy First Consultation") |>
+    ggplot(aes(
+    x = interval_start,
+    y = pf_consultation_total,
+    colour = data_desc,
+    shape = data_desc,
+  )) +
+  geom_point(size = 2) +
+  geom_line(alpha = .3) +
+  labs(
+    title = NULL,
+    x = NULL,
+    y = "Total count",
+    colour = NULL,
+    shape = NULL
+  ) +
+  scale_y_continuous(
+    labels = scales::label_number(),
+  ) +
+  theme(legend.position = "bottom") +
+  guides(
+    colour = guide_legend(ncol = 2),
+    shape = guide_legend(ncol = 2)
+  ) +
+  scale_x_date(
+    date_breaks = "1 month",
+    labels = scales::label_date_short()
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date(c(
+      "2024-01-31"
+    )),
+    linetype = "dotted",
+    colour = "orange",
+    size = .7
+  ) +
+  scale_colour_viridis_d(end = .75) +
+  theme(
+    text = element_text(size = 14)
+  )
+
+
+ggsave(
+  here("released_output", "results", "figures", "plot_pf_consultation_count.png"),
+  plot_pf_consultation_count,
+  width = 10, height = 6
+)
+ ```
 
 ```{r, message=FALSE, warning=FALSE, fig.height=10, fig.width=8}
 # Select measures and breakdown
@@ -814,8 +914,8 @@ plot_pf_descriptive_stats <- df_descriptive_stats %>%
   labs(
     x = NULL,
     y = NULL,
-    shape = "PF consultation with:",
-    colour = "PF consultation with:"
+    shape = "PF consultation linked to:",
+    colour = "PF consultation linked to:"
   ) +
   scale_x_date(
     labels = scales::label_date_short(), breaks = "month"
@@ -877,7 +977,8 @@ plot_validation_med_count <- df_validation_med_counts |>
     x = date,
     y = count,
     colour = count_method,
-    shape = count_method)) +
+    shape = count_method
+  )) +
   geom_point(size = 2) +
   facet_wrap(~count_method, scales = "free_y") +
   geom_line(size = 0.5) +
@@ -970,32 +1071,49 @@ ggsave(
 df_condition_provider_grouped <- df_condition_provider %>%
   group_by(measure, interval_start, pf_status) %>%
   summarise(
-    count = sum(numerator),
-    .groups = "drop"
+    count = sum(numerator)
   ) %>%
   mutate(
-    measure = recode(measure,
-      "count_acute_sinusitis_total" = "Acute Sinusitis",
-      "count_infected_insect_bite_total" = "Infected Insect Bite",
-      "count_uncomplicated_urinary_tract_infection_total" = "Uncomplicated UTI",
-      "count_acute_otitis_media_total" = "Acute Otitis Media",
-      "count_acute_pharyngitis_total" = "Acute Pharyngitis",
-      "count_herpes_zoster_total" = "Herpes Zoster",
-      "count_impetigo_total" = "Impetigo",
+    measure = factor(measure,
+      levels = c(
+        "count_acute_sinusitis_total",
+        "count_infected_insect_bite_total",
+        "count_uncomplicated_urinary_tract_infection_total",
+        "count_acute_otitis_media_total",
+        "count_acute_pharyngitis_total",
+        "count_herpes_zoster_total",
+        "count_impetigo_total"
+      ),
+      labels = c(
+        "Acute Sinusitis",
+        "Infected Insect Bite",
+        "UTI",
+        "Acute Otitis Media",
+        "Acute Pharyngitis",
+        "Herpes Zoster",
+        "Impetigo"
+      )
     ),
-    pf_status = case_when(
-      pf_status == TRUE ~ "PF",
-      pf_status == FALSE ~ "GP"
+    pf_status = factor(pf_status,
+      levels = c(TRUE, FALSE),
+      labels = c("Linked to Pharmacy First consultation", "Not linked to Pharmacy First consultation")
     )
   )
 
-figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start, y = count, color = pf_status)) +
-  geom_point() +
+plot_pf_condition_provider_count <- ggplot(
+  df_condition_provider_grouped,
+  aes(
+    x = interval_start,
+    y = count,
+    colour = pf_status,
+    shape = pf_status
+  )
+) +
+  geom_point(size = 1.5) +
   geom_line(size = 0.5) +
   facet_wrap(~measure, scales = "free_y") +
   labs(
-    title = "Count of clinical events",
-    x = "Month", y = "Count", color = "Provider:"
+    x = NULL, y = "Count", color = NULL, shape = NULL
   ) +
   theme(
     plot.title = element_text(hjust = 0.5),
@@ -1004,9 +1122,31 @@ figure_gp_vs_pf <- ggplot(df_condition_provider_grouped, aes(x = interval_start,
   ) +
   scale_x_date(
     labels = scales::label_date_short()
+  ) +
+  geom_vline(
+    xintercept = lubridate::as_date("2024-02-01"),
+    linetype = "dotted",
+    colour = "orange",
+    linewidth = .7
+  ) +
+  scale_color_viridis_d(
+    option = "plasma",
+    end = 0.75
+  ) +
+  theme(
+    legend.position = "bottom",
+    text = element_text(size = 14),
+    strip.background = element_rect(size = 0),
+    # strip.text.x = element_text(size = 13, face = "bold")
   )
 
-figure_gp_vs_pf
+plot_pf_condition_provider_count
+
+ggsave(
+  here("released_output", "results", "figures", "plot_pf_condition_provider_count.png"),
+  plot_pf_condition_provider_count,
+  width = 13, height = 8
+)
 ```
 
 # References

From 2aee9aa72141f2e830151083533560212cb03695 Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Tue, 3 Dec 2024 10:57:51 +0000
Subject: [PATCH 6/7] Fix `.Rmd` and run `styler::style_dir()`

---
 lib/functions/load_opensafely_outputs.R |  2 +-
 lib/functions/plot_measures.R           |  2 +-
 reports/pharmacy_first_report.Rmd       | 14 ++++++--------
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/lib/functions/load_opensafely_outputs.R b/lib/functions/load_opensafely_outputs.R
index 46204bd..432ce74 100644
--- a/lib/functions/load_opensafely_outputs.R
+++ b/lib/functions/load_opensafely_outputs.R
@@ -85,4 +85,4 @@ df_measures <- df_measures %>%
     labels = c("Female", "Male")
   ))
 
-df_measures$age_band[is.na(df_measures$age_band)] <- "Missing"
\ No newline at end of file
+df_measures$age_band[is.na(df_measures$age_band)] <- "Missing"
diff --git a/lib/functions/plot_measures.R b/lib/functions/plot_measures.R
index 045ebcb..4171c0c 100644
--- a/lib/functions/plot_measures.R
+++ b/lib/functions/plot_measures.R
@@ -102,4 +102,4 @@ plot_measures <- function(
 gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey")
 region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey")
 ethnicity_palette <- c("#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple", "grey")
-sex_palette <- c("red", "blue")
\ No newline at end of file
+sex_palette <- c("red", "blue")
diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
index dd801de..4645590 100644
--- a/reports/pharmacy_first_report.Rmd
+++ b/reports/pharmacy_first_report.Rmd
@@ -237,9 +237,6 @@ df_measures_selected <- df_measures %>%
     )
   ))
 
-
-
-
 plot_pf_individual_consultations_count <- df_measures_selected |>
   select(measure, interval_start, numerator) |>
   ggplot(aes(
@@ -294,8 +291,9 @@ plot_pf_grouped_consultations_count <- df_measures_selected |>
   group_by(interval_start) |>
   mutate(
     pf_consultation_total = sum(numerator, na.rm = TRUE),
-    data_desc = "Pharmacy First Consultation") |>
-    ggplot(aes(
+    data_desc = "Pharmacy First Consultation"
+  ) |>
+  ggplot(aes(
     x = interval_start,
     y = pf_consultation_total,
     colour = data_desc,
@@ -337,11 +335,11 @@ plot_pf_grouped_consultations_count <- df_measures_selected |>
 
 
 ggsave(
-  here("released_output", "results", "figures", "plot_pf_consultation_count.png"),
-  plot_pf_consultation_count,
+  here("released_output", "results", "figures", "plot_pf_grouped_consultations_count.png"),
+  plot_pf_grouped_consultations_count,
   width = 10, height = 6
 )
- ```
+```
 
 ```{r, message=FALSE, warning=FALSE, fig.height=10, fig.width=8}
 # Select measures and breakdown

From 0700e55367a8c7c3a8a28d7eb303d9b2acd1c5a3 Mon Sep 17 00:00:00 2001
From: Milan Wiedemann <milan.wiedemann@gmail.com>
Date: Tue, 3 Dec 2024 14:43:11 +0000
Subject: [PATCH 7/7] Change `plot_*` to `fig_*` prefix for figure objects and
 files

---
 reports/pharmacy_first_report.Rmd | 55 ++++++++++++++++---------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd
index 4645590..da3e951 100644
--- a/reports/pharmacy_first_report.Rmd
+++ b/reports/pharmacy_first_report.Rmd
@@ -223,7 +223,7 @@ To ensure comprehensive ethnicity data, we supplemented missing ethnicity values
 
 ### Total population
 
-```{r, message=FALSE, warning=FALSE, fig.height=3, fig.width=8}
+```{r, message=FALSE, warning=FALSE, fig.height=6, fig.width=10}
 # Select measures and breakdown
 df_measures_selected <- df_measures %>%
   filter(measure_desc == "clinical_service") %>%
@@ -237,7 +237,7 @@ df_measures_selected <- df_measures %>%
     )
   ))
 
-plot_pf_individual_consultations_count <- df_measures_selected |>
+fig_pf_individual_consultations_count <- df_measures_selected |>
   select(measure, interval_start, numerator) |>
   ggplot(aes(
     x = interval_start,
@@ -281,13 +281,14 @@ plot_pf_individual_consultations_count <- df_measures_selected |>
 
 
 ggsave(
-  here("released_output", "results", "figures", "plot_pf_individual_consultations_count.png"),
-  plot_pf_individual_consultations_count,
+  here("released_output", "results", "figures", "fig_pf_individual_consultations_count.png"),
+  fig_pf_individual_consultations_count,
   width = 10, height = 6
 )
 
+fig_pf_individual_consultations_count
 
-plot_pf_grouped_consultations_count <- df_measures_selected |>
+fig_pf_grouped_consultations_count <- df_measures_selected |>
   group_by(interval_start) |>
   mutate(
     pf_consultation_total = sum(numerator, na.rm = TRUE),
@@ -335,8 +336,8 @@ plot_pf_grouped_consultations_count <- df_measures_selected |>
 
 
 ggsave(
-  here("released_output", "results", "figures", "plot_pf_grouped_consultations_count.png"),
-  plot_pf_grouped_consultations_count,
+  here("released_output", "results", "figures", "fig_pf_grouped_consultations_count.png"),
+  fig_pf_grouped_consultations_count,
   width = 10, height = 6
 )
 ```
@@ -767,7 +768,7 @@ df_opensafely_validation <- df_measures %>%
 df_validation_condition_counts <- bind_rows(df_opensafely_validation, df_bsa_consultation_validation)
 
 # Line graph comparing clinical condition counts of BSA and OS data
-plot_validation_condition_count <- df_validation_condition_counts %>%
+fig_validation_condition_count <- df_validation_condition_counts %>%
   filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
   mutate(source = factor(source,
     levels = c("opensafely", "nhs_bsa"),
@@ -821,7 +822,7 @@ plot_validation_condition_count <- df_validation_condition_counts %>%
   scale_y_continuous(labels = scales::number)
 
 # Another plot visualising the percentage
-plot_validation_condition_pct <- df_validation_condition_counts %>%
+fig_validation_condition_pct <- df_validation_condition_counts %>%
   filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
   pivot_wider(names_from = c(source, count_method), values_from = count) %>%
   mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>%
@@ -873,7 +874,7 @@ plot_validation_condition_pct <- df_validation_condition_counts %>%
   scale_y_continuous(labels = scales::percent)
 
 # Combining the plots with patchwork
-plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_validation_condition_pct) +
+fig_validation_condition_count_pct <- (fig_validation_condition_count + fig_validation_condition_pct) +
   plot_annotation(tag_levels = "A") +
   plot_layout(guides = "collect", widths = c(2, 1)) &
   theme(
@@ -883,18 +884,18 @@ plot_validation_condition_count_pct <- (plot_validation_condition_count + plot_v
     strip.text.x = element_text(size = 13, face = "bold")
   )
 
-plot_validation_condition_count_pct
+fig_validation_condition_count_pct
 
 ggsave(
-  here("released_output", "results", "figures", "plot_validation_condition_count_pct.png"),
-  plot_validation_condition_count_pct,
+  here("released_output", "results", "figures", "fig_validation_condition_count_pct.png"),
+  fig_validation_condition_count_pct,
   width = 15, height = 6
 )
 ```
 
 ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8}
 # Line graph comparing clinical condition counts of BSA and OS data
-plot_pf_descriptive_stats <- df_descriptive_stats %>%
+fig_pf_descriptive_stats <- df_descriptive_stats %>%
   mutate(
     measure = factor(measure,
       levels = c("pf_with_pfmed", "pf_with_pfcondition", "pf_with_pfmed_and_pfcondition"),
@@ -933,11 +934,11 @@ plot_pf_descriptive_stats <- df_descriptive_stats %>%
   scale_colour_brewer(palette = "Dark2")
 
 
-plot_pf_descriptive_stats
+fig_pf_descriptive_stats
 
 ggsave(
-  here("released_output", "results", "figures", "plot_pf_descriptive_stats.png"),
-  plot_pf_descriptive_stats,
+  here("released_output", "results", "figures", "fig_pf_descriptive_stats.png"),
+  fig_pf_descriptive_stats,
   width = 10, height = 6
 )
 ```
@@ -966,7 +967,7 @@ df_opensafely_pfmed_sum <- df_pfmed %>%
 df_validation_med_counts <- bind_rows(df_opensafely_pfmed_sum, df_bsa_medication_validation_sum) |>
   filter(date >= "2024-01-01" & date <= "2024-07-01")
 
-plot_validation_med_count <- df_validation_med_counts |>
+fig_validation_med_count <- df_validation_med_counts |>
   mutate(
     source = factor(source, levels = c("opensafely_tpp", "nhs_bsa"), labels = c("OpenSAFELY-TPP", "NHS BSA")),
     count_method = factor(count_method, levels = c("opensafely_tpp", "count_40pct"), labels = c("OpenSAFELY-TPP", "NHS BSA (40%)"))
@@ -1001,7 +1002,7 @@ plot_validation_med_count <- df_validation_med_counts |>
   scale_colour_brewer(palette = "Dark2")
 
 # Another plot visualising the percentage
-plot_validation_med_pct <- df_validation_med_counts %>%
+fig_validation_med_pct <- df_validation_med_counts %>%
   filter(count_method %in% c("opensafely_tpp", "count_40pct")) %>%
   pivot_wider(names_from = c(source, count_method), values_from = count) %>%
   mutate(source = "Percentage of NHS BSA (40%) in OpenSAFELY") %>%
@@ -1045,7 +1046,7 @@ plot_validation_med_pct <- df_validation_med_counts %>%
   scale_y_continuous(labels = scales::percent)
 
 # Combining the plots with patchwork
-plot_validation_medication_count_pct <- (plot_validation_med_count + plot_validation_med_pct) +
+fig_validation_medication_count_pct <- (fig_validation_med_count + fig_validation_med_pct) +
   plot_annotation(tag_levels = "A") +
   plot_layout(guides = "collect", widths = c(2, 1)) &
   theme(
@@ -1055,11 +1056,11 @@ plot_validation_medication_count_pct <- (plot_validation_med_count + plot_valida
     strip.text.x = element_text(size = 13, face = "bold")
   )
 
-plot_validation_medication_count_pct
+fig_validation_medication_count_pct
 
 ggsave(
-  here("released_output", "results", "figures", "plot_validation_medication_count_pct.png"),
-  plot_validation_medication_count_pct,
+  here("released_output", "results", "figures", "fig_validation_medication_count_pct.png"),
+  fig_validation_medication_count_pct,
   width = 15, height = 6
 )
 ```
@@ -1098,7 +1099,7 @@ df_condition_provider_grouped <- df_condition_provider %>%
     )
   )
 
-plot_pf_condition_provider_count <- ggplot(
+fig_pf_condition_provider_count <- ggplot(
   df_condition_provider_grouped,
   aes(
     x = interval_start,
@@ -1138,11 +1139,11 @@ plot_pf_condition_provider_count <- ggplot(
     # strip.text.x = element_text(size = 13, face = "bold")
   )
 
-plot_pf_condition_provider_count
+fig_pf_condition_provider_count
 
 ggsave(
-  here("released_output", "results", "figures", "plot_pf_condition_provider_count.png"),
-  plot_pf_condition_provider_count,
+  here("released_output", "results", "figures", "fig_pf_condition_provider_count.png"),
+  fig_pf_condition_provider_count,
   width = 13, height = 8
 )
 ```