Skip to content

Commit

Permalink
Add validation data for consultations
Browse files Browse the repository at this point in the history
  • Loading branch information
milanwiedemann committed Nov 19, 2024
1 parent f88c7ea commit 48c5463
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 0 deletions.
133 changes: 133 additions & 0 deletions lib/functions/function_get_validation_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
library(httr)
library(here)
library(rvest)
library(dplyr)
library(lubridate)
library(tidyverse)
library(readr)

#' Extract dates from dispensing data URL
#'
#' @param urls List of URLs
#' @return String, of date in YYYY-MM-DD format if date can be found, otherwise NA.
extract_dates <- function(urls) {
urls %>%
map(~ {
match <- str_match(.x, ".*Dispensing%20Data%20(\\w+)%20(\\d{2})")[, 2:3]

if (all(!is.na(match))) {
month <- match[1]
year <- paste0("20", match[2])

parsed_date <- parse_date_time(
paste("1", month, year),
orders = "dmy",
quiet = TRUE
)
if (!is.na(parsed_date)) {
return(format(parsed_date, "%Y-%m-%d"))
}
}
return(NA)
})
}

get_dispensing_urls <- function(start_date = NULL, end_date = NULL) {
# This is the URL where the data is linked from
url <- "https://www.nhsbsa.nhs.uk/prescription-data/dispensing-data/dispensing-contractors-data"

# The URL to the data we are interested in follows this URL structure
# https://www.nhsbsa.nhs.uk/sites/default/files/2024-05/Dispensing%20Data%20Jan%2024%20-%20CSV.csv
base_url <- "https://www.nhsbsa.nhs.uk"

response <- GET(url)
html_content <- content(response, "text", encoding = "UTF-8")

csv_links <- read_html(html_content) %>%
html_nodes("a") %>%
html_attr("href") %>%
.[grepl("Dispensing%20Data.*\\.csv", .)]

df <- tibble(
date = as.Date(extract_dates(csv_links) |> unlist()),
url = paste0(base_url, csv_links)
)

if (is.null(start_date)) {
start_date <- as.Date(min(df$date))
}

if (is.null(end_date)) {
end_date <- as.Date(max(df$date))
}

df <- df |>
filter(between(date, as.Date(start_date), as.Date(end_date)))

setNames(as.list(df$url), as.character(df$date))
}

get_dispensing_data <- function(start_date = NULL, end_date = NULL) {
dispensing_urls <- get_dispensing_urls(start_date = start_date, end_date = end_date)

icb_var_list <- c(
"ICBCode",
"ICB"
)

pf_var_list <- c(
"NumberofPharmacyFirstClinicalPathwaysConsultations-AcuteOtitisMedia",
"NumberofPharmacyFirstClinicalPathwaysConsultations-AcuteSoreThroat",
"NumberofPharmacyFirstClinicalPathwaysConsultations-Impetigo",
"NumberofPharmacyFirstClinicalPathwaysConsultations-InfectedInsectBites",
"NumberofPharmacyFirstClinicalPathwaysConsultations-Shingles",
"NumberofPharmacyFirstClinicalPathwaysConsultations-Sinusitis",
"NumberofPharmacyFirstClinicalPathwaysConsultations-UncomplicatedUTI",
"NumberofPharmacyFirstUrgentMedicineSupplyConsultations",
"NumberofPharmacyFirstMinorIllnessReferralConsultations"
)

df <- dispensing_urls |>
map(read_csv,
name_repair = janitor::make_clean_names
) |>
bind_rows(.id = "date") |>
select(date, all_of(janitor::make_clean_names(c(icb_var_list, pf_var_list))))

df |>
rename_with(~ str_replace(., "^numberof_pharmacy_first", "n_pf")) |>
rename_with(~ str_replace(., "clinical_pathways_consultations", "consultation"))
}

# Calculate summary of counts
df_dispensing_data <- get_dispensing_data(start_date = "2024-02-01")

df_dispensing_data_summary <- df_dispensing_data |>
group_by(date) |>
summarise(
n_pf_consultation_acute_otitis_media = sum(n_pf_consultation_acute_otitis_media, na.rm = TRUE),
n_pf_consultation_acute_sore_throat = sum(n_pf_consultation_acute_sore_throat, na.rm = TRUE),
n_pf_consultation_impetigo = sum(n_pf_consultation_impetigo, na.rm = TRUE),
n_pf_consultation_infected_insect_bites = sum(n_pf_consultation_infected_insect_bites, na.rm = TRUE),
n_pf_consultation_shingles = sum(n_pf_consultation_shingles, na.rm = TRUE),
n_pf_consultation_sinusitis = sum(n_pf_consultation_sinusitis, na.rm = TRUE),
n_pf_consultation_uncomplicated_uti = sum(n_pf_consultation_uncomplicated_uti, na.rm = TRUE),
# n_pf_urgent_medicine_supply_consultations = sum(n_pf_urgent_medicine_supply_consultations, na.rm = TRUE),
# n_pf_minor_illness_referral_consultations = sum(n_pf_minor_illness_referral_consultations, na.rm = TRUE)
) |>
pivot_longer(cols = c(
n_pf_consultation_acute_otitis_media,
n_pf_consultation_acute_sore_throat,
n_pf_consultation_impetigo,
n_pf_consultation_infected_insect_bites,
n_pf_consultation_shingles,
n_pf_consultation_sinusitis,
n_pf_consultation_uncomplicated_uti,
# n_pf_urgent_medicine_supply_consultations,
# n_pf_minor_illness_referral_consultations
),
names_to = "consultation_type",
values_to = "count") |>
mutate(consultation_type = str_replace(consultation_type, "^n_pf_consultation_", ""))

write_csv(df_dispensing_data_summary, here("lib", "validation", "data", "pf_consultation_validation_data.csv"))
43 changes: 43 additions & 0 deletions lib/validation/data/pf_consultation_validation_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
date,consultation_type,count
2024-02-01,acute_otitis_media,18893
2024-02-01,acute_sore_throat,41014
2024-02-01,impetigo,6816
2024-02-01,infected_insect_bites,3412
2024-02-01,shingles,4078
2024-02-01,sinusitis,18384
2024-02-01,uncomplicated_uti,32678
2024-03-01,acute_otitis_media,22888
2024-03-01,acute_sore_throat,54756
2024-03-01,impetigo,6740
2024-03-01,infected_insect_bites,4180
2024-03-01,shingles,4069
2024-03-01,sinusitis,17220
2024-03-01,uncomplicated_uti,34536
2024-04-01,acute_otitis_media,19295
2024-04-01,acute_sore_throat,56925
2024-04-01,impetigo,7191
2024-04-01,infected_insect_bites,6099
2024-04-01,shingles,4470
2024-04-01,sinusitis,17715
2024-04-01,uncomplicated_uti,41951
2024-05-01,acute_otitis_media,20240
2024-05-01,acute_sore_throat,58022
2024-05-01,impetigo,6235
2024-05-01,infected_insect_bites,19010
2024-05-01,shingles,4691
2024-05-01,sinusitis,15909
2024-05-01,uncomplicated_uti,44340
2024-06-01,acute_otitis_media,15826
2024-06-01,acute_sore_throat,52288
2024-06-01,impetigo,6362
2024-06-01,infected_insect_bites,27294
2024-06-01,shingles,4626
2024-06-01,sinusitis,15052
2024-06-01,uncomplicated_uti,43289
2024-07-01,acute_otitis_media,17682
2024-07-01,acute_sore_throat,54859
2024-07-01,impetigo,6740
2024-07-01,infected_insect_bites,36370
2024-07-01,shingles,5103
2024-07-01,sinusitis,14115
2024-07-01,uncomplicated_uti,51321

0 comments on commit 48c5463

Please sign in to comment.