From 58a487f053ea6041f9ec7d3fd73405a68e0ba616 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Sun, 24 Nov 2024 09:32:52 +0000 Subject: [PATCH 1/4] Add function and data to validate PF medication data --- ... => get_pf_consultation_validation_data.R} | 28 +++-- .../get_pf_medication_validation_data.R | 117 ++++++++++++++++++ ...nction_plot_measures.R => plot_measures.R} | 0 ...nction_tidy_measures.R => tidy_measures.R} | 0 .../data/pf_medication_validation_data.csv | 78 ++++++++++++ reports/pharmacy_first_report.Rmd | 86 +++++++------ 6 files changed, 257 insertions(+), 52 deletions(-) rename lib/functions/{function_get_validation_data.R => get_pf_consultation_validation_data.R} (89%) create mode 100644 lib/functions/get_pf_medication_validation_data.R rename lib/functions/{function_plot_measures.R => plot_measures.R} (100%) rename lib/functions/{function_tidy_measures.R => tidy_measures.R} (100%) create mode 100644 lib/validation/data/pf_medication_validation_data.csv diff --git a/lib/functions/function_get_validation_data.R b/lib/functions/get_pf_consultation_validation_data.R similarity index 89% rename from lib/functions/function_get_validation_data.R rename to lib/functions/get_pf_consultation_validation_data.R index dc4af6a..6d64c7a 100644 --- a/lib/functions/function_get_validation_data.R +++ b/lib/functions/get_pf_consultation_validation_data.R @@ -115,19 +115,21 @@ df_dispensing_data_summary <- df_dispensing_data |> # n_pf_urgent_medicine_supply_consultations = sum(n_pf_urgent_medicine_supply_consultations, na.rm = TRUE), # n_pf_minor_illness_referral_consultations = sum(n_pf_minor_illness_referral_consultations, na.rm = TRUE) ) |> - pivot_longer(cols = c( - n_pf_consultation_acute_otitis_media, - n_pf_consultation_acute_sore_throat, - n_pf_consultation_impetigo, - n_pf_consultation_infected_insect_bites, - n_pf_consultation_shingles, - n_pf_consultation_sinusitis, - n_pf_consultation_uncomplicated_uti, - # n_pf_urgent_medicine_supply_consultations, - 
# n_pf_minor_illness_referral_consultations - ), - names_to = "consultation_type", - values_to = "count") |> + pivot_longer( + cols = c( + n_pf_consultation_acute_otitis_media, + n_pf_consultation_acute_sore_throat, + n_pf_consultation_impetigo, + n_pf_consultation_infected_insect_bites, + n_pf_consultation_shingles, + n_pf_consultation_sinusitis, + n_pf_consultation_uncomplicated_uti, + # n_pf_urgent_medicine_supply_consultations, + # n_pf_minor_illness_referral_consultations + ), + names_to = "consultation_type", + values_to = "count" + ) |> mutate(consultation_type = str_replace(consultation_type, "^n_pf_consultation_", "")) write_csv(df_dispensing_data_summary, here("lib", "validation", "data", "pf_consultation_validation_data.csv")) diff --git a/lib/functions/get_pf_medication_validation_data.R b/lib/functions/get_pf_medication_validation_data.R new file mode 100644 index 0000000..e0f7bf4 --- /dev/null +++ b/lib/functions/get_pf_medication_validation_data.R @@ -0,0 +1,117 @@ +library("httr") +library("jsonlite") +library("crul") +library("here") +library("rvest") +library("dplyr") +library("lubridate") +library("tidyverse") +library("readr") + +base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" +package_list_method <- "package_list" +package_show_method <- "package_show?id=" +action_method <- "datastore_search_sql?" 
+ +get_available_datasets <- function(remove_foi = TRUE) { + base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" + package_list_method <- "package_list" + + datasets_response <- fromJSON(paste0( + base_endpoint, + package_list_method + ))$result + + if (remove_foi) { + datasets_response <- datasets_response[!grepl("^foi", datasets_response)] + } + + datasets_response +} + +get_available_datasets() + +get_dataset_table_names <- function(dataset_id, start_date = NULL, end_date = NULL) { + available_datasets <- get_available_datasets(remove_foi = FALSE) + + if (!dataset_id %in% available_datasets) { + stop("The provided 'dataset_id' is not available. Run 'get_available_datasets()' to see all available datasets.", call. = FALSE) + } + + base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" + package_show_method <- "package_show?id=" + + metadata_response <- GET(paste0(base_endpoint, package_show_method, dataset_id)) + resources_table <- content(metadata_response)$result$resources + + dataset_tables <- tibble( + table_name = map_chr( + resources_table, + "bq_table_name", + .default = NA_character_ + ), + date = ym(str_extract(table_name, "\\d{6}")) + ) |> + relocate(date) + + if (is.null(start_date)) { + start_date <- as.Date(min(dataset_tables$date)) + } + + if (is.null(end_date)) { + end_date <- as.Date(max(dataset_tables$date)) + } + + dataset_tables <- dataset_tables |> + filter(between(date, as.Date(start_date), as.Date(end_date))) + + dataset_tables +} + +get_dataset_table_names("prescription-cost-analysis-pca-monthly-data", "2024-09-01") + +construct_sql_query <- function(table_name, sql_query) { + gsub("{FROM_TABLE}", sprintf("FROM `%s`", table_name), sql_query, fixed = TRUE) +} + +get_nhsbsa_data <- function(dataset_id, sql, start_date = NULL, end_date = NULL) { + base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" + action_method <- "datastore_search_sql?" 
+ + table_names <- get_dataset_table_names(dataset_id, start_date, end_date)$table_name + + async_api_calls <- paste0( + base_endpoint, + action_method, + "resource_id=", table_names, + "&sql=", URLencode(map_chr(table_names, construct_sql_query, sql)) + ) + + responses <- crul::Async$new(urls = async_api_calls)$get() + + df_tmp <- bind_rows(map(responses, ~ as_tibble(jsonlite::fromJSON(.x$parse("UTF-8"))$result$result$records))) + + df_tmp |> + janitor::clean_names() |> + mutate(year_month = ym(year_month)) |> + select(date = year_month, everything()) +} + +sql <- (" + SELECT * + {FROM_TABLE} + WHERE PHARMACY_ADVANCED_SERVICE = 'Pharmacy First Clinical Pathways' + ") + +df_validate <- get_nhsbsa_data("prescription-cost-analysis-pca-monthly-data", sql, start_date = "2024-02-01") + +names(df_validate) +unique(df_validate$pharmacy_advanced_service) + +pf_medication_validation_data <- df_validate |> + select(date, snomed_code, pharmacy_advanced_service, bnf_section, bnf_paragraph, items) |> + group_by(date, pharmacy_advanced_service, bnf_paragraph) |> + summarise(count = sum(items, na.rm = TRUE)) |> + ungroup() + +write_csv(pf_medication_validation_data, here("lib", "validation", "data", "pf_medication_validation_data.csv")) diff --git a/lib/functions/function_plot_measures.R b/lib/functions/plot_measures.R similarity index 100% rename from lib/functions/function_plot_measures.R rename to lib/functions/plot_measures.R diff --git a/lib/functions/function_tidy_measures.R b/lib/functions/tidy_measures.R similarity index 100% rename from lib/functions/function_tidy_measures.R rename to lib/functions/tidy_measures.R diff --git a/lib/validation/data/pf_medication_validation_data.csv b/lib/validation/data/pf_medication_validation_data.csv new file mode 100644 index 0000000..4ff59ed --- /dev/null +++ b/lib/validation/data/pf_medication_validation_data.csv @@ -0,0 +1,78 @@ +date,pharmacy_advanced_service,bnf_paragraph,count +2024-02-01,Pharmacy First Clinical 
Pathways,Antibacterial preparations,3113 +2024-02-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,7476 +2024-02-01,Pharmacy First Clinical Pathways,Herpesvirus infections,3336 +2024-02-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,13 +2024-02-01,Pharmacy First Clinical Pathways,Macrolides,4528 +2024-02-01,Pharmacy First Clinical Pathways,Otitis externa,3831 +2024-02-01,Pharmacy First Clinical Pathways,Penicillins,42008 +2024-02-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,2251 +2024-02-01,Pharmacy First Clinical Pathways,Tetracyclines,1002 +2024-02-01,Pharmacy First Clinical Pathways,Urinary-tract infections,26877 +2024-03-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3386 +2024-03-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,6588 +2024-03-01,Pharmacy First Clinical Pathways,Herpesvirus infections,3451 +2024-03-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,32 +2024-03-01,Pharmacy First Clinical Pathways,Macrolides,6201 +2024-03-01,Pharmacy First Clinical Pathways,Otitis externa,5325 +2024-03-01,Pharmacy First Clinical Pathways,Penicillins,56231 +2024-03-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,1820 +2024-03-01,Pharmacy First Clinical Pathways,Tetracyclines,1028 +2024-03-01,Pharmacy First Clinical Pathways,Urinary-tract infections,29345 +2024-04-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3834 +2024-04-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,6598 +2024-04-01,Pharmacy First Clinical Pathways,Herpesvirus infections,3818 +2024-04-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,5 +2024-04-01,Pharmacy First Clinical Pathways,Macrolides,6530 +2024-04-01,Pharmacy First Clinical Pathways,Otitis externa,4375 +2024-04-01,Pharmacy First Clinical Pathways,Penicillins,58070 +2024-04-01,Pharmacy First Clinical 
Pathways,Preparations for minor cuts and abrasions,1673 +2024-04-01,Pharmacy First Clinical Pathways,Tetracyclines,1036 +2024-04-01,Pharmacy First Clinical Pathways,Urinary-tract infections,35592 +2024-05-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3224 +2024-05-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,5789 +2024-05-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4065 +2024-05-01,Pharmacy First Clinical Pathways,Macrolides,7864 +2024-05-01,Pharmacy First Clinical Pathways,Otitis externa,5457 +2024-05-01,Pharmacy First Clinical Pathways,Penicillins,68200 +2024-05-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,1558 +2024-05-01,Pharmacy First Clinical Pathways,Tetracyclines,935 +2024-05-01,Pharmacy First Clinical Pathways,Urinary-tract infections,38165 +2024-06-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3262 +2024-06-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,5671 +2024-06-01,Pharmacy First Clinical Pathways,Herpesvirus infections,3979 +2024-06-01,Pharmacy First Clinical Pathways,Macrolides,7910 +2024-06-01,Pharmacy First Clinical Pathways,Otitis externa,4541 +2024-06-01,Pharmacy First Clinical Pathways,Penicillins,67691 +2024-06-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,1625 +2024-06-01,Pharmacy First Clinical Pathways,Tetracyclines,813 +2024-06-01,Pharmacy First Clinical Pathways,Urinary-tract infections,36995 +2024-07-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3327 +2024-07-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,5022 +2024-07-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4332 +2024-07-01,Pharmacy First Clinical Pathways,Macrolides,9023 +2024-07-01,Pharmacy First Clinical Pathways,Otitis externa,4950 +2024-07-01,Pharmacy First Clinical Pathways,Penicillins,76597 +2024-07-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and 
abrasions,1664 +2024-07-01,Pharmacy First Clinical Pathways,Tetracyclines,819 +2024-07-01,Pharmacy First Clinical Pathways,Urinary-tract infections,43848 +2024-08-01,Pharmacy First Clinical Pathways,Antibacterial preparations,3628 +2024-08-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,3996 +2024-08-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4372 +2024-08-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,9 +2024-08-01,Pharmacy First Clinical Pathways,Macrolides,8538 +2024-08-01,Pharmacy First Clinical Pathways,Otitis externa,5554 +2024-08-01,Pharmacy First Clinical Pathways,Penicillins,70102 +2024-08-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,1761 +2024-08-01,Pharmacy First Clinical Pathways,Tetracyclines,572 +2024-08-01,Pharmacy First Clinical Pathways,Urinary-tract infections,45658 +2024-09-01,Pharmacy First Clinical Pathways,Antibacterial preparations,4165 +2024-09-01,Pharmacy First Clinical Pathways,Drugs used in nasal allergy,5464 +2024-09-01,Pharmacy First Clinical Pathways,Herpesvirus infections,4042 +2024-09-01,Pharmacy First Clinical Pathways,Individually formulated preparations bought in,11 +2024-09-01,Pharmacy First Clinical Pathways,Macrolides,7177 +2024-09-01,Pharmacy First Clinical Pathways,Otitis externa,4644 +2024-09-01,Pharmacy First Clinical Pathways,Penicillins,58958 +2024-09-01,Pharmacy First Clinical Pathways,Preparations for minor cuts and abrasions,1939 +2024-09-01,Pharmacy First Clinical Pathways,Tetracyclines,735 +2024-09-01,Pharmacy First Clinical Pathways,Urinary-tract infections,47327 diff --git a/reports/pharmacy_first_report.Rmd b/reports/pharmacy_first_report.Rmd index 885c500..85125d1 100644 --- a/reports/pharmacy_first_report.Rmd +++ b/reports/pharmacy_first_report.Rmd @@ -20,8 +20,8 @@ library(gt) ```{r load-data, message=FALSE, warning=FALSE} # Load plotting function -source(here::here("lib", "functions", 
"function_tidy_measures.R")) -source(here::here("lib", "functions", "function_plot_measures.R")) +source(here::here("lib", "functions", "tidy_measures.R")) +source(here::here("lib", "functions", "plot_measures.R")) # Load data based on environment if (Sys.getenv("OPENSAFELY_BACKEND") != "") { @@ -117,7 +117,7 @@ df_measures$region <- factor( df_measures <- df_measures %>% mutate(sex = factor(sex, levels = c("female", "male"), labels = c("Female", "Male"))) -df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" +df_measures$age_band[is.na(df_measures$age_band)] <- "Missing" gradient_palette <- c("#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1", "grey") region_palette <- c("red", "navy", "#018701", "#ffa600ca", "purple", "brown", "#f4a5b2", "cyan", "green", "grey") @@ -213,7 +213,6 @@ clinical_pathways_table %>% style = cell_text(weight = "bold"), locations = cells_column_labels(columns = everything()) ) - ``` ### Codelists @@ -226,7 +225,6 @@ The following two SNOMED codes were used to identify Pharmacy First consultation For clarity, we combined these codes for the presentation of the results. ```{r echo=FALSE} - # Create pharmacy first service codes dataframe pharmacy_first_table <- data.frame( codelist = c( @@ -262,7 +260,6 @@ pharmacy_first_table %>% To categorise clinical events related to Pharmacy First services used the Pharmacy First [Clinical Pathways Codelist](https://www.opencodelists.org/codelist/opensafely/pharmacy-first-clinical-pathway-conditions/7ec97762/#full-list). 
```{r echo=FALSE, message=FALSE} - clinical_codes_table <- data.frame( condition = c( "Acute otitis media", @@ -337,7 +334,6 @@ ethnicity_table <- data.frame( # heading.title.font.size = "large", # heading.subtitle.font.size = "small" # ) - ``` # Results @@ -786,9 +782,9 @@ df_opensafely <- df_measures %>% # Add a new column to each data frame to identify the source df_opensafely <- df_opensafely %>% - mutate(source = 'OS') + mutate(source = "OS") df_bsa_validation <- df_bsa_validation %>% - mutate(source = 'BSA') + mutate(source = "BSA") # Drop the original 'count' column from the BSA data to allow for easy consistent grouping by 'count' df_validation_condition_counts <- df_bsa_validation %>% @@ -809,14 +805,14 @@ df_pivoted <- df_validation_condition_counts %>% df_pivoted <- df_pivoted %>% mutate( consultation_type = recode(consultation_type, - "sinusitis" = "Acute Sinusitis", - "infected_insect_bites" = "Infected Insect Bite", - "uncomplicated_uti" = "UTI", - "acute_otitis_media" = "Acute Otitis Media", - "acute_sore_throat" = "Acute Pharyngitis", - "shingles" = "Herpes Zoster", - "impetigo" = "Impetigo",) - + "sinusitis" = "Acute Sinusitis", + "infected_insect_bites" = "Infected Insect Bite", + "uncomplicated_uti" = "UTI", + "acute_otitis_media" = "Acute Otitis Media", + "acute_sore_throat" = "Acute Pharyngitis", + "shingles" = "Herpes Zoster", + "impetigo" = "Impetigo", + ) ) # Removing date column as this will prevent grouping (date is already pivot columns) df_pivoted <- df_pivoted %>% @@ -885,10 +881,12 @@ tab_pf_conditions_validation ``` ```{r, message=FALSE, warning=FALSE, echo = FALSE, fig.width=8} -df_long <- df_pivoted %>% pivot_longer(cols=c('02-2024_OS','02-2024_BSA','03-2024_OS','03-2024_BSA','04-2024_OS','04-2024_BSA','05-2024_OS','05-2024_BSA','06-2024_OS','06-2024_BSA','07-2024_OS','07-2024_BSA'), - names_to=c('month', 'source'), +df_long <- df_pivoted %>% pivot_longer( + cols = c("02-2024_OS", "02-2024_BSA", "03-2024_OS", "03-2024_BSA", 
"04-2024_OS", "04-2024_BSA", "05-2024_OS", "05-2024_BSA", "06-2024_OS", "06-2024_BSA", "07-2024_OS", "07-2024_BSA"), + names_to = c("month", "source"), names_sep = "_", - values_to='count') + values_to = "count" +) # Changing format of date to use label_date_short to keep dates consistent for figures df_long$month <- as.Date(paste0("01-", df_long$month), format = "%d-%m-%Y") @@ -896,13 +894,16 @@ df_long$month <- as.Date(paste0("01-", df_long$month), format = "%d-%m-%Y") validation_total_counts_figure <- ggplot(df_long, aes(x = month, y = count, color = consultation_type, group = consultation_type)) + geom_point() + geom_line(size = 0.5) + - facet_wrap(~ source, scales = "free_y") + - labs(title = "Clinical Conditions Count by Month (NHS BSA vs OpenSAFELY Data)", - x = "Month", y = "Count", color = "Clinical Condition") + + facet_wrap(~source, scales = "free_y") + + labs( + title = "Clinical Conditions Count by Month (NHS BSA vs OpenSAFELY Data)", + x = "Month", y = "Count", color = "Clinical Condition" + ) + theme( - plot.title = element_text(hjust = 0.5)) + + plot.title = element_text(hjust = 0.5) + ) + scale_x_date( - labels = scales::label_date_short() + labels = scales::label_date_short() ) validation_total_counts_figure @@ -913,29 +914,36 @@ validation_total_counts_figure df_descriptive_stats <- df_descriptive_stats %>% mutate( measure = recode(measure, - "pf_with_pfmed" = "PF Med", - "pf_with_pfcondition" = "PF Condition", - "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition", - )) + "pf_with_pfmed" = "PF Med", + "pf_with_pfcondition" = "PF Condition", + "pf_with_pfmed_and_pfcondition" = "PF Med & PF Condition", + ) + ) descriptive_stats_figure <- ggplot(df_descriptive_stats, aes(x = interval_start, y = ratio, color = measure, group = measure)) + geom_point() + geom_line(size = 0.5) + # facet_wrap(~ measure, scales = "free_y") + - labs(title = "Breakdown of PF consultations with linked PF conditions and medications", - color = "PF consultation with:") 
+ + labs( + title = "Breakdown of PF consultations with linked PF conditions and medications", + color = "PF consultation with:" + ) + theme( - plot.title = element_text(hjust = 0.5)) + + plot.title = element_text(hjust = 0.5) + ) + scale_x_date( - labels = scales::label_date_short() + labels = scales::label_date_short() + ) + + scale_y_continuous( + labels = scales::percent, + limits = c(0, 1) ) + - scale_y_continuous(labels = scales::percent, - limits = c(0,1)) + - theme(axis.title.x = element_blank(), - axis.title.y = element_blank()) + theme( + axis.title.x = element_blank(), + axis.title.y = element_blank() + ) descriptive_stats_figure - ``` -# References \ No newline at end of file +# References From 5c6ba556655df4304c628502b65c5855485194ba Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Sun, 24 Nov 2024 09:33:17 +0000 Subject: [PATCH 2/4] Add function to explore table schema for NHSBSA tables --- .../get_dataset_nhsbsa_table_schema.R | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 lib/functions/get_dataset_nhsbsa_table_schema.R diff --git a/lib/functions/get_dataset_nhsbsa_table_schema.R b/lib/functions/get_dataset_nhsbsa_table_schema.R new file mode 100644 index 0000000..04ee65d --- /dev/null +++ b/lib/functions/get_dataset_nhsbsa_table_schema.R @@ -0,0 +1,50 @@ +library("httr") +library("jsonlite") +library("crul") +library("here") +library("rvest") +library("dplyr") +library("lubridate") +library("tidyverse") +library("readr") + +# This may be useful for writing SQL queries but reading the schema from +# JSON in the metadata seems more tricky and maybe there's another way +get_dataset_table_schema <- function(dataset_id) { + base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" + package_show_method <- "package_show?id=" + + available_datasets <- get_available_datasets(remove_foi = FALSE) + + if (!dataset_id %in% available_datasets) { + stop("The provided 'dataset_id' is not available.
Run 'get_available_datasets()' to see all available datasets.", call. = FALSE) + } + + metadata_response <- GET(paste0(base_endpoint, package_show_method, dataset_id)) + resources <- content(metadata_response)$result$resources + + schema_raw <- resources[[1]]$schema + + # There seems to be a lot of odd strings in the JSON + # that needs to be fixed before we can read it + schema_fixed <- schema_raw + schema_fixed <- gsub("u\'", '"', schema_fixed) + schema_fixed <- gsub("u\"", '"', schema_fixed) + schema_fixed <- gsub("':", '":', schema_fixed) + schema_fixed <- gsub("',", '",', schema_fixed) + schema_fixed <- gsub("'}", '"}', schema_fixed) + schema_fixed <- gsub("']", '"]', schema_fixed) + schema_fixed <- gsub("-", "", schema_fixed) + schema_fixed <- gsub("-", "", schema_fixed) + tidyjson::json(schema_fixed) + schema_list <- fromJSON(schema_fixed, flatten = TRUE) + + tibble(schema_list$fields) |> + select(name, title, type, description) +} + +get_available_datasets()[3:10] +get_dataset_table_schema("contractor-details") +get_dataset_table_schema("dental-activity-delivered-by-newly-qualified-foundation-dentists") +get_dataset_table_schema("secondary-care-medicines-data") +map(get_available_datasets()[3:10], get_dataset_table_schema) From 103af265845ec1918aac82dc3f23602cb886ecf3 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Sun, 24 Nov 2024 12:30:13 +0000 Subject: [PATCH 3/4] Remove empty datasets from `get_available_datasets()` --- lib/functions/get_pf_medication_validation_data.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/functions/get_pf_medication_validation_data.R b/lib/functions/get_pf_medication_validation_data.R index e0f7bf4..7536ff5 100644 --- a/lib/functions/get_pf_medication_validation_data.R +++ b/lib/functions/get_pf_medication_validation_data.R @@ -13,7 +13,7 @@ package_list_method <- "package_list" package_show_method <- "package_show?id=" action_method <- "datastore_search_sql?" 
-get_available_datasets <- function(remove_foi = TRUE) { +get_available_datasets <- function() { base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" package_list_method <- "package_list" @@ -22,9 +22,10 @@ get_available_datasets <- function(remove_foi = TRUE) { package_list_method ))$result - if (remove_foi) { - datasets_response <- datasets_response[!grepl("^foi", datasets_response)] - } + # Remove datasets with FOI and starting with a number + # There does not seem to be any data that we can query from these tables + datasets_response <- datasets_response[!grepl("foi", datasets_response)] + datasets_response <- datasets_response[!grepl("^[0-9]", datasets_response)] datasets_response } @@ -32,7 +33,7 @@ get_available_datasets <- function(remove_foi = TRUE) { get_available_datasets() get_dataset_table_names <- function(dataset_id, start_date = NULL, end_date = NULL) { - available_datasets <- get_available_datasets(remove_foi = FALSE) + available_datasets <- get_available_datasets() if (!dataset_id %in% available_datasets) { stop("The provided 'dataset_id' is not available. Run 'get_available_datasets()' to see all available datasets.", call. 
= FALSE) From e18ce660b371cab2374057644882a246f28458a0 Mon Sep 17 00:00:00 2001 From: Milan Wiedemann Date: Sun, 24 Nov 2024 12:30:45 +0000 Subject: [PATCH 4/4] Improve `get_dataset_table_schema()` --- .../get_dataset_nhsbsa_table_schema.R | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/functions/get_dataset_nhsbsa_table_schema.R b/lib/functions/get_dataset_nhsbsa_table_schema.R index 04ee65d..5cb502f 100644 --- a/lib/functions/get_dataset_nhsbsa_table_schema.R +++ b/lib/functions/get_dataset_nhsbsa_table_schema.R @@ -8,13 +8,15 @@ library("lubridate") library("tidyverse") library("readr") +source(here("lib/functions/get_pf_medication_validation_data.R")) + # This may be useful for writing SQL queries but reading the schema from # JSON in the metadata seems more tricky and maybe there's another way get_dataset_table_schema <- function(dataset_id) { base_endpoint <- "https://opendata.nhsbsa.net/api/3/action/" package_show_method <- "package_show?id=" - available_datasets <- get_available_datasets(remove_foi = FALSE) + available_datasets <- get_available_datasets() if (!dataset_id %in% available_datasets) { stop("The provided 'dataset_id' is not available. Run 'get_available_datasets()' to see all available datasets.", call.
= FALSE) @@ -22,7 +24,7 @@ get_dataset_table_schema <- function(dataset_id) { metadata_response <- GET(paste0(base_endpoint, package_show_method, dataset_id)) resources <- content(metadata_response)$result$resources - + schema_raw <- resources[[1]]$schema # There seems to be a lot of odd strings in the JSON @@ -36,15 +38,17 @@ get_dataset_table_schema <- function(dataset_id) { schema_fixed <- gsub("']", '"]', schema_fixed) schema_fixed <- gsub("-", "", schema_fixed) schema_fixed <- gsub("-", "", schema_fixed) - tidyjson::json(schema_fixed) + schema_list <- fromJSON(schema_fixed, flatten = TRUE) tibble(schema_list$fields) |> select(name, title, type, description) } -get_available_datasets()[3:10] -get_dataset_table_schema("contractor-details") -get_dataset_table_schema("dental-activity-delivered-by-newly-qualified-foundation-dentists") -get_dataset_table_schema("secondary-care-medicines-data") -map(get_available_datasets()[3:10], get_dataset_table_schema) +nhsbsa_table_schemas <- map( + set_names(get_available_datasets()), + safely(get_dataset_table_schema) +) + +nhsbsa_table_schemas_results <- map(nhsbsa_table_schemas, "result") +nhsbsa_table_schemas_errors <- map(nhsbsa_table_schemas, "error")