Skip to content

Commit

Permalink
New feature - Translate natural geographic level naming to API abbrev…
Browse files Browse the repository at this point in the history
…iations on query input (#70)

* Added geographic level human-api lookup data

* Added logic to todf_geographies to process natural geog level names into the API-expected ones

* Updated human-friendly geog level terms

* Remove unnecessary text from geog_level_lookup code

* Updated documentation with geog_level_lookup

* Fixing usethis warning by adding to the suggests dependencies

* Update R/datasets_documentation.R

Co-authored-by: Cam Race <52536248+cjrace@users.noreply.github.com>

---------

Co-authored-by: Cam Race <52536248+cjrace@users.noreply.github.com>
  • Loading branch information
rmbielby and cjrace authored Jan 31, 2025
1 parent e089e60 commit cc85c11
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 4 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ Imports:
Suggests:
knitr,
rmarkdown,
testthat (>= 3.0.0)
testthat (>= 3.0.0),
usethis
VignetteBuilder:
knitr
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-GB
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
LazyData: true
36 changes: 36 additions & 0 deletions R/create_package_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' Create geographic_level API shorthand to natural language look-up
#'
#' @description
#' Script for updating the package data file geog_level_lookup.rda with the
#' look-up between API shorthands and natural language versions of geographic
#' levels.
#'
#' The `api_friendly` and `human_friendly` vectors are matched by position, so
#' a new geographic level has to be inserted at the same position in both.
#'
#' The data file is written with `usethis::use_data()`. As `usethis` is only a
#' suggested dependency of the package, an error is raised if it is not
#' installed.
#'
#' @returns NULL
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' create_geog_level_lookup()
#' }
create_geog_level_lookup <- function() {
  # usethis is listed under Suggests, so it cannot be assumed to be installed
  if (!requireNamespace("usethis", quietly = TRUE)) {
    stop(
      "The usethis package is required to run create_geog_level_lookup().",
      call. = FALSE
    )
  }

  geog_level_lookup <- data.frame(
    api_friendly = c(
      "EDA", "INST", "LA", "LAD",
      "LEP", "LSIP",
      "MAT", "MCA",
      "NAT", "OA",
      "PA", "PCON", "PROV",
      "REG", "RSC", "SCH", "SPON", "WARD"
    ),
    # NOTE(review): "MCA" is the only level whose natural form is still the
    # abbreviation - confirm whether a spelled-out name is wanted here.
    human_friendly = c(
      "English devolved area", "Institution", "Local authority", "Local authority district",
      "Local enterprise partnership", "Local skills improvement plan area",
      "Multi-academy trust", "MCA",
      "National", "Opportunity area",
      "Planning area", "Parliamentary constituency", "Provider",
      "Regional", "Regional school commissioner region", "School", "Sponsor", "Ward"
    )
  )

  # Write it out to the data folder, replacing any existing copy
  usethis::use_data(geog_level_lookup, overwrite = TRUE)
}
12 changes: 12 additions & 0 deletions R/datasets_documentation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#' Look-up for API geographic_level shorthands
#'
#' A look-up of API geographic_level shorthands and their respective natural
#' forms, for example `"NAT"` and `"National"`.
#'
#' @format ## `geog_level_lookup`
#' A data frame with 18 rows and 2 columns:
#' \describe{
#' \item{api_friendly}{API geographic level shorthands, e.g. `"LA"`}
#' \item{human_friendly}{Natural language geographic levels, e.g.
#' `"Local authority"`}
#' }
#' @source curated by explore.statistics@@education.gov.uk
"geog_level_lookup"
25 changes: 24 additions & 1 deletion R/query_dataset_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@ todf_geographies <- function(geographies) {
} else {
stop("The geographies parameter should be given as either a data frame, vector or string.")
}
geographies <- geographies |> dplyr::distinct()
if (any(eesyapi::geog_level_lookup$human_friendly %in% geographies$geographic_level)) {
geographies <- geographies |>
dplyr::left_join(eesyapi::geog_level_lookup, by = c("geographic_level" = "human_friendly")) |>
dplyr::mutate(
geographic_level = dplyr::case_when(
!is.na(api_friendly) ~ api_friendly,
.default = !!rlang::sym("geographic_level")
)
) |>
dplyr::select(-"api_friendly")
}
if (any(eesyapi::geog_level_lookup$human_friendly %in% geographies$location_level)) {
geographies <- geographies |>
dplyr::left_join(eesyapi::geog_level_lookup, by = c("location_level" = "human_friendly")) |>
dplyr::mutate(
location_level = dplyr::case_when(
!is.na(api_friendly) ~ api_friendly,
.default = !!rlang::sym("location_level")
)
) |>
dplyr::select(-"api_friendly")
}
geographies <- geographies |>
dplyr::distinct()
return(geographies)
}
3 changes: 2 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ reference:
contents:
- starts_with("api_url")

- title: Generate example IDs and data
- title: Generate example IDs and reference data
desc: These functions are used widely to create working example code and tests
contents:
- starts_with("example")
- geog_level_lookup
Binary file added data/geog_level_lookup.rda
Binary file not shown.
18 changes: 18 additions & 0 deletions man/create_geog_level_lookup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions man/geog_level_lookup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 35 additions & 1 deletion tests/testthat/test-query_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,44 @@ test_that("Time period query errors on badly formatted time period", {
)
})

test_that("Geography query works with NAT", {
  # Query with the API shorthand and reduce the result to the distinct
  # geographic levels that came back
  returned_levels <- query_dataset(
    example_id(group = "attendance"),
    indicators = example_id("indicator", group = "attendance"),
    time_periods = eesyapi::example_id("time_period", group = "attendance"),
    geographies = "NAT",
    filter_items = eesyapi::example_id("filter_item", group = "attendance")
  ) |>
    dplyr::select("geographic_level") |>
    dplyr::distinct()

  # Only the requested level should be present
  expected_levels <- data.frame(geographic_level = "NAT")

  expect_equal(returned_levels, expected_levels)
})

test_that("Geography query works with National", {
  # Query with the natural language level name and reduce the result to the
  # distinct geographic levels that came back
  returned_levels <- query_dataset(
    example_id(group = "attendance"),
    indicators = example_id("indicator", group = "attendance"),
    time_periods = eesyapi::example_id("time_period", group = "attendance"),
    geographies = "National",
    filter_items = eesyapi::example_id("filter_item", group = "attendance")
  ) |>
    dplyr::select("geographic_level") |>
    dplyr::distinct()

  # The returned data is expected to carry the API shorthand for the level
  expected_levels <- data.frame(geographic_level = "NAT")

  expect_equal(returned_levels, expected_levels)
})


test_that("Geography query returns expected geographies", {
expect_equal(
post_dataset(
query_dataset(
example_id(group = "attendance"),
indicators = example_id("indicator", group = "attendance"),
time_periods = eesyapi::example_id("time_period", group = "attendance"),
Expand Down

0 comments on commit cc85c11

Please sign in to comment.