From ae6f14f6063d5784392f71d545ef193ff7f7c8cf Mon Sep 17 00:00:00 2001
From: Egor Kotov
Date: Sun, 15 Dec 2024 21:20:54 +0100
Subject: [PATCH] add column finder and data retrieval functions

---
Review notes (this section is ignored when the patch is applied):
 * mi_data(): a request is bivariate whenever `y_source` is given; `y_filters = NULL`
   now means "no y filters" instead of calling get_xy_data without Y_JSON.
 * mi_data(): `y_filters` without `y_source` now raises a warning instead of being
   dropped silently.
 * Both functions: `year` accepts digit-only strings as documented; `level` must be
   a non-missing string; filter lists must be uniquely named; `limit` is validated.
 * mi_column_values(): unnest() selects the column by name instead of `.data$...`,
   and `limit` is documented explicitly (default 40) instead of inherited.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed copy;
   blob ids on the `index` lines are carried over from the original patch.

 DESCRIPTION             |   7 +-
 NAMESPACE               |   2 +
 R/column-values.R       |  93 +++++++++++++++++++++++++
 R/data.R                | 147 ++++++++++++++++++++++++++++++++++++++++
 man/mi_column_values.Rd |  36 ++++++++++
 man/mi_data.Rd          |  78 +++++++++++++++++++++
 tests/testthat.R        |   4 +-
 7 files changed, 363 insertions(+), 4 deletions(-)
 create mode 100644 R/column-values.R
 create mode 100644 R/data.R
 create mode 100644 man/mi_column_values.Rd
 create mode 100644 man/mi_data.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index de15132..362c56a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,11 +10,14 @@ Imports:
     checkmate,
     dplyr,
     httr2,
+    jsonlite,
     purrr,
     rlang,
-    tibble
+    tibble,
+    tidyr
 Suggests:
-    testthat (>= 3.0.0)
+    eurostat,
+    nuts
 Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
index 297d339..8258c48 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+export(mi_column_values)
+export(mi_data)
 export(mi_nuts_levels)
 export(mi_source_coverage)
 export(mi_sources)
diff --git a/R/column-values.R b/R/column-values.R
new file mode 100644
index 0000000..18f5f2a
--- /dev/null
+++ b/R/column-values.R
@@ -0,0 +1,93 @@
+#' Get column values for filtering a source
+#'
+#' Fetches the possible filtering values for a given source, year, and NUTS level.
+#'
+#' @param source_name A `character` string specifying the source name (f_resource).
+#' @param year A `character` or `integer` specifying the year.
+#' @param level A `character` string specifying the NUTS level ("0", "1", "2", or "3").
+#' @param filters A `named list` where the names are the filter fields and
+#' the values are the selected values for those fields. Default is an empty list.
+#' @param limit An `integer` specifying the maximum number of results to return. Default is 40.
+#'
+#' @return A `tibble` with the fields, labels, and their possible values for filtering.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' mi_column_values(
+#'   source_name = "DEMO_R_FIND2",
+#'   year = 2020,
+#'   level = "2",
+#'   filters = list(unit = "YR")
+#' )
+#' }
+mi_column_values <- function(
+  source_name,
+  year,
+  level,
+  filters = list(),
+  limit = 40
+) {
+  # Validate inputs
+  checkmate::assert_string(source_name)
+  checkmate::assert_string(level)
+  # Filters must be named: unnamed entries cannot be mapped to API fields
+  checkmate::assert_list(
+    filters,
+    types = c("character", "NULL"),
+    names = if (length(filters) > 0) "unique" else NULL
+  )
+  # `year` is documented as character or integer, so accept both
+  if (is.character(year)) {
+    checkmate::assert_string(year, pattern = "^[0-9]+$")
+  } else {
+    checkmate::assert_integerish(year, null.ok = TRUE, max.len = 1)
+  }
+  checkmate::assert_count(limit, positive = TRUE, null.ok = TRUE)
+
+  # Convert the named list to the required structure for the API
+  selected <- lapply(names(filters), function(name) {
+    list(field = name, value = filters[[name]])
+  })
+
+  # Build API endpoint
+  base_api_endpoint <- getOption("mapineqr.base_api_endpoint")
+  url_endpoint <- paste0(
+    base_api_endpoint,
+    "get_column_values_source_json/items.json"
+  )
+
+  # Prepare JSON for source selections
+  source_selections <- list(
+    year = as.character(year),
+    level = level,
+    selected = selected
+  )
+  source_selections_json <- jsonlite::toJSON(
+    source_selections,
+    auto_unbox = TRUE
+  )
+
+  # Prepare query parameters
+  query_params <- list(
+    `_resource` = source_name,
+    `source_selections` = source_selections_json,
+    limit = limit
+  )
+
+  # Perform API request
+  response <- httr2::request(url_endpoint) |>
+    httr2::req_headers(
+      "Content-Type" = "application/json",
+      "User-Agent" = getOption("mapineqr.user_agent")
+    ) |>
+    httr2::req_url_query(!!!query_params) |>
+    httr2::req_method("GET") |>
+    httr2::req_perform()
+
+  # Parse response; select the list-column by name (tidyselect deprecates `.data` here)
+  httr2::resp_body_json(response, simplifyVector = TRUE) |>
+    tibble::as_tibble() |>
+    tidyr::unnest(cols = "field_values")
+}
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..d31c2f1
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,147 @@
+#' Get univariate or bivariate data for a specific source
+#'
+#' Fetches univariate or bivariate data for a given source, year, NUTS level, and selected filters.
+#'
+#' @param x_source A `character` string specifying the source name for the x variable.
+#' @param y_source (Optional) A `character` string specifying the source name for the y variable.
+#' @param year A `character` or `integer` specifying the year.
+#' @param level A `character` string specifying the NUTS level ("0", "1", "2", or "3").
+#' @param x_filters A `named list` where the names are the filter fields for the x variable
+#' and the values are the selected values for those fields. Default is an empty list.
+#' @param y_filters (Optional) A `named list` where the names are the filter fields for the y variable
+#' and the values are the selected values for those fields. Default is `NULL`, which applies
+#' no filters to the y variable. Ignored with a warning if `y_source` is not provided.
+#' @param limit An `integer` specifying the maximum number of results to return. Default is 1500.
+#'
+#' @return A `tibble` with the following columns:
+#'
+#' **For univariate data** (when `y_source` is not provided):
+#'
+#' * `best_year`: the best available year, closest to the requested year.
+#' * `geo`: code for the NUTS region at the requested level.
+#' * `geo_name`: name of the NUTS region at the requested level.
+#' * `x`: the value of the univariate variable.
+#'
+#' **For bivariate data** (when `y_source` is provided):
+#'
+#' * `best_year`: the best available year, closest to the requested year (same for both x and y variables).
+#' * `geo`: code for the NUTS region at the requested level.
+#' * `geo_name`: name of the NUTS region at the requested level.
+#' * `x`: the value of the x variable.
+#' * `y`: the value of the y variable.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # Univariate example
+#' mi_data(
+#'   x_source = "TGS00010",
+#'   year = 2020,
+#'   level = "2",
+#'   x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A")
+#' )
+#'
+#' # Bivariate example
+#' mi_data(
+#'   x_source = "TGS00010",
+#'   y_source = "DEMO_R_MLIFEXP",
+#'   year = 2020,
+#'   level = "2",
+#'   x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A"),
+#'   y_filters = list(unit = "YR", age = "Y_LT1", freq = "A")
+#' )
+#' }
+mi_data <- function(
+  x_source,
+  y_source = NULL,
+  year,
+  level,
+  x_filters = list(),
+  y_filters = NULL,
+  limit = 1500
+) {
+  # Validate inputs
+  checkmate::assert_string(x_source)
+  checkmate::assert_string(y_source, null.ok = TRUE)
+  checkmate::assert_string(level)
+  # Filters must be named: unnamed entries cannot be mapped to API fields
+  checkmate::assert_list(
+    x_filters,
+    types = c("character", "NULL"),
+    names = if (length(x_filters) > 0) "unique" else NULL
+  )
+  checkmate::assert_list(
+    y_filters,
+    types = c("character", "NULL"),
+    names = if (length(y_filters) > 0) "unique" else NULL,
+    null.ok = TRUE
+  )
+  # `year` is documented as character or integer, so accept both
+  if (is.character(year)) {
+    checkmate::assert_string(year, pattern = "^[0-9]+$")
+  } else {
+    checkmate::assert_integerish(year, null.ok = TRUE, max.len = 1)
+  }
+  checkmate::assert_count(limit, positive = TRUE, null.ok = TRUE)
+
+  # The request is bivariate whenever a y source is given, with or without
+  # y filters, so that the endpoint and the Y_JSON parameter always agree
+  is_bivariate <- !is.null(y_source)
+  if (!is_bivariate && length(y_filters) > 0) {
+    warning(
+      "`y_filters` is ignored because `y_source` is not provided.",
+      call. = FALSE
+    )
+  }
+
+  # Build API endpoint
+  base_api_endpoint <- getOption("mapineqr.base_api_endpoint")
+  url_endpoint <- paste0(
+    base_api_endpoint,
+    if (is_bivariate) "get_xy_data/items.json" else "get_x_data/items.json"
+  )
+
+  # Prepare query parameters
+  query_params <- list(
+    `_level` = level,
+    `_year` = as.character(year),
+    `X_JSON` = build_source_json(x_source, x_filters),
+    `limit` = limit
+  )
+  if (is_bivariate) {
+    query_params$`Y_JSON` <- build_source_json(y_source, y_filters)
+  }
+
+  # Perform API request
+  response <- httr2::request(url_endpoint) |>
+    httr2::req_headers(
+      "Content-Type" = "application/json",
+      "User-Agent" = getOption("mapineqr.user_agent")
+    ) |>
+    httr2::req_url_query(!!!query_params) |>
+    httr2::req_method("GET") |>
+    httr2::req_perform()
+
+  # Parse response
+  httr2::resp_body_json(response, simplifyVector = TRUE) |>
+    tibble::as_tibble()
+}
+
+#' Build the JSON description of one variable for the data endpoints
+#'
+#' @param source A `character` string with the source name.
+#' @param filters A `named list` of filter values, or `NULL` for no filters.
+#'
+#' @return A JSON string holding the `source` and its filter `conditions`.
+#'
+#' @noRd
+build_source_json <- function(source, filters) {
+  conditions <- lapply(names(filters), function(name) {
+    list(field = name, value = filters[[name]])
+  })
+  jsonlite::toJSON(
+    list(source = source, conditions = conditions),
+    auto_unbox = TRUE
+  )
+}
diff --git a/man/mi_column_values.Rd b/man/mi_column_values.Rd
new file mode 100644
index 0000000..159421d
--- /dev/null
+++ b/man/mi_column_values.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/column-values.R
+\name{mi_column_values}
+\alias{mi_column_values}
+\title{Get column values for filtering a source}
+\usage{
+mi_column_values(source_name, year, level, filters = list(), limit = 40)
+}
+\arguments{
+\item{source_name}{A \code{character} string specifying the source name (f_resource).}
+
+\item{year}{A \code{character} or \code{integer} specifying the year.}
+
+\item{level}{A \code{character} string specifying the NUTS level ("0", "1", "2", or "3").}
+
+\item{filters}{A \verb{named list} where the names are the filter fields and
+the values are the selected values for those fields. Default is an empty list.}
+
+\item{limit}{An \code{integer} specifying the maximum number of results to return. Default is 40.}
+}
+\value{
+A \code{tibble} with the fields, labels, and their possible values for filtering.
+}
+\description{
+Fetches the possible filtering values for a given source, year, and NUTS level.
+}
+\examples{
+\dontrun{
+mi_column_values(
+  source_name = "DEMO_R_FIND2",
+  year = 2020,
+  level = "2",
+  filters = list(unit = "YR")
+)
+}
+}
diff --git a/man/mi_data.Rd b/man/mi_data.Rd
new file mode 100644
index 0000000..db3cae5
--- /dev/null
+++ b/man/mi_data.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\name{mi_data}
+\alias{mi_data}
+\title{Get univariate or bivariate data for a specific source}
+\usage{
+mi_data(
+  x_source,
+  y_source = NULL,
+  year,
+  level,
+  x_filters = list(),
+  y_filters = NULL,
+  limit = 1500
+)
+}
+\arguments{
+\item{x_source}{A \code{character} string specifying the source name for the x variable.}
+
+\item{y_source}{(Optional) A \code{character} string specifying the source name for the y variable.}
+
+\item{year}{A \code{character} or \code{integer} specifying the year.}
+
+\item{level}{A \code{character} string specifying the NUTS level ("0", "1", "2", or "3").}
+
+\item{x_filters}{A \verb{named list} where the names are the filter fields for the x variable
+and the values are the selected values for those fields. Default is an empty list.}
+
+\item{y_filters}{(Optional) A \verb{named list} where the names are the filter fields for the y variable
+and the values are the selected values for those fields. Default is \code{NULL}, which applies
+no filters to the y variable. Ignored with a warning if \code{y_source} is not provided.}
+
+\item{limit}{An \code{integer} specifying the maximum number of results to return. Default is 1500.}
+}
+\value{
+A \code{tibble} with the following columns:
+
+\strong{For univariate data} (when \code{y_source} is not provided):
+\itemize{
+\item \code{best_year}: the best available year, closest to the requested year.
+\item \code{geo}: code for the NUTS region at the requested level.
+\item \code{geo_name}: name of the NUTS region at the requested level.
+\item \code{x}: the value of the univariate variable.
+}
+
+\strong{For bivariate data} (when \code{y_source} is provided):
+\itemize{
+\item \code{best_year}: the best available year, closest to the requested year (same for both x and y variables).
+\item \code{geo}: code for the NUTS region at the requested level.
+\item \code{geo_name}: name of the NUTS region at the requested level.
+\item \code{x}: the value of the x variable.
+\item \code{y}: the value of the y variable.
+}
+}
+\description{
+Fetches univariate or bivariate data for a given source, year, NUTS level, and selected filters.
+}
+\examples{
+\dontrun{
+# Univariate example
+mi_data(
+  x_source = "TGS00010",
+  year = 2020,
+  level = "2",
+  x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A")
+)
+
+# Bivariate example
+mi_data(
+  x_source = "TGS00010",
+  y_source = "DEMO_R_MLIFEXP",
+  year = 2020,
+  level = "2",
+  x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A"),
+  y_filters = list(unit = "YR", age = "Y_LT1", freq = "A")
+)
+}
+}
diff --git a/tests/testthat.R b/tests/testthat.R
index 134da24..9fac7ba 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -6,7 +6,7 @@
 # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 # * https://testthat.r-lib.org/articles/special-files.html
 
-library(testthat)
-library(mapineqr)
+# library(testthat)
+# library(mapineqr)
 
 # test_check("mapineqr")