Skip to content

Commit

Permalink
Small chunk of optimisation/paging redesign (#49)
Browse files Browse the repository at this point in the history
* Re-jigged the paging on post_dataset to not do the parsing until after everything's been retrieved

* Re-jigged the paging on get_dataset to not do the parsing until after everything's been retrieved

* Updated logic on setting indicator defaulting and updated associated test

* Updated default pagesize on query dataset to 10,000

* Updated large volume data warning on post_dataset

* Minor updates to docs

* Another minor update to docs
  • Loading branch information
rmbielby authored Oct 22, 2024
1 parent c6635a8 commit 33972a4
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 19 deletions.
6 changes: 5 additions & 1 deletion R/api_url_pages.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
#' automatically generate API request URLs, but is exported as part of the package for any users
#' who wish to generate their own URLs.
#'
#' @param page_size Number of results to return in a single query (max 40)
#' @param page_size Number of rows to return in a single query. The maximum allowable value varies
#' between query type:
#' - get_publications: 40
#' - get_data_catalogue: 20
#' - query_dataset: 10000
#' @param page Page number to return
#'
#' @return String containing pages query
Expand Down
24 changes: 19 additions & 5 deletions R/get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ get_dataset <- function(
dataset_version = NULL,
api_version = NULL,
page = NULL,
page_size = 1000,
page_size = 10000,
parse = TRUE,
verbose = FALSE) {
response <- eesyapi::api_url(
Expand All @@ -56,12 +56,22 @@ get_dataset <- function(
if (verbose) {
message(paste("Total number of pages: ", response_json$paging$totalPages))
}
dfresults <- response_json$results |>
eesyapi::parse_api_dataset(dataset_id = dataset_id, verbose = verbose)
dfresults <- response_json |>
magrittr::extract2("results")
# Unless the user has requested a specific page, then assume they'd like all pages collated and
# recursively run the query.
if (is.null(page)) {
if (response_json$paging$totalPages > 1) {
if (response_json$paging$totalPages * page_size > 100000) {
message(
paste(
"Downloading up to", response_json$paging$totalPages * page_size, "rows.",
"This may take a while.",
"We recommend downloading the full data set using download_dataset()",
"for large volumes of data"
)
)
}
for (page in c(2:response_json$paging$totalPages)) {
response_page <- eesyapi::api_url(
"get-data",
Expand All @@ -81,11 +91,15 @@ get_dataset <- function(
response_page |> eesyapi::warning_max_pages()
dfresults <- dfresults |>
dplyr::bind_rows(
response_page$results |>
eesyapi::parse_api_dataset(verbose = verbose)
response_page |>
magrittr::extract2("results")
)
}
}
}
if (parse) {
dfresults <- dfresults |>
eesyapi::parse_api_dataset(dataset_id, verbose = verbose)
}
return(dfresults)
}
27 changes: 21 additions & 6 deletions R/post_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ post_dataset <- function(
dataset_version = NULL,
api_version = NULL,
page = NULL,
page_size = 1000,
page_size = 10000,
parse = TRUE,
debug = FALSE,
verbose = FALSE) {
Expand All @@ -67,7 +67,7 @@ post_dataset <- function(
warning(
paste(
"json_query is set - ignoring indicators, time_periods, geographies",
" and filter_items params."
"and filter_items params."
)
)
}
Expand Down Expand Up @@ -120,12 +120,23 @@ post_dataset <- function(
if (verbose) {
message(paste("Total number of pages: ", response_json$paging$totalPages))
}
dfresults <- response_json$results |>
eesyapi::parse_api_dataset(dataset_id, verbose = verbose)
dfresults <- response_json |>
magrittr::extract2("results")

# Unless the user has requested a specific page, then assume they'd like all pages collated and
# recursively run the query.
if (is.null(page) && is.null(json_query)) {
if (response_json$paging$totalPages > 1) {
if (response_json$paging$totalPages * page_size > 100000) {
message(
paste(
"Downloading up to", response_json$paging$totalPages * page_size, "rows.",
"This may take a while.",
"We recommend downloading the full data set using preview_dataset()",
"for large volumes of data."
)
)
}
for (page in c(2:response_json$paging$totalPages)) {
json_body <- eesyapi::parse_tojson_params(
indicators = indicators,
Expand All @@ -151,11 +162,15 @@ post_dataset <- function(
response_page |> eesyapi::warning_max_pages()
dfresults <- dfresults |>
dplyr::bind_rows(
response_page$results |>
eesyapi::parse_api_dataset(dataset_id, verbose = verbose)
response_page |>
magrittr::extract2("results")
)
}
}
}
if (parse) {
dfresults <- dfresults |>
eesyapi::parse_api_dataset(dataset_id, verbose = verbose)
}
return(dfresults)
}
8 changes: 7 additions & 1 deletion R/query_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ query_dataset <- function(
method = "POST",
dataset_version = NULL,
api_version = NULL,
page_size = 1000,
page_size = 10000,
page = NULL,
debug = FALSE,
verbose = FALSE) {
Expand All @@ -193,6 +193,12 @@ query_dataset <- function(
)
)
}
if (is.null(indicators) && (is.null(json_query) || method == "GET")) {
warning("No indicators provided, defaulted to using all indicators from meta data")
indicators <- eesyapi::get_meta(dataset_id) |>
magrittr::extract2("indicators") |>
dplyr::pull("col_id")
}
if (method == "POST") {
eesyapi::post_dataset(
dataset_id = dataset_id,
Expand Down
8 changes: 7 additions & 1 deletion man/api_url_pages.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/post_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/query_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions tests/testthat/test-query_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ test_that("Invalid dataset_id", {
})

test_that("No indicator supplied", {
expect_error(
query_dataset(example_id())
expect_warning(
query_dataset(example_id()),
"No indicators provided, defaulted to using all indicators from meta data"
)
})

Expand Down

0 comments on commit 33972a4

Please sign in to comment.