Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

read_erp function #261

Merged
merged 2 commits into from
Jan 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions R/read_erp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#' Download a tidy tibble containing the Estimated Residential Population from the ABS
#'
#' \code{read_erp()} uses the \code{read_abs()} function to download, import,
#' and tidy the Estimated Residential Population from the ABS. It allows the user
#' to specify age, sex and states/territories of interest. It returns a tibble
#' containing five columns: the date, the age range, sex and states that the ERP
#' corresponds to. This makes joining the ERP to another dataframe easy.
#'
#' @param age_range numeric; default is "0:100". A vector containing ages in single
#' years for which an ERP is sought. The ABS top-code ages at 100.
#'
#' @param sex character; default is "Persons". Other values are "Male" and
#' "Female".
#'
#' @param states character; default is "Australia". Other values are the full
#' or abbreviated names of the states and self-governing territories.
#'
#' @param path character; default is "data/ABS". Only used if
#' retain_files is set to TRUE. Local directory in which to save
#' downloaded ABS time series spreadsheets.
#'
#' @param show_progress_bars logical; TRUE by default. If set to FALSE, progress
#' bars will not be shown when ABS spreadsheets are downloading.
#'
#' @param check_local logical; FALSE by default. See \code{?read_abs}.
#'
#' @param retain_files logical; FALSE by default. When TRUE, the spreadsheets
#' downloaded from the ABS website will be saved in the
#' directory specified with 'path'.
#'
#' @examples
#'
#' # Create a tibble called 'erp' that contains the ERP index
#' # numbers for 30 June each year for Australia.
#' \donttest{
#' erp <- read_erp()
#' }
#'
#' @importFrom dplyr filter select tibble mutate group_by summarise arrange
#' @importFrom tools toTitleCase
#'
#' @export

read_erp <- function(age_range = 0:100,
sex = c("Persons", "Male", "Female"),
states = c("Australia", "New South Wales", "Victoria", "Queensland",
"South Australia", "Western Australia", "Tasmania",
"Northern Territory", "Australian Capital Territory"),
path = Sys.getenv("R_READABS_PATH", unset = tempdir()),
show_progress_bars = TRUE,
check_local = FALSE,
retain_files = FALSE) {
if (!is.numeric(age_range)) {
stop("The age_range argument must be a numeric vector.")

Check warning on line 54 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L53-L54

Added lines #L53 - L54 were not covered by tests
}

# Check if all elements are integers
if (any(age_range != as.integer(age_range))) {
stop("All elements in age_range must be integers.")

Check warning on line 59 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L58-L59

Added lines #L58 - L59 were not covered by tests
}

# Check if all elements are between 0 and 100
if (any(age_range < 0 | age_range > 100)) {
stop("All elements in age_range must be between 0 and 100.")

Check warning on line 64 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L63-L64

Added lines #L63 - L64 were not covered by tests
}

# Restrict the 'sex' argument to the valid choices
sex_options <- c("Persons", "Male", "Female")

Check warning on line 68 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L68

Added line #L68 was not covered by tests

# Check if the input is a subset of the valid options
if (!all(sex %in% sex_options)) {
invalid_values <- sex[!sex %in% sex_options]
stop("Invalid value(s): ", paste(invalid_values, collapse = ", "),
". Allowed values are: Persons, Male, Female.")

Check warning on line 74 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L71-L74

Added lines #L71 - L74 were not covered by tests
}

# Restrict the states argument to valid choices but include abbreviations
# Always return the full name if an abbreviation has been used.
stes <- unname(vapply(states, validate_state, FUN.VALUE = character(1)))

Check warning on line 79 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L79

Added line #L79 was not covered by tests

if (!is.logical(retain_files)) {
stop("`retain_files` must be either `TRUE` or `FALSE`.")

Check warning on line 82 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L81-L82

Added lines #L81 - L82 were not covered by tests
}

if (!is.logical(show_progress_bars)) {
stop("`show_progress_bars` must be either `TRUE` or `FALSE`")

Check warning on line 86 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L85-L86

Added lines #L85 - L86 were not covered by tests
}

if (retain_files == TRUE & !is.character(path)) {
stop(
"If `retain_files` is `TRUE`,",
" you must specify a valid file path in `path`."

Check warning on line 92 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L89-L92

Added lines #L89 - L92 were not covered by tests
)
}

ste_tbls <- state_lookup$tbl_n[state_lookup$full_name %in% stes]

Check warning on line 96 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L96

Added line #L96 was not covered by tests

erp_raw <- read_abs(
cat_no = "3101.0",
tables = ste_tbls,
retain_files = retain_files,
check_local = check_local,
show_progress_bars = show_progress_bars,
path = path

Check warning on line 104 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L98-L104

Added lines #L98 - L104 were not covered by tests
)


x <- erp_raw %>%
dplyr::mutate(
age = gsub("[^0-9]", "", series),
series_sex = gsub(".*;\\s*(Male|Female|Persons)\\s*;.*", "\\1", series),
state = trimws(gsub(".*,(\\s*[^,]+)$", "\\1", table_title))

Check warning on line 112 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L108-L112

Added lines #L108 - L112 were not covered by tests
) %>%
filter(age %in% age_range,
series_sex %in% sex) %>%
group_by(date, series_sex, state) %>%
summarise(erp = sum(value)) %>%
arrange(state, series_sex, date) %>%
select(date, state, sex=series_sex, erp)

Check warning on line 119 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L114-L119

Added lines #L114 - L119 were not covered by tests

x

Check warning on line 121 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L121

Added line #L121 was not covered by tests
}

# Lookup table of state/territory names, abbreviations and ABS ERP table numbers
state_lookup <- tibble(full_name=c("Australia", "New South Wales", "Victoria", "Queensland",
"South Australia", "Western Australia", "Tasmania",
"Northern Territory", "Australian Capital Territory"),
abbrev=c("Aus", "NSW", "Vic", "Qld", "SA", "WA", "Tas", "NT", "ACT"),
tbl_n=c(59, 51:58))

# Ensure that user specified state/territory names are acceptable. Allow full
# names or abbreviations. Throw an error if they are incorrect.
# Always return the full name.
validate_state <- function(state) {
# Define valid states and their abbreviations
valid_states <- state_lookup$full_name
valid_abbreviations <- state_lookup$abbrev

Check warning on line 137 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L136-L137

Added lines #L136 - L137 were not covered by tests

# Allow trailing periods in abbreviations
valid_abbreviations_with_period <- paste0(valid_abbreviations, ".")

Check warning on line 140 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L140

Added line #L140 was not covered by tests

# Combine all valid inputs
all_valid_inputs <- c(valid_states, valid_abbreviations, valid_abbreviations_with_period)

Check warning on line 143 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L143

Added line #L143 was not covered by tests

# Create a lookup table (map all variants to full state names)
state_map <- setNames(rep(valid_states, times = 3),
c(valid_states, valid_abbreviations, valid_abbreviations_with_period))

Check warning on line 147 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L146-L147

Added lines #L146 - L147 were not covered by tests

# Standardize input to lowercase and remove trailing periods in case
# people do things like "Vic." or "Tas."
state_cleaned <- gsub("\\.$", "", state) # Remove trailing period
state_cleaned <- tolower(state_cleaned) # Convert to lowercase

Check warning on line 152 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L151-L152

Added lines #L151 - L152 were not covered by tests

# Create a cleaned version of the map for case-insensitive matching
names(state_map) <- tolower(names(state_map))

Check warning on line 155 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L155

Added line #L155 was not covered by tests

# Check if the input matches a valid state or abbreviation
if (!state_cleaned %in% names(state_map)) {
stop("Invalid state. Please use a valid state name or abbreviation.")

Check warning on line 159 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L158-L159

Added lines #L158 - L159 were not covered by tests
}

# Return the standard full state name
return(unname(tools::toTitleCase(state_map[state_cleaned])))

Check warning on line 163 in R/read_erp.R

View check run for this annotation

Codecov / codecov/patch

R/read_erp.R#L163

Added line #L163 was not covered by tests
}
Loading