From 07a91ff0d00465afd50206d8b6ca764387eb6758 Mon Sep 17 00:00:00 2001 From: Angus Moore <18376371+angusmoore@users.noreply.github.com> Date: Fri, 28 Jun 2024 14:41:25 +1000 Subject: [PATCH 1/4] Add support for bespoke MRM formatting in `read_lfs_datacube` --- R/download_data_cube.r | 54 +++++++++++++++++++----- tests/testthat/test-download_data_cube.R | 7 +++ 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/R/download_data_cube.r b/R/download_data_cube.r index cc9e0d8..99bb0cb 100644 --- a/R/download_data_cube.r +++ b/R/download_data_cube.r @@ -99,6 +99,35 @@ download_abs_data_cube <- function(catalogue_string, return(invisible(filepath)) } +read_lfs_mrm_table <- function(file, sheet, variable_name) { + df <- file |> + readxl::read_excel( + sheet = sheet, + skip = 4 + ) |> + tidyr::pivot_longer(!tidyr::matches("SA4"), names_to = "date", values_to = "value") |> + dplyr::mutate(date = as.numeric(.data$date)) |> + dplyr::filter(!is.na(.data$value)) |> + dplyr::mutate(SA4_code = substr(.data$SA4, 1, 3)) |> + dplyr::mutate(SA4_name = substr(.data$SA4, 5, nchar(.data$SA4))) |> + dplyr::mutate( + variable = variable_name, + date = as.Date(.data$date, origin = "1899-12-30") + ) + df[, c("SA4_code", "SA4_name", "variable", "date", "value")] +} + +read_lfs_mrm <- function(file) { + dplyr::bind_rows( + read_lfs_mrm_table(file, "Table 1", "employed_persons_000s"), + read_lfs_mrm_table(file, "Table 2", "unemployed_persons_000s"), + read_lfs_mrm_table(file, "Table 3", "nilf_persons_000s"), + read_lfs_mrm_table(file, "Table 4", "emp_to_pop_ratio"), + read_lfs_mrm_table(file, "Table 5", "unemployment_rate"), + read_lfs_mrm_table(file, "Table 6", "participation_rate") + ) +} + #' Convenience function to download and tidy data cubes from #' ABS Labour Force, Australia, Detailed. #' @param cube character. 
A character string that is either the complete filename @@ -118,17 +147,22 @@ read_lfs_datacube <- function(cube, cube = cube, path = path ) - df <- file |> - readxl::read_excel( - sheet = "Data 1", - skip = 3 - ) |> - rename(date = 1) %>% - mutate(date = as.Date(date)) - colnames(df) <- tolower(colnames(df)) - colnames(df) <- gsub(" |-|:", "_", colnames(df)) - colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df)) + if (cube == "MRM" || cube == "MRM1") { + df <- read_lfs_mrm(file) + } else { + df <- file |> + readxl::read_excel( + sheet = "Data 1", + skip = 3 + ) |> + rename(date = 1) %>% + mutate(date = as.Date(date)) + + colnames(df) <- tolower(colnames(df)) + colnames(df) <- gsub(" |-|:", "_", colnames(df)) + colnames(df) <- gsub("\\(|\\)|\\'", "", colnames(df)) + } df } diff --git a/tests/testthat/test-download_data_cube.R b/tests/testthat/test-download_data_cube.R index f8351bc..f41d211 100644 --- a/tests/testthat/test-download_data_cube.R +++ b/tests/testthat/test-download_data_cube.R @@ -9,3 +9,10 @@ test_that("read_lfs_datacube", { expect_length(lm1, 10) expect_gt(nrow(lm1), 300000) }) + +# Test for MRM, which has a different format, and therefore its own parsing logic +test_that("read_lfs_datacube - MRM", { + mrm1 <- read_lfs_datacube("MRM1") + expect_s3_class(mrm1, "tbl_df") + expect_gt(nrow(mrm1), 30000) +}) From 484b0a750200251807544138c0d303fe8ad6327e Mon Sep 17 00:00:00 2001 From: Matt Cowgill Date: Sun, 4 Aug 2024 09:23:58 +1000 Subject: [PATCH 2/4] iterate version, add @AngusMoore as ctb --- DESCRIPTION | 6 ++++-- NEWS.md | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index da57ac2..a847c82 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,16 @@ Package: readabs Type: Package Title: Download and Tidy Time Series Data from the Australian Bureau of Statistics -Version: 0.4.16.903 +Version: 0.4.16.904 Authors@R: c( person("Matt", "Cowgill", role = c("aut", "cre"), email = "mattcowgill@gmail.com", comment = 
c(ORCID = "0000-0003-0422-3300")), person("Zoe", "Meers", role = "aut", email = "zoe.meers@sydney.edu.au"), person("Jaron", "Lee", role = "aut", email = "jaron.lee@sydney.edu.au"), person("David", "Diviny", role = "aut", email = "David.Diviny@nousgroup.com.au"), person("Hugh", "Parsonage", role = "ctb", email = "hugh.parsonage@gmail.com"), - person("Kinto", "Behr", role = "ctb", email = "kinto.behr@gmail.com")) + person("Kinto", "Behr", role = "ctb", email = "kinto.behr@gmail.com") + person("Angus", "Moore", role = "ctb") + ) Maintainer: Matt Cowgill Description: Downloads, imports, and tidies time series data from the Australian Bureau of Statistics . diff --git a/NEWS.md b/NEWS.md index c2de488..cf8c56e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # readabs development version * The experimental `readabs::read_api()` function no longer coerces columns describing the data categories to numeric. Thanks to @kletts. +* read_lfs_datacube() now able to fetch modelled SA4 labour force estimates, thanks to @AngusMoore * Bug fix to read_job_mobility() # readabs 0.4.16 From bd09b03a1d3694749bdc0583f69b6439a602a24a Mon Sep 17 00:00:00 2001 From: Matt Cowgill Date: Sun, 4 Aug 2024 10:03:49 +1000 Subject: [PATCH 3/4] fix DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a847c82..827a26d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,7 @@ Authors@R: c( person("Jaron", "Lee", role = "aut", email = "jaron.lee@sydney.edu.au"), person("David", "Diviny", role = "aut", email = "David.Diviny@nousgroup.com.au"), person("Hugh", "Parsonage", role = "ctb", email = "hugh.parsonage@gmail.com"), - person("Kinto", "Behr", role = "ctb", email = "kinto.behr@gmail.com") + person("Kinto", "Behr", role = "ctb", email = "kinto.behr@gmail.com"), person("Angus", "Moore", role = "ctb") ) Maintainer: Matt Cowgill From 3af673ef7af92fbca052d396beca5a6521a6ad60 Mon Sep 17 00:00:00 2001 From: Matt Cowgill Date: Sun, 4 
Aug 2024 10:04:31 +1000 Subject: [PATCH 4/4] use newer Roxygen --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 827a26d..60e2a17 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,7 @@ Imports: labelled URL: https://github.com/mattcowgill/readabs BugReports: https://github.com/mattcowgill/readabs/issues -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr Suggests: knitr,