Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

run data-raw files + bug fixes #113

Merged
merged 2 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Maintainer: Will Mackey <wfmackey@gmail.com>
License: GPL-3
Description: This package provides data and functions for working with common structures and classifications used in Australia.
Depends:
R (>= 3.5)
R (>= 3.5)
Imports:
dplyr (>= 0.7),
lifecycle,
Expand All @@ -33,11 +33,14 @@ Imports:
parsedate
Suggests:
testthat,
curl,
sf,
here,
ggplot2,
bench
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
URL: https://runapp-aus.github.io/strayr/
BugReports: https://github.com/runapp-aus/strayr/issues/
RdMacros: lifecycle
18 changes: 9 additions & 9 deletions R/seifa.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
#' \url{https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021}

#' @param structure character value for the desired spatial area. Must be one of:
#' \itemize{
#' \describe{
#' \item{sa1}{ - download size 51.6 MB}
#' \item{sa2}{ - download size 1.9 MB}
#' \item{lga}{ - download size 660 KB}
#' \item{postcode}{ - download size 2.3 MB}
#' \item{suburb}{ - download size 11.3 MB}
#' }
#' @param data_subclass character vector matching available SEIFA indexes:
#' \itemize{
#' \describe{
#' \item{irsed}{ - Index of Relative Socio-economic Disadvantage}
#' \item{irsead}{ - Index of Relative Socio-economic Advantage and Disadvantage}
#' \item{ier}{ - Index of Economic Resources}
Expand Down Expand Up @@ -297,18 +297,18 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
col_names = column_names,
na = c("", "NA", "-")
) %>%
dplyr::filter(if_all(ends_with(c("_name","_code")), ~ !is.na(.x))) %>%
select(-starts_with("blank")) %>%
mutate(
dplyr::filter(dplyr::if_all(dplyr::ends_with(c("_name","_code")), ~ !is.na(.x))) %>%
dplyr::select(-dplyr::starts_with("blank")) %>%
dplyr::mutate(
structure = structure,
year = year
) %>%
mutate(across(
.cols = any_of(ends_with("_code")), # Specify the column name
dplyr::mutate(dplyr::across(
.cols = dplyr::any_of(ends_with("_code")), # Specify the column name
.fns = ~ as.character(.) # Conditionally convert to character
)) %>%
relocate(structure)
dplyr::relocate(structure)
})

return(df)
}
Binary file modified R/sysdata.rda
Binary file not shown.
131 changes: 79 additions & 52 deletions data-raw/create_anzsco2009.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,58 +20,75 @@ download.file(anzsco_url, temp_path, mode = "wb")

# Read
raw <- readxl::read_excel(temp_path,
sheet = 6,
range = "A11:G1555",
col_names = FALSE) %>%
sheet = 6,
range = "A11:G1555",
col_names = FALSE
) %>%
janitor::clean_names()

# Extract each level:
anzsco1 <- raw %>%
filter(!is.na(x1)) %>%
select(anzsco1_code = 1,
anzsco1 = 2) %>%
select(
anzsco1_code = 1,
anzsco1 = 2
) %>%
mutate(anzsco1_code = as.character(anzsco1_code))

anzsco2 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
filter(!is.na(x2)) %>%
select(anzsco2_code = 2,
anzsco2 = 3) %>%
select(
anzsco2_code = 2,
anzsco2 = 3
) %>%
mutate(anzsco1_code = substr(anzsco2_code, 1, 1))

anzsco3 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
filter(!is.na(x3)) %>%
select(anzsco3_code = 3,
anzsco3 = 4) %>%
mutate(anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco3_code = 3,
anzsco3 = 4
) %>%
mutate(
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco4 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
filter(!is.na(x4)) %>%
select(anzsco4_code = 4,
anzsco4 = 5) %>%
mutate(anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco4_code = 4,
anzsco4 = 5
) %>%
mutate(
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco6 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
anti_join(anzsco4, by = c("x5" = "anzsco4")) %>%
filter(!is.na(x5)) %>%
select(anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7) %>%
mutate(anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7
) %>%
mutate(
anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

# Join into wide occupation list
comb <- anzsco1 %>%
Expand All @@ -86,39 +103,47 @@ comb <- anzsco1 %>%
nfd1 <- comb %>%
select(anzsco1_code, anzsco1) %>%
distinct() %>%
mutate(anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000"))
mutate(
anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000")
)

nfd2 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2) %>%
distinct() %>%
mutate(anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000"))
mutate(
anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000")
)


nfd3 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2, anzsco3_code, anzsco3) %>%
distinct() %>%
mutate(anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000"))
mutate(
anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000")
)

anzsco2009 <- comb %>%
bind_rows(nfd1, nfd2, nfd3) %>%
arrange(anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code) %>%
mutate(across(.fns = as.character)) %>%
arrange(
anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code
) %>%
mutate(across(everything(), .fns = as.character)) %>%
arrange(anzsco6_code)

if (include_factor_variants) {
Expand All @@ -129,15 +154,17 @@ if (include_factor_variants) {
anzsco3_f = as_factor(anzsco3),
anzsco4_f = as_factor(anzsco4),
anzsco6_f = as_factor(anzsco6),
skill_level = as_factor(skill_level)) %>%
skill_level = as_factor(skill_level)
) %>%
# order
select(anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level)

select(
anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level
)
}

# Rename using new conventions: https://github.com/runapp-aus/abscorr/issues/17
Expand Down
Loading
Loading