Skip to content

Commit

Permalink
Variable population size update (#78)
Browse files Browse the repository at this point in the history
* Variable pop size for simulate functions

* Update GeoTox.print

* Message format consistency

* Ran devtools::document()

* Added set_population function

* Updated paper doi

* Increment version number to 0.2.0.9001

* Updated NEWS

* Added set_population to _pkgdown.yml

* Updated documentation

* Fix for devtools::check() note
  • Loading branch information
SkylarMarvel authored Jan 31, 2025
1 parent f2b0809 commit c1a6017
Show file tree
Hide file tree
Showing 26 changed files with 567 additions and 155 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GeoTox
Title: Spatiotemporal Mixture Risk Assessment
Version: 0.2.0.9000
Version: 0.2.0.9001
Authors@R: c(
person("Skylar", "Marvel", , "skylar.marvel@nih.gov", role = c("aut"),
comment = c(ORCID = "0000-0002-2971-9743")),
Expand All @@ -12,8 +12,8 @@ Authors@R: c(
)
Description: Connecting spatiotemporal exposure to individual and
population-level risk via source-to-outcome continuum modeling. The package,
methods, and case-studies are described in Messier, Reif, and Marvel (2024)
<doi:10.1101/2024.09.23.24314096> and Eccles et al. (2023)
methods, and case-studies are described in Messier, Reif, and Marvel (2025)
<doi:10.1186/s40246-024-00711-8> and Eccles et al. (2023)
<doi:10.1016/j.scitotenv.2022.158905>.
License: MIT + file LICENSE
URL: https://niehs.github.io/GeoTox/, https://github.com/NIEHS/GeoTox
Expand All @@ -23,6 +23,7 @@ Imports:
dplyr,
ggplot2,
ggridges,
purrr,
rlang,
sf,
stats,
Expand All @@ -37,7 +38,6 @@ Suggests:
httk,
httr2,
knitr,
purrr,
readr,
readxl,
rmarkdown,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export(sample_Css)
export(sensitivity_analysis)
export(set_boundaries)
export(set_hill_params)
export(set_population)
export(simulate_age)
export(simulate_exposure)
export(simulate_inhalation_rate)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# GeoTox (development version)

* Added set_population() function.

* The simulate\_\* functions can now handle population sizes that vary
across regions.

# GeoTox 0.2.0

* Initial CRAN submission.
150 changes: 88 additions & 62 deletions R/GeoTox.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
#' # Plot exposure data
#' plot(geoTox, type = "exposure", ncol = 5)
#' # Plot response data
#' plot(geoTox)
#' plot(geoTox, assays = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
#' # Plot sensitivity data
#' plot(geoTox, type = "sensitivity")
#' plot(geoTox, type = "sensitivity", assay = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
#' plot(geoTox,
#' type = "sensitivity",
#' assay = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
GeoTox <- function() {
structure(
list(
Expand All @@ -70,78 +70,104 @@ GeoTox <- function() {
#' @export
print.GeoTox <- function(x, ...) {

names_simulated <- c("age", "IR", "obesity", "C_ext", "C_ss")
names_computed <- c("D_int", "C_invitro", "resp", "sensitivity")
names_other <- setdiff(names(x),
c(names_simulated, names_computed))

get_info <- function(names) {
info <- lapply(names, \(name) {
class <- dim <- ""
if (is.null(x[[name]])) {
return(data.frame(Name = name, Class = "", Dim = ""))
}
is_list <- inherits(x[[name]], "list")
if (is_list && length(x[[name]]) > 0) {
item <- x[[name]][[1]]
} else if (!is_list) {
item <- x[[name]]
} else {
item <- NULL
}
class <- class(item)
if (any(c("matrix", "data.frame") %in% class)) {
dim <- paste(dim(item), collapse = " x ")
} else {
dim <- length(item)
}
if (is_list) {
dim <- paste0(length(x[[name]]), " x (", dim, ")")
class <- paste0("list(", class[[1]], ")")
} else {
class <- paste(class, collapse = ", ")
}
data.frame(Name = name, Class = class, Dim = dim)
})
do.call(rbind, info)
}

info_simulated <- get_info(names_simulated)
info_simulated <- info_simulated[info_simulated$Class != "", , drop = FALSE]
info_computed <- get_info(names_computed)
info_computed <- info_computed[info_computed$Class != "", , drop = FALSE]

cat("GeoTox object\n")
# Get n_assay and n_chem from GeoTox()$hill_params
if (is.null(x$hill_params)) {
n_assays <- 0
n_chems <- 0
n_assay <- 0
n_chem <- 0
} else {
if ("assay" %in% names(x$hill_params)) {
n_assays <- length(unique(x$hill_params$assay))
n_assay <- length(unique(x$hill_params$assay))
} else {
n_assays <- 1
n_assay <- 1
}
if ("chem" %in% names(x$hill_params)) {
n_chems <- length(unique(x$hill_params$chem))
n_chem <- length(unique(x$hill_params$chem))
} else {
n_chems <- 1
n_chem <- 1
}
}

# Categorize different GeoTox() fields
names_data_vec <- c("age", "IR", "obesity")
names_data_mat <- c("C_ext", "C_ss")
names_computed_mat <- c("D_int", "C_invitro")
names_computed_df <- c("resp")
names_computed_list <- c("sensitivity")
names_other <- setdiff(names(x),
c(names_data_vec, names_data_mat,
names_computed_mat, names_computed_df,
names_computed_list))

# Functions to get size info for each type of field
# m = number of regions
# n = population size
get_info_vec <- function(name) {
size <- ifelse(is.null(x[[name]]), "", "m * (n)")
data.frame(Name = name, Size = size)
}
get_info_mat <- function(name) {
size <- ""
if (!is.null(x[[name]])) {
dim <- dim(x[[name]][[1]])
size <- paste0("m * (n x ", dim[2], ")")
}
data.frame(Name = name, Size = size)
}
get_info_df <- function(name) {
size <- ""
if (!is.null(x[[name]])) {
dim <- dim(x[[name]][[1]])
size <- paste0("m * (", n_assay, " * n x ", dim[2], ")")
}
data.frame(Name = name, Size = size)
}
cat("Assays: ", n_assays, "\n", sep = "")
cat("Chemicals: ", n_chems, "\n", sep = "")
if (nrow(info_simulated) > 0) {
n_regions <- length(x[[info_simulated$Name[1]]])
} else if (nrow(info_computed) > 0) {
n_regions <- length(x[[info_computed$Name[1]]])
get_info_list <- function(name) {
size <- ""
if (!is.null(x[[name]])) {
n_list <- length(x[[name]])
dim <- dim(x[[name]][[1]][[1]])
size <- paste0(n_list, " * (m * (", n_assay, " * n x ", dim[2], "))")
}
data.frame(Name = name, Size = size)
}

# Get size info for each type of field
info_data <- dplyr::bind_rows(
purrr::map(names_data_vec, \(name) get_info_vec(name)),
purrr::map(names_data_mat, \(name) get_info_mat(name))) |>
dplyr::filter(.data$Size != "")

info_computed <- dplyr::bind_rows(
purrr::map(names_computed_mat, \(name) get_info_mat(name)),
purrr::map(names_computed_df, \(name) get_info_df(name)),
purrr::map(names_computed_list, \(name) get_info_list(name))) |>
dplyr::filter(.data$Size != "")

# Get population size from GeoTox()$par$n
if (is.null(x$par$n)) {
n_pop <- 0
} else if (length(unique(x$par$n)) == 1) {
n_pop <- x$par$n[[1]]
} else {
n_regions <- 0
n_pop <- paste0("[", paste(range(x$par$n), collapse = ", "), "]")
}
cat("Regions: ", n_regions, "\n", sep = "")
cat("Population: ", x$par$n, "\n", sep = "")

# Get number of regions from potential data fields
n_region <- purrr::map_int(c(names_data_vec, names_data_mat,
names_computed_mat, names_computed_df),
\(name) length(x[[name]])) |>
max()

# Output info
cat("GeoTox object\n")
cat("Assays: ", n_assay, "\n", sep = "")
cat("Chemicals: ", n_chem, "\n", sep = "")
cat("Regions: m = ", n_region, "\n", sep = "")
cat("Population: n = ", n_pop, "\n", sep = "")
cat("Data Fields:")
if (nrow(info_simulated) > 0) {
if (nrow(info_data) > 0) {
cat("\n")
print(info_simulated, row.names = FALSE, print.gap = 2)
print(info_data, row.names = FALSE, print.gap = 2)
} else {
cat(" None\n")
}
Expand Down
78 changes: 78 additions & 0 deletions R/set_population.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Set population data
#'
#' @param x GeoTox object.
#' @param age numeric vector or list of numeric vectors of age values.
#' @param obesity character vector or list of character vectors of obesity
#' status.
#'
#' @return The same object with simulated fields added.
#' @export
#'
#' @examples
#' # Single region
#' age <- round(runif(10, 1, 100))
#' obesity <- sample(c("Normal", "Obese"), 10, replace = TRUE)
#' geoTox <- set_population(GeoTox(), age = age, obesity = obesity)
#'
#' # Multiple regions
#' age <- list("37001" = round(runif(10, 1, 100)),
#' "37007" = round(runif(8, 1, 100)))
#' obesity <- list("37001" = sample(c("Normal", "Obese"), 10, replace = TRUE),
#' "37007" = sample(c("Normal", "Obese"), 8, replace = TRUE))
#' geoTox <- set_population(GeoTox(), age = age, obesity = obesity)
set_population <- function(x, age = NULL, obesity = NULL) {

set_age <- !is.null(age)
set_obesity <- !is.null(obesity)

if (set_age) {
age <- .check_types(age,
c("numeric", "integer"),
paste0("`age` must be a numeric vector or list of ",
"numeric vectors"))
}

if (set_obesity) {
obesity <- .check_types(obesity,
"character",
paste0("`obesity` must be a character vector or ",
"list of character vectors"))
if (any(purrr::map_lgl(obesity,
\(x) !all(x %in% c("Normal", "Obese"))))) {
stop("`obesity` values must be 'Normal' or 'Obese'", call. = FALSE)
}
}

# Update population size
if (set_age) n_age <- purrr::map_int(age, length)
if (set_obesity) n_obesity <- purrr::map_int(obesity, length)
if (set_age & set_obesity) {
if (!identical(n_age, n_obesity)) {
stop("Population sizes for `age` and `obesity` do not match",
call. = FALSE)
}
x$par$n <- n_age
} else if (set_age) {
x$par$n <- n_age
} else if (set_obesity) {
x$par$n <- n_obesity
}

# Set fields
if (set_age) x$age <- age
if (set_obesity) x$obesity <- obesity

# Clear downstream fields
if (set_age & !is.null(x$IR)) {
warning("Clearing `IR` field", call. = FALSE)
x$IR <- NULL
}
if ((set_age | set_obesity) &
!(is.null(x$C_ss) & is.null(x$css_sensitivity))) {
warning("Clearing `C_ss` and `css_sensitivity` fields", call. = FALSE)
x$C_ss <- NULL
x$css_sensitivity <- NULL
}

x
}
30 changes: 20 additions & 10 deletions R/simulate_age.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
#'
#' @param x data frame or list of data frames containing population data for age
#' groups. Each data frame must contain columns "AGEGRP" and "TOT_POP".
#' @param n simulated sample size.
#' @param n simulated sample size(s).
#'
#' @details
#' Each data frame must contain 19 rows. The first row represents the total
#' population of all age groups while the next 18 rows represent age groups
#' from 0 to 89 in increments of 5 years.
#'
#' The sample size can be either a single value or a vector the same length as
#' the number of data frames in x. If a single value is provided, the same
#' sample size is used for all data frames. If a vector is provided, each
#' element corresponds to the sample size for each data frame in x.
#'
#' @return List of arrays containing simulated ages.
#'
#' @examples
Expand All @@ -25,25 +30,30 @@
#' # set population total for all age groups
#' y$TOT_POP[1] <- sum(y$TOT_POP)
#' simulate_age(list(x = x, y = y), 15)
#' # different sample sizes
#' simulate_age(list(x = x, y = y), c(15, 10))
#'
#' @export
simulate_age <- function(x, n = 1e3) {

if (!any(c("data.frame", "list") %in% class(x))) {
stop("x must be a data.frame or list")
}

if (is.data.frame(x)) x <- list(x)
x <- .check_types(x,
"data.frame",
"`x` must be a data frame or list of data frames")

if (.check_names(x, c("AGEGRP", "TOT_POP"))) {
stop("x data frames must contain columns 'AGEGRP' and 'TOT_POP'")
stop("`x` data frames must contain columns 'AGEGRP' and 'TOT_POP'")
}

if (any(unlist(lapply(x, \(y) nrow(y) != 19)))) {
stop("`x` data frames must contain 19 rows")
}

if (any(unlist(lapply(x, function(y) nrow(y) != 19)))) {
stop("x data frames must contain 19 rows")
if (!(length(n) == 1 | length(n) == length(x))) {
stop("`n` must be a single value or a vector with values for ",
"each data frame in `x`")
}

lapply(x, function(df) .simulate_age(df, n))
purrr::pmap(list(x, n), \(df, n) .simulate_age(df, n))

}

Expand Down
Loading

0 comments on commit c1a6017

Please sign in to comment.