diff --git a/DESCRIPTION b/DESCRIPTION index ed5685a..12932c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,9 @@ Imports: rlang, stats, segmented, - chngpt + chngpt, + mclust, + minpack.lm Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 5c9d92b..cd66e35 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,21 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(broken_stick) export(broken_stick_stevens) +export(cluster_mods) export(fake_crustaceans) export(infl_pt) +export(piecewise_mods) export(regrans) export(somerton) export(two_line) export(two_line_logistic) import(ggplot2) import(rlang) -importFrom(ggplot2,aes) importFrom(glue,glue) importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") -importFrom(rlang,.data) +importFrom(mclust,Mclust) +importFrom(minpack.lm,nls.lm.control) +importFrom(minpack.lm,nlsLM) +importFrom(splus2R,peaks) diff --git a/R/broken_stick.R b/R/broken_stick.R deleted file mode 100644 index 83adc81..0000000 --- a/R/broken_stick.R +++ /dev/null @@ -1,117 +0,0 @@ -#' Broken-stick (segmented) approaches to estimating SM50 -#' -#' A wrapper function allowing for multiple methods of broken-stick regression -#' to be applied using a standard format for inputs. See -#' `vignette("broken-stick")` for more information. -#' -#' @param dat data frame or matrix containing the data -#' @param xvar Name of column (integer or double) of measurements for the x-axis -#' variable (e.g., carapace width). -#' @param yvar Name of column (integer or double) of measurements for the y-axis -#' variable (e.g., claw height). -#' @param method Method to use for the regression. A single string or string -#' vector containing one or more of c("segmented", "chngpt", "regrans", -#' "stevens"), or "all" to return the results of all methods for comparison. -#' @param ci Integer; type of confidence intervals to return for SM50, defaults -#' to 95%. -#' @param trans Transformation to be applied to the data before performing the -#' regression: "none", "log" (both variables are log-transformed), or "std" -#' (both variables are standardized = scaled and centered). If no string is -#' provided, no transformation is performed (i.e., the default is "none"). -#' @param upper Integer or double; the upper bound for possible SM50 values. -#' Must be on the same trans of the data. Defaults to the 80th percentile of -#' the x-variable. -#' @param lower Integer or double; the lower bound for possible SM50 values. -#' Must be on the same trans of the data. Defaults to the 20th percentile of -#' the x-variable. -#' -#' @returns An estimate of SM50 from the specified method(s). -#' @export -#' -#' @examples -#' set.seed(12) -#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) -#' broken_stick(fc, xvar = "x", yvar = "y", method = c("segmented", "chngpt")) -broken_stick <- function(dat, - xvar, - yvar, - ci = 95, - lower = NULL, - upper = NULL, - trans = "none", - method = c("segmented", "chngpt", "regrans", "stevens", "all")) { - out <- c() - - if (trans == "log") { - dat$xvar <- log(dat[[xvar]]) - dat$yvar <- log(dat[[yvar]]) - } - else if (trans == "std") { - dat$xvar <- scale(dat[[xvar]]) - dat$xvar <- scale(dat[[yvar]]) - } - else { - dat$xvar <- dat[[xvar]] - dat$yvar <- dat[[yvar]] - } - - if (is.null(lower)) { - lower <- stats::quantile(dat[[xvar]], 0.2) - } - - if (is.null(upper)) { - upper <- stats::quantile(dat[[xvar]], 0.8) - } - - if ("chngpt" %in% method | "all" %in% method) { - - temp <- chngpt::chngptm( - formula.1 = yvar ~ 1, - formula.2 = ~ xvar, - family = "gaussian", - data = dat, - type = "segmented", - var.type = "default", - weights = NULL, - lb.quantile = 0.2, - ub.quantile = 0.8, - )$chngpt - - if (!("all" %in% method) & length(method)==1) { - out <- temp - } - else { - out <- append(out, c(chngpt=temp)) - } - } - - if ("segmented" %in% method | "all" %in% method) { - temp_lm <- stats::lm(yvar~xvar, data = dat) - seg_lm <- segmented::segmented(temp_lm)$psi[2] - out <- append(out, c(segmented=seg_lm)) - } - if ("regrans" %in% method | "all" %in% method) { - temp <- regrans(dat, xvar, yvar, verbose = FALSE) - if (!("all" %in% method) & length(method)==1) { - out <- temp - } - else { - out <- append(out, c(REGRANS = temp)) - } - - } - if ("stevens" %in% method | "all" %in% method) { - temp <- broken_stick_stevens(dat, - xvar = xvar, - yvar = yvar, - verbose = FALSE) - if (!("all" %in% method) & length(method)==1) { - out <- temp - } - else { - out <- append(out, c(Stevens = temp)) - } - } - - return(out) -} diff --git a/R/broken_stick_stevens.R b/R/broken_stick_stevens.R index d42801b..5177f93 100644 --- a/R/broken_stick_stevens.R +++ b/R/broken_stick_stevens.R @@ -1,5 +1,12 @@ #' Broken-stick method from Bradley Stevens #' +#' @description Fits a broken-stick model to estimate size at maturity. Code +#' adapted from Dr. Bradley Stevens at the University of Maryland Eastern +#' Shore. Differs from the broken-stick methods implemented in `regrans()`, +#' `chngpt::chngpt()`, `segmented::segmented()`, `SiZer::piecewise.linear()`, +#' etc. in that only values of the x-axis variable present in the data are +#' tested as possible SM50 values. +#' #' @param dat data frame or matrix containing the data #' @param xvar Name of column (integer or double) of measurements for the x-axis #' variable (e.g., carapace width). @@ -11,6 +18,8 @@ #' @param lower Integer or double; the lower bound for possible SM50 values. #' Must be on the same scale of the data. Defaults to the 20th percentile of #' the x-variable. +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param verbose Should additional output be returned besides the SM50 #' estimate? #' @@ -30,19 +39,26 @@ broken_stick_stevens <- function(dat, yvar, lower = NULL, upper = NULL, + log = FALSE, verbose = FALSE) { - stevens <- dat %>% dplyr::arrange(.data[[xvar]]) + new_dat <- dat %>% dplyr::arrange(.data[[xvar]]) - xraw <- stevens[[xvar]] - yraw <- stevens[[yvar]] + if (isTRUE(log)) { + xraw <- log(new_dat[[xvar]]) + yraw <- log(new_dat[[yvar]]) + } + else { + xraw <- new_dat[[xvar]] + yraw <- new_dat[[yvar]] + } if (is.null(lower)) { - lower <- stats::quantile(xraw, 0.2) + lower <- stats::quantile(xraw, 0.2, names = FALSE) } if (is.null(upper)) { - upper <- stats::quantile(xraw, 0.8) + upper <- stats::quantile(xraw, 0.8, names = FALSE) } left_x <- (xraw <= lower) # T/F vector @@ -52,28 +68,28 @@ broken_stick_stevens <- function(dat, min_x <- xraw[low_ndx] # lowest T value min_y <- yraw[low_ndx] # lowest T value - stevens$xvar <- xraw - stevens$yvar <- yraw + new_dat$xvar <- xraw + new_dat$yvar <- yraw # null model - single line to describe both maturity stages - lm0 <- stats::lm(yvar ~ xvar, data = stevens) + lm0 <- stats::lm(yvar ~ xvar, data = new_dat) rss0 <- stats::anova(lm0)[[2, 2]] # residual sum of squares ms0 <- stats::anova(lm0)[[3]] # mean squared error F0 <- ms0[1] / ms0[2] # F value - n0 <- dim(stevens)[1] + n0 <- dim(new_dat)[1] rss_min <- rss0 mse0 <- mean(lm0$residuals ^ 2) # assign group membership # 1 = left line, 2 = right line - memb <- rep(1, nrow(stevens)) + memb <- rep(1, nrow(new_dat)) memb_low <- (xraw <= min_x) # T/F list if less than low range memb_high <- (yraw > min_y) # T/F list if GT than high range memb[memb_low] <- 1 # assign 1 to those < low memb[memb_high] <- 2 # assign 2 to those > high memb_sum1 <- summary(as.factor(memb)) - stevens$prior <- memb - stevens$group <- memb + new_dat$prior <- memb + new_dat$group <- memb run <- 0 @@ -83,7 +99,7 @@ broken_stick_stevens <- function(dat, # Left regression lm1 <- stats::lm( I(yvar[memb == 1] - min_y) ~ 0 + I(xvar[memb == 1] - min_x), - data = stevens + data = new_dat ) b1 <- stats::coef(lm1)[[1]] a1 <- min_y - (b1 * min_x) @@ -94,7 +110,7 @@ broken_stick_stevens <- function(dat, # Right regression lm2 <- stats::lm( I(yvar[memb == 2] - min_y) ~ 0 + I(xvar[memb == 2] - min_x), - data = stevens + data = new_dat ) b2 <- stats::coef(lm2)[[1]] a2 <- min_y - (b2 * min_x) @@ -126,26 +142,24 @@ broken_stick_stevens <- function(dat, # next point low_ndx <- low_ndx + 1 - min_x <- stevens$xvar[low_ndx] - min_y <- stevens$yvar[low_ndx] - memb_low <- stevens$xvar <= min_x # T/F list if less than low range - memb_high <- stevens$xvar > min_x # T/F list if GT than high range + min_x <- new_dat$xvar[low_ndx] + min_y <- new_dat$yvar[low_ndx] + memb_low <- new_dat$xvar <= min_x # T/F list if less than low range + memb_high <- new_dat$xvar > min_x # T/F list if GT than high range memb[memb_low] <- 1 # assign 1 to those < low memb[memb_high] <- 2 # assign 2 to those > high } # end loop SM50 <- joint_x - memb_low <- stevens$xvar <= joint_x # T/F list if less than low range - memb_high <- stevens$xvar > joint_x # T/F list if GT than high range + memb_low <- new_dat$xvar <= joint_x # T/F list if less than low range + memb_high <- new_dat$xvar > joint_x # T/F list if GT than high range memb[memb_low] <- 1 # assign 1 to those < low memb[memb_high] <- 2 # assign 2 to those > high - stevens$group <- memb - memb_sum2 <- summary(as.factor(stevens$group)) - n_tot <- sum(memb_sum2) + new_dat$group <- memb output <- list( - data = stevens %>% dplyr::select(-c("xvar", "yvar", "prior")), + data = new_dat %>% dplyr::select(-c("xvar", "yvar", "prior")), SM50 = SM50, imm_slope = b1_1, imm_int = a1_1, diff --git a/R/cluster.R b/R/cluster.R new file mode 100644 index 0000000..1da5a4e --- /dev/null +++ b/R/cluster.R @@ -0,0 +1,40 @@ +#' Classification approaches to estimating SM50 +#' +#' @param dat data frame or matrix containing the data +#' @param xvar Name of column (integer or double) of measurements for the x-axis +#' variable (e.g., carapace width). +#' @param yvar Name of column (integer or double) of measurements for the y-axis +#' variable (e.g., claw height). +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. +#' @param method Classification method to use. A single string or vector +#' containing one or more of c("mclust", "Somerton", "kmeans", "hclust", "infl_pt"), or "all" to return the results of all methods for comparison. +#' @returns An estimate of SM50 from the specified method(s). +#' @export +#' +#' @examples +#' set.seed(12) +#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +#' cluster_mods(fc, xvar = "x", yvar = "y", method = c("kmeans")) +cluster_mods <- function(dat, + xvar, + yvar, + log = FALSE, + method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all")) { + + new_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]]) + + if (isTRUE(log)) { + new_dat$xvar <- log(dat[[xvar]]) + new_dat$yvar <- log(dat[[yvar]]) + } + + if ("kmeans" %in% method | "all" %in% method) { + + out <- new_dat + +} + + return(out) + +} diff --git a/R/infl_pt.R b/R/infl_pt.R index 39ccf21..a779613 100644 --- a/R/infl_pt.R +++ b/R/infl_pt.R @@ -9,18 +9,18 @@ #' transition to maturity of a population of Tanner crabs #' (*Chionoecetes bairdi*) was evident by an increase in the #' log(claw height)/log(carapace width) ratio from below 0.2 to above -#' 0.2. `infl_pt_fun()` finds this discriminating line by creating a kernel +#' 0.2. `infl_pt()` finds this discriminating line by creating a kernel #' density estimate (visually similar to a smoothed histogram) of the #' y-var/x-var ratio for all points, then finding the local minimum separating #' the two peaks representing the maturity clusters. #' -#' @import ggplot2 -#' @importFrom rlang .data #' @param dat optional data frame or matrix containing the data #' @param x Name of column (or integer or double vector) containing measurements #' for the x-axis variable (e.g., carapace width). #' @param y Name of column (or integer or double vector) containing measurements #' for the y-axis variable (e.g., claw height). +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param plot Boolean; should a plot of the density curve with the identified #' minimum be created? #' @@ -37,13 +37,20 @@ #' z <- c(x, y) #' hist(z) #' dat1 <- data.frame(xvar = rep(1, 200), yvar = z) -#' infl_pt(dat1, "xvar", "yvar", TRUE) +#' infl_pt(dat1, "xvar", "yvar", plot = TRUE) #' fc <- fake_crustaceans(n = 100, allo_params = c(1, 0.2, 1.1, 0.2)) -#' infl_pt(fc, "x", "y", TRUE) +#' infl_pt(fc, "x", "y", plot = TRUE) +#' infl_pt(fc, "x", "y", log = TRUE, plot = TRUE) #' -infl_pt <- function(dat, x, y, plot = FALSE) { +infl_pt <- function(dat, x, y, log = FALSE, plot = FALSE) { # find the ratio between the two morphometric variables - ratio <- dat[[y]]/dat[[x]] + if (isTRUE(log)) { + ratio <- log(dat[[y]])/log(dat[[x]]) + } + else { + ratio <- dat[[y]]/dat[[x]] + } + # compute a kernel density estimate (essentially a smoothed histogram) density_test <- stats::density(ratio) diff --git a/R/morphmat-package.R b/R/morphmat-package.R index 9de949e..053c930 100644 --- a/R/morphmat-package.R +++ b/R/morphmat-package.R @@ -2,9 +2,13 @@ "_PACKAGE" ## usethis namespace: start +#' @import ggplot2 #' @import rlang -#' @importFrom ggplot2 aes #' @importFrom glue glue #' @importFrom lifecycle deprecated +#' @importFrom mclust Mclust +#' @importFrom splus2R peaks +#' @importFrom minpack.lm nls.lm.control +#' @importFrom minpack.lm nlsLM ## usethis namespace: end NULL diff --git a/R/piecewise.R b/R/piecewise.R new file mode 100644 index 0000000..a3effaa --- /dev/null +++ b/R/piecewise.R @@ -0,0 +1,149 @@ +#' Piecewise regression approaches to estimating SM50 +#' +#' @description A wrapper function allowing for multiple methods of piecewise regression to +#' be applied using a standard format for inputs. See `vignette("broken-stick")` +#' and `vignette("two-line")` for more information. +#' +#' @details The `two_line_logistic()` function is closely related but not included in this wrapper function because you will generally want more control over the initial values of the parameters and the nonlinear least-squares algorithm may not always converge. +#' +#' This function is primarily intended for easy comparison between the SM50 estimates produced by a variety of different piecewise regression models. For follow-up analyses, we recommend calling the specific function(s) of interest (`regrans()`, `broken_stick_stevens()`, `two_line()`, `segmented::segmented()`, or `chngpt::chngpt()`) and exploring how changing the upper and lower bounds for possible SM50 values and the number of breakpoints to be tested may influence the resulting estimates. +#' +#' @param dat data frame or matrix containing the data +#' @param xvar Name of column (integer or double) of measurements for the x-axis +#' variable (e.g., carapace width). +#' @param yvar Name of column (integer or double) of measurements for the y-axis +#' variable (e.g., claw height). +#' @param method Method to use for the regression. A single string or vector +#' containing one or more of c("segmented", "chngpt", "regrans", "stevens", +#' "TL"), or "all" to return the results of all methods for comparison. +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. +#' @param upper Integer or double; the upper bound for possible SM50 values. +#' Must be on the same trans of the data. Defaults to the 80th percentile of +#' the x-variable. +#' @param lower Integer or double; the lower bound for possible SM50 values. +#' Must be on the same trans of the data. Defaults to the 20th percentile of +#' the x-variable. +#' +#' @returns An estimate of SM50 from the specified method(s). +#' @export +#' +#' @examples +#' set.seed(12) +#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +#' piecewise_mods(fc, xvar = "x", yvar = "y", method = c("segmented", "chngpt")) +piecewise_mods <- function(dat, + xvar, + yvar, + lower = NULL, + upper = NULL, + log = FALSE, + method = c("segmented", "chngpt", "regrans", "stevens", "TL", "all")) { + dat <- dat %>% dplyr::arrange(.data[[xvar]]) + out <- c() + + if (isTRUE(log)) { + dat$xvar <- log(dat[[xvar]]) + dat$yvar <- log(dat[[yvar]]) + } + else { + dat$xvar <- dat[[xvar]] + dat$yvar <- dat[[yvar]] + } + + if (is.null(lower)) { + lower <- stats::quantile(dat$xvar, 0.2, names = FALSE) + } + + if (is.null(upper)) { + upper <- stats::quantile(dat$xvar, 0.8, names = FALSE) + } + + if ("chngpt" %in% method | "all" %in% method) { + + temp <- chngpt::chngptm( + formula.1 = yvar~1, + formula.2 = ~xvar, + family = "gaussian", + data = dat, + type = "segmented", + var.type = "default", + weights = NULL, + lb.quantile = stats::ecdf(dat$xvar)(lower), + ub.quantile = stats::ecdf(dat$xvar)(upper), + )$chngpt + + if (!("all" %in% method) & length(method) == 1) { + out <- temp + } + else { + out <- append(out, c(chngpt = temp)) + } + } + + if ("segmented" %in% method | "all" %in% method) { + temp_lm <- stats::lm(yvar ~ xvar, data = dat) + seg_lm <- segmented::segmented(temp_lm)$psi[2] + + if (!("all" %in% method) & length(method) == 1) { + out <- seg_lm + } + else { + out <- append(out, c(segmented = seg_lm)) + } + } + + if ("regrans" %in% method | "all" %in% method) { + temp <- regrans( + dat, + xvar = "xvar", + yvar = "yvar", + lower = lower, + upper = upper, + verbose = FALSE) + + if (!("all" %in% method) & length(method) == 1) { + out <- temp + } + else { + out <- append(out, c(REGRANS = temp)) + } + } + + if ("stevens" %in% method | "all" %in% method) { + + temp <- broken_stick_stevens( + dat, + xvar = "xvar", + yvar = "yvar", + lower = lower, + upper = upper, + verbose = FALSE + ) + if (!("all" %in% method) & length(method) == 1) { + out <- temp + } + else { + out <- append(out, c(Stevens = temp)) + } + } + + if ("TL" %in% method | "all" %in% method) { + temp <- two_line( + dat, + xvar = "xvar", + yvar = "yvar", + lower = lower, + upper = upper, + verbose = FALSE + ) + if (!("all" %in% method) & length(method) == 1) { + out <- temp + } + else { + out <- append(out, c(Two_line = temp)) + } + } + + return(out) +} diff --git a/R/regrans.R b/R/regrans.R index 34cda0c..f82425e 100644 --- a/R/regrans.R +++ b/R/regrans.R @@ -11,6 +11,8 @@ #' @param upper Integer or double; the upper bound for possible SM50 values. #' Must be on the same scale of the data. Defaults to the 80th percentile of #' the x-variable. +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param verbose Return all breakpoints tested and their sum of squares, or #' only the estimated SM50? #' @param n_tries Number of breakpoints to test within the unknown range. @@ -26,22 +28,29 @@ #' regrans(fc, "x", "y", verbose = FALSE) #' head(regrans(fc, "x", "y", verbose = TRUE), n = 30) regrans <- function(dat, - xvar, - yvar, - lower = NULL, - upper = NULL, - verbose = FALSE, - n_tries = 100) { + xvar, + yvar, + lower = NULL, + upper = NULL, + log = FALSE, + verbose = FALSE, + n_tries = 100) { - x <- dat[[xvar]] - y <- dat[[yvar]] + if (isTRUE(log)) { + x <- log(dat[[xvar]]) + y <- log(dat[[yvar]]) + } + else { + x <- dat[[xvar]] + y <- dat[[yvar]] + } if (is.null(lower)) { - lower <- stats::quantile(x, 0.2) + lower <- stats::quantile(x, 0.2, names = FALSE) } if (is.null(upper)) { - upper <- stats::quantile(x, 0.8) + upper <- stats::quantile(x, 0.8, names = FALSE) } changept_choices <- seq(lower, upper, length.out = n_tries) diff --git a/R/somerton.R b/R/somerton.R index d0c1a24..d3c14aa 100644 --- a/R/somerton.R +++ b/R/somerton.R @@ -11,10 +11,8 @@ #' @param lower Integer or double; the lower bound for possible SM50 values. #' Must be on the same trans of the data. Defaults to the 20th percentile of #' the x-variable. -#' @param trans Transformation to be applied to the data before performing the -#' regression: "none", "log" (both variables are log-transformed), or "std" -#' (both variables are standardized = scaled and centered). If no string is -#' provided, no transformation is performed (i.e., the default is "none"). +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param max_iter Maximum number of iterations #' #' @returns Output is a list that also includes the input data frame with a @@ -30,36 +28,31 @@ #' mod <- glm(data = out_df, pred_mat_num ~ x, family = binomial(link = "logit")) #' unname(-coef(mod)[1] / coef(mod)[2]) somerton <- function( - dat, # data.frame with columns corresponding to xvar, yvar + dat, # data frame with columns corresponding to xvar and yvar xvar, # X variable yvar, # Y variable - trans = "none", # transformation to apply + log = FALSE, # log-transform the data before fitting the model? lower = NULL, # lower bound of unknown range upper = NULL, # upper bound of unknown range max_iter = 50 # maximum number of iterations ) { - if (is.null(lower)) { - lower <- stats::quantile(dat[[xvar]], 0.2) - } - - if (is.null(upper)) { - upper <- stats::quantile(dat[[xvar]], 0.8) - } - - if (trans == "log") { + if (isTRUE(log)) { dat$xvar <- log(dat[[xvar]]) dat$yvar <- log(dat[[yvar]]) } - else if (trans == "std") { - dat$xvar <- scale(dat[[xvar]]) - dat$xvar <- scale(dat[[yvar]]) - } else { dat$xvar <- dat[[xvar]] dat$yvar <- dat[[yvar]] } + if (is.null(lower)) { + lower <- stats::quantile(dat$xvar, 0.2, names = FALSE) + } + + if (is.null(upper)) { + upper <- stats::quantile(dat$xvar, 0.8, names = FALSE) + } df <- dat %>% dplyr::mutate(group = dplyr::case_when(xvar < lower ~ "juv", @@ -68,7 +61,6 @@ somerton <- function( df$temp_group <- df$group - rsq_vec <- rep(NA, max_iter) RSS_vec <- rep(NA, max_iter) diff --git a/R/two_line.R b/R/two_line.R index e738ebe..e5f0eaa 100644 --- a/R/two_line.R +++ b/R/two_line.R @@ -26,6 +26,8 @@ #' only observed values of the x-variable ("obs"), or should it be a specified #' number of evenly-spaced values between the lower and upper limits of the #' unknown region ("even", the default) +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param num_bps When `bps = "even"`, how many values should be tested as #' possible endpoints? Defaults to 100, but should be increased. #' @@ -47,27 +49,36 @@ #' #' @seealso [two_line_logistic()] for an alternative two-line model with a #' logistic transition between the left and right segments and -#' [broken_stick()] for segmented/piecewise regression methods. +#' [piecewise_mods()] for segmented/piecewise regression methods. #' two_line <- function(dat, - xvar, - yvar, - lower = NULL, - upper = NULL, - verbose = FALSE, - bps = "even", - num_bps = 100) { - stevens <- dat %>% dplyr::arrange(.data[[xvar]]) - - xraw <- stevens[[xvar]] - yraw <- stevens[[yvar]] + xvar, + yvar, + lower = NULL, + upper = NULL, + verbose = FALSE, + bps = "even", + log = FALSE, + num_bps = 100) { + + + new_dat <- dat %>% dplyr::arrange(.data[[xvar]]) + + if (isTRUE(log)) { + xraw <- log(new_dat[[xvar]]) + yraw <- log(new_dat[[yvar]]) + } + else { + xraw <- new_dat[[xvar]] + yraw <- new_dat[[yvar]] + } if (is.null(lower)) { - lower <- stats::quantile(xraw, 0.2) + lower <- stats::quantile(xraw, 0.2, names = FALSE) } if (is.null(upper)) { - upper <- stats::quantile(xraw, 0.8) + upper <- stats::quantile(xraw, 0.8, names = FALSE) } left_x <- (xraw <= lower) # T/F vector @@ -77,26 +88,26 @@ two_line <- function(dat, min_x <- xraw[low_ndx] # lowest T value min_y <- yraw[low_ndx] # lowest T value - stevens$xvar <- xraw - stevens$yvar <- yraw + new_dat$xvar <- xraw + new_dat$yvar <- yraw - lm0 <- stats::lm(yvar ~ xvar, data = stevens) + lm0 <- stats::lm(yvar ~ xvar, data = new_dat) rss0 <- stats::anova(lm0)[[2, 2]] # residual sum of squares ms0 <- stats::anova(lm0)[[3]] # mean squared error F0 <- ms0[1] / ms0[2] # F value - n0 <- dim(stevens)[1] + n0 <- dim(new_dat)[1] rss_min <- rss0 mse0 <- mean(lm0$residuals ^ 2) # assign group membership # 1 = left line, 2= right line - memb <- rep(1, nrow(stevens)) + memb <- rep(1, nrow(new_dat)) memb_low <- (xraw <= min_x) # T/F list if less than low range memb_high <- (yraw > min_y) # T/F list if GT than high range memb[memb_low] <- 1 # assign 1 to those < low memb[memb_high] <- 2 # assign 2 to those > high memb_sum1 <- summary(as.factor(memb)) - stevens$group <- memb + new_dat$group <- memb #### Loop @@ -106,13 +117,13 @@ two_line <- function(dat, for (i in 1:n0) { piecewise1 <- stats::lm( yvar ~ xvar * (xvar < xvar[i]) + xvar * (xvar >= xvar[i]), - data = stevens) + data = new_dat) mse[i] <- mean(piecewise1$residuals ^ 2) } ### find breakpoint (bp) that gives lowest MSE bp_ind <- which(mse == min(mse)) - bp <- stevens$xvar[bp_ind] # this is not necessarily where the lines cross + bp <- new_dat$xvar[bp_ind] # this is not necessarily where the lines cross } @@ -124,7 +135,7 @@ two_line <- function(dat, mse <- rep(0, num_bps) for (i in 1:num_bps) { piecewise1 <- stats::lm(yvar ~ xvar * (xvar < steps[i]) + - xvar * (xvar >= steps[i]), data = stevens) + xvar * (xvar >= steps[i]), data = new_dat) mse[i] <- mean(piecewise1$residuals ^ 2) } @@ -139,7 +150,7 @@ two_line <- function(dat, ## rerun piecewise regression at best bp piecewise2 <- stats::lm(yvar ~ xvar * (xvar < bp) + xvar * (xvar > bp), - data = stevens) + data = new_dat) pw_vals <- stats::coef(piecewise2) pw_vals[which(is.na(pw_vals))] <- 0 @@ -152,11 +163,11 @@ two_line <- function(dat, #### Reassign group membership memb_pw <- rep(1, n0) - memb_pw[stevens$xvar >= bp] <- 2 - stevens$group <- memb_pw + memb_pw[new_dat$xvar >= bp] <- 2 + new_dat$group <- memb_pw output <- list( - data = stevens, + data = new_dat, breakpoint = bp, intersection = jx, imm_slope = b_lo, diff --git a/R/two_line_logistic.R b/R/two_line_logistic.R index b9e8b7f..a56f079 100644 --- a/R/two_line_logistic.R +++ b/R/two_line_logistic.R @@ -17,6 +17,8 @@ #' model. If not provided, taken to be the median of the x-variable #' @param alpha_start Starting value for the logistic slope parameter when #' fitting the NLS model +#' @param log Boolean; should both variables be log-transformed before performing the +#' regression? Defaults to FALSE. #' @param verbose Should additional output be returned besides the SM50 #' estimate? #' @@ -26,20 +28,26 @@ #' #' @examples #' set.seed(12) -#' fc <- fake_crustaceans(n=100, L50=100, allo_params=c(1, 0.2, 1.1, 0.2)) -#' two_line_logistic(fc, xvar="x", yvar="y", verbose = FALSE) +#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +#' two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE) two_line_logistic <- function(dat, xvar, yvar, imm_int = 1, imm_slope = 0.2, - mat_int = 1, + mat_int = 4, mat_slope = 0.3, SM50_start = NULL, alpha_start = 9, + log = FALSE, verbose = FALSE) { - tll_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]]) + if (isTRUE(log)) { + tll_dat <- data.frame(xvar = log(dat[[xvar]]), yvar = log(dat[[yvar]])) + } + else { + tll_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]]) + } if (is.null(SM50_start)) { SM50_start <- stats::median(tll_dat$xvar) @@ -57,7 +65,7 @@ two_line_logistic <- function(dat, )))) + (int2 + slope2 * xvar) * (1 / (1 + exp(-(xvar - SM50) / alpha))) } - nls_out <- stats::nls( + nls_out <- minpack.lm::nlsLM( formula = yvar ~ TLL_fun(xvar, int1, slope1, int2, slope2, SM50, alpha), data = tll_dat, start = list( @@ -68,7 +76,8 @@ two_line_logistic <- function(dat, SM50 = SM50_start, alpha = alpha_start ), - control=stats::nls.control(maxiter=100) + lower = c(-Inf, 0, -Inf, 0, 0, 0), + control = minpack.lm::nls.lm.control(maxiter = 500, maxfev = 10000) ) if (verbose == TRUE) { diff --git a/README.Rmd b/README.Rmd index 04b9f18..34b1e86 100644 --- a/README.Rmd +++ b/README.Rmd @@ -33,7 +33,7 @@ A compilation of methods used to estimate size at (sexual) maturity based on mor `morphmat` will include versions of the methods implemented in these existing GitHub repositories: | | | | | -|-----------------|-----------------|-----------------|----------------------| +|------------------|------------------|------------------|------------------| | **Type** | **Authors** | **GitHub repository** | **Description/notes** | | Package | Josymar Torrejon-Magallanes | [ejosymart/sizeMat](https://github.com/ejosymart/sizeMat) | [sizeMat: An R Package to Estimate Size at Sexual Maturity](https://cran.r-project.org/web/packages/sizeMat/vignettes/sizeMat.html) | | Package | Rodrigo Sant'Ana, Fernando Mayer | [rodrigosantana/Regrans: Fits Segmented Linear Regression Models](https://github.com/rodrigosantana/Regrans) | Older repository: [fernandomayer/Regrans](https://github.com/fernandomayer/Regrans/blob/master/change.point.R) | @@ -46,7 +46,7 @@ A compilation of methods used to estimate size at (sexual) maturity based on mor The following scripts do not use morphometric data and require individuals to already be classified as mature or immature. They provide various examples of how to fit maturity\~length binomial models to estimate SM50 and obtain confidence intervals. These tools can also be used to calculate size at maturity for non-crustacean fisheries. For a comprehensive examination of this type of model, see Mainguy et al [-@mainguy2024]. | | | | | -|----------------|----------------|-----------------|-----------------------| +|------------------|------------------|------------------|------------------| | **Type** | **Authors** | **GitHub repository** | **Description/notes** | | R scripts | Lucas Rodrigues | [lvcasrodrigues/maturity_at_size](https://github.com/lvcasrodrigues/maturity_at_size) | Does not use morphometric data; takes size classes with known numbers of mature individuals per size class and fits a binomial model (frequentist or Bayesian). Finds SM50 by generating new data from the model rather than from ratio of parameters | | R scripts | Mainguy et al. | [rafamoral/L50](https://github.com/rafamoral/L50): Monitoring reproduction in fish: Assessing the adequacy of ogives and the predicted uncertainty of their *L*~50~ estimates for more reliable biological inferences | Over a dozen methods for estimating L50 values and obtaining confidence intervals from (frequentist or Bayesian) binomial models (Delta, Fieller, bootstrap resampling, profile-likelihood, etc.). See accompanying manuscript by Mainguy et al. [-@mainguy2024]. | @@ -83,7 +83,7 @@ regrans(fc, "x", "y", verbose = FALSE) Two-line logistic: ```{r} -two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE) +two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 105) ``` Two-line model (lines are fit separately; no forced intersection): @@ -105,10 +105,10 @@ Other packages: # chngpt ``` -Compare estimates from all regression methods: +Compare estimates from all piecewise regression methods: ```{r} -broken_stick(fc, xvar = "x", yvar = "y", method = "all") +piecewise_mods(fc, xvar = "x", yvar = "y", method = "all") ``` ### Clustering methods diff --git a/README.md b/README.md index 04db9f8..ce6643f 100644 --- a/README.md +++ b/README.md @@ -101,9 +101,9 @@ regrans(fc, "x", "y", verbose = FALSE) Two-line logistic: ``` r -two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE) +two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 105) #> SM50 -#> 104.7633 +#> 104.7636 ``` Two-line model (lines are fit separately; no forced intersection): @@ -129,12 +129,14 @@ Other packages: # chngpt ``` -Compare estimates from all regression methods: +Compare estimates from all piecewise regression methods: ``` r -broken_stick(fc, xvar = "x", yvar = "y", method = "all") -#> chngpt segmented REGRANS Stevens -#> 89.44561 88.71463 89.43822 91.10524 +piecewise_mods(fc, xvar = "x", yvar = "y", method = "all") +#> chngpt segmented REGRANS +#> 89.44561 88.71463 89.43822 +#> Stevens Two_line.breakpoint Two_line.intersection +#> 91.10524 106.06548 1383.97444 ``` ### Clustering methods diff --git a/_pkgdown.yml b/_pkgdown.yml index 9c7c221..0ceec86 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,5 @@ url: https://rmk118.github.io/morphmat/ template: bootstrap: 5 + math-rendering: katex diff --git a/man/broken_stick.Rd b/man/broken_stick.Rd deleted file mode 100644 index de9f698..0000000 --- a/man/broken_stick.Rd +++ /dev/null @@ -1,59 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/broken_stick.R -\name{broken_stick} -\alias{broken_stick} -\title{Broken-stick (segmented) approaches to estimating SM50} -\usage{ -broken_stick( - dat, - xvar, - yvar, - ci = 95, - lower = NULL, - upper = NULL, - trans = "none", - method = c("segmented", "chngpt", "regrans", "stevens", "all") -) -} -\arguments{ -\item{dat}{data frame or matrix containing the data} - -\item{xvar}{Name of column (integer or double) of measurements for the x-axis -variable (e.g., carapace width).} - -\item{yvar}{Name of column (integer or double) of measurements for the y-axis -variable (e.g., claw height).} - -\item{ci}{Integer; type of confidence intervals to return for SM50, defaults -to 95\%.} - -\item{lower}{Integer or double; the lower bound for possible SM50 values. -Must be on the same trans of the data. Defaults to the 20th percentile of -the x-variable.} - -\item{upper}{Integer or double; the upper bound for possible SM50 values. -Must be on the same trans of the data. Defaults to the 80th percentile of -the x-variable.} - -\item{trans}{Transformation to be applied to the data before performing the -regression: "none", "log" (both variables are log-transformed), or "std" -(both variables are standardized = scaled and centered). If no string is -provided, no transformation is performed (i.e., the default is "none").} - -\item{method}{Method to use for the regression. A single string or string -vector containing one or more of c("segmented", "chngpt", "regrans", -"stevens"), or "all" to return the results of all methods for comparison.} -} -\value{ -An estimate of SM50 from the specified method(s). -} -\description{ -A wrapper function allowing for multiple methods of broken-stick regression -to be applied using a standard format for inputs. See -\code{vignette("broken-stick")} for more information. -} -\examples{ -set.seed(12) -fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) -broken_stick(fc, xvar = "x", yvar = "y", method = c("segmented", "chngpt")) -} diff --git a/man/broken_stick_stevens.Rd b/man/broken_stick_stevens.Rd index 9c60103..0c4c9c2 100644 --- a/man/broken_stick_stevens.Rd +++ b/man/broken_stick_stevens.Rd @@ -10,6 +10,7 @@ broken_stick_stevens( yvar, lower = NULL, upper = NULL, + log = FALSE, verbose = FALSE ) } @@ -30,6 +31,9 @@ the x-variable.} Must be on the same scale of the data. Defaults to the 80th percentile of the x-variable.} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + \item{verbose}{Should additional output be returned besides the SM50 estimate?} } @@ -41,7 +45,12 @@ immature amd mature slope and intercept parameters, and the F and p-values for the final piecewise model. } \description{ -Broken-stick method from Bradley Stevens +Fits a broken-stick model to estimate size at maturity. Code +adapted from Dr. Bradley Stevens at the University of Maryland Eastern +Shore. Differs from the broken-stick methods implemented in \code{regrans()}, +\code{chngpt::chngpt()}, \code{segmented::segmented()}, \code{SiZer::piecewise.linear()}, +etc. in that only values of the x-axis variable present in the data are +tested as possible SM50 values. } \examples{ set.seed(12) diff --git a/man/cluster_mods.Rd b/man/cluster_mods.Rd new file mode 100644 index 0000000..387c6ce --- /dev/null +++ b/man/cluster_mods.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cluster.R +\name{cluster_mods} +\alias{cluster_mods} +\title{Classification approaches to estimating SM50} +\usage{ +cluster_mods( + dat, + xvar, + yvar, + log = FALSE, + method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all") +) +} +\arguments{ +\item{dat}{data frame or matrix containing the data} + +\item{xvar}{Name of column (integer or double) of measurements for the x-axis +variable (e.g., carapace width).} + +\item{yvar}{Name of column (integer or double) of measurements for the y-axis +variable (e.g., claw height).} + +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + +\item{method}{Classification method to use. A single string or vector +containing one or more of c("mclust", "Somerton", "kmeans", "hclust", "infl_pt"), or "all" to return the results of all methods for comparison.} +} +\value{ +An estimate of SM50 from the specified method(s). +} +\description{ +Classification approaches to estimating SM50 +} +\examples{ +set.seed(12) +fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +cluster_mods(fc, xvar = "x", yvar = "y", method = c("kmeans")) +} diff --git a/man/infl_pt.Rd b/man/infl_pt.Rd index 4a6db2f..324081c 100644 --- a/man/infl_pt.Rd +++ b/man/infl_pt.Rd @@ -4,7 +4,7 @@ \alias{infl_pt} \title{Maturity classification based on the minimum density of CH/CW ratios} \usage{ -infl_pt(dat, x, y, plot = FALSE) +infl_pt(dat, x, y, log = FALSE, plot = FALSE) } \arguments{ \item{dat}{optional data frame or matrix containing the data} @@ -15,6 +15,9 @@ for the x-axis variable (e.g., carapace width).} \item{y}{Name of column (or integer or double vector) containing measurements for the y-axis variable (e.g., claw height).} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + \item{plot}{Boolean; should a plot of the density curve with the identified minimum be created?} } @@ -32,7 +35,7 @@ For example, this would be an effective classification method if the transition to maturity of a population of Tanner crabs (\emph{Chionoecetes bairdi}) was evident by an increase in the log(claw height)/log(carapace width) ratio from below 0.2 to above -0.2. \code{infl_pt_fun()} finds this discriminating line by creating a kernel +0.2. \code{infl_pt()} finds this discriminating line by creating a kernel density estimate (visually similar to a smoothed histogram) of the y-var/x-var ratio for all points, then finding the local minimum separating the two peaks representing the maturity clusters. @@ -45,8 +48,9 @@ y <- rnorm(100, mean = 15, sd = 3) z <- c(x, y) hist(z) dat1 <- data.frame(xvar = rep(1, 200), yvar = z) -infl_pt(dat1, "xvar", "yvar", TRUE) +infl_pt(dat1, "xvar", "yvar", plot = TRUE) fc <- fake_crustaceans(n = 100, allo_params = c(1, 0.2, 1.1, 0.2)) -infl_pt(fc, "x", "y", TRUE) +infl_pt(fc, "x", "y", plot = TRUE) +infl_pt(fc, "x", "y", log = TRUE, plot = TRUE) } diff --git a/man/piecewise_mods.Rd b/man/piecewise_mods.Rd new file mode 100644 index 0000000..381d275 --- /dev/null +++ b/man/piecewise_mods.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/piecewise.R +\name{piecewise_mods} +\alias{piecewise_mods} +\title{Piecewise regression approaches to estimating SM50} +\usage{ +piecewise_mods( + dat, + xvar, + yvar, + lower = NULL, + upper = NULL, + log = FALSE, + method = c("segmented", "chngpt", "regrans", "stevens", "TL", "all") +) +} +\arguments{ +\item{dat}{data frame or matrix containing the data} + +\item{xvar}{Name of column (integer or double) of measurements for the x-axis +variable (e.g., carapace width).} + +\item{yvar}{Name of column (integer or double) of measurements for the y-axis +variable (e.g., claw height).} + +\item{lower}{Integer or double; the lower bound for possible SM50 values. +Must be on the same trans of the data. Defaults to the 20th percentile of +the x-variable.} + +\item{upper}{Integer or double; the upper bound for possible SM50 values. +Must be on the same trans of the data. Defaults to the 80th percentile of +the x-variable.} + +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + +\item{method}{Method to use for the regression. A single string or vector +containing one or more of c("segmented", "chngpt", "regrans", "stevens", +"TL"), or "all" to return the results of all methods for comparison.} +} +\value{ +An estimate of SM50 from the specified method(s). +} +\description{ +A wrapper function allowing for multiple methods of piecewise regression to +be applied using a standard format for inputs. See \code{vignette("broken-stick")} +and \code{vignette("two-line")} for more information. +} +\details{ +The \code{two_line_logistic()} function is closely related but not included in this wrapper function because you will generally want more control over the initial values of the parameters and the nonlinear least-squares algorithm may not always converge. + +This function is primarily intended for easy comparison between the SM50 estimates produced by a variety of different piecewise regression models. For follow-up analyses, we recommend calling the specific function(s) of interest (\code{regrans()}, \code{broken_stick_stevens()}, \code{two_line()}, \code{segmented::segmented()}, or \code{chngpt::chngpt()}) and exploring how changing the upper and lower bounds for possible SM50 values and the number of breakpoints to be tested may influence the resulting estimates. +} +\examples{ +set.seed(12) +fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +piecewise_mods(fc, xvar = "x", yvar = "y", method = c("segmented", "chngpt")) +} diff --git a/man/regrans.Rd b/man/regrans.Rd index 3b21dc2..28ed074 100644 --- a/man/regrans.Rd +++ b/man/regrans.Rd @@ -10,6 +10,7 @@ regrans( yvar, lower = NULL, upper = NULL, + log = FALSE, verbose = FALSE, n_tries = 100 ) @@ -31,6 +32,9 @@ the x-variable.} Must be on the same scale of the data. Defaults to the 80th percentile of the x-variable.} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + \item{verbose}{Return all breakpoints tested and their sum of squares, or only the estimated SM50?} diff --git a/man/somerton.Rd b/man/somerton.Rd index 20e3935..05de94f 100644 --- a/man/somerton.Rd +++ b/man/somerton.Rd @@ -8,7 +8,7 @@ somerton( dat, xvar, yvar, - trans = "none", + log = FALSE, lower = NULL, upper = NULL, max_iter = 50 @@ -23,10 +23,8 @@ axis variable (e.g., carapace width).} \item{yvar}{Name of column (integer or double) of measurements for the y-axis variable (e.g., claw height).} -\item{trans}{Transformation to be applied to the data before performing the -regression: "none", "log" (both variables are log-transformed), or "std" -(both variables are standardized = scaled and centered). If no string is -provided, no transformation is performed (i.e., the default is "none").} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} \item{lower}{Integer or double; the lower bound for possible SM50 values. Must be on the same trans of the data. Defaults to the 20th percentile of diff --git a/man/two_line.Rd b/man/two_line.Rd index 0aadf82..99feaa8 100644 --- a/man/two_line.Rd +++ b/man/two_line.Rd @@ -12,6 +12,7 @@ two_line( upper = NULL, verbose = FALSE, bps = "even", + log = FALSE, num_bps = 100 ) } @@ -40,6 +41,9 @@ only observed values of the x-variable ("obs"), or should it be a specified number of evenly-spaced values between the lower and upper limits of the unknown region ("even", the default)} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + \item{num_bps}{When \code{bps = "even"}, how many values should be tested as possible endpoints? Defaults to 100, but should be increased.} } @@ -74,5 +78,5 @@ two_line(fc, xvar = "x", yvar = "y", verbose = FALSE) \seealso{ \code{\link[=two_line_logistic]{two_line_logistic()}} for an alternative two-line model with a logistic transition between the left and right segments and -\code{\link[=broken_stick]{broken_stick()}} for segmented/piecewise regression methods. +\code{\link[=piecewise_mods]{piecewise_mods()}} for segmented/piecewise regression methods. } diff --git a/man/two_line_logistic.Rd b/man/two_line_logistic.Rd index 64b9434..25d70e3 100644 --- a/man/two_line_logistic.Rd +++ b/man/two_line_logistic.Rd @@ -10,10 +10,11 @@ two_line_logistic( yvar, imm_int = 1, imm_slope = 0.2, - mat_int = 1, + mat_int = 4, mat_slope = 0.3, SM50_start = NULL, alpha_start = 9, + log = FALSE, verbose = FALSE ) } @@ -44,6 +45,9 @@ model. If not provided, taken to be the median of the x-variable} \item{alpha_start}{Starting value for the logistic slope parameter when fitting the NLS model} +\item{log}{Boolean; should both variables be log-transformed before performing the +regression? Defaults to FALSE.} + \item{verbose}{Should additional output be returned besides the SM50 estimate?} } @@ -56,6 +60,6 @@ Two-line logistic model } \examples{ set.seed(12) -fc <- fake_crustaceans(n=100, L50=100, allo_params=c(1, 0.2, 1.1, 0.2)) -two_line_logistic(fc, xvar="x", yvar="y", verbose = FALSE) +fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) +two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE) } diff --git a/tests/testthat/test-broken_stick_stevens.R b/tests/testthat/test-broken_stick_stevens.R index 13ec8dd..4f3784d 100644 --- a/tests/testthat/test-broken_stick_stevens.R +++ b/tests/testthat/test-broken_stick_stevens.R @@ -1,17 +1,24 @@ test_that("function returns expected value", { set.seed(123) - fc <- fake_crustaceans(n=100, L50=100, allo_params=c(1, 0.2, 1.1, 0.2)) - expect_equal( - round(broken_stick_stevens(fc, xvar="x", yvar="y", verbose = FALSE),4), - 121.4316 - ) + fc <- fake_crustaceans(n = 100, + L50 = 100, + allo_params = c(1, 0.2, 1.1, 0.2)) + expect_equal(round(broken_stick_stevens( + fc, + xvar = "x", + yvar = "y", + verbose = FALSE + ), 4), 121.4316) x1 <- c(1:100) - y1 <- c(1:75)*2+rnorm(75, 0, 10) - y2 <- c(76:100)*4+rnorm(25, 3, 10) - testdat <- data.frame(x=x1, y=c(y1, y2)) - expect_equal( - broken_stick_stevens(testdat, xvar="x", yvar="y", verbose = FALSE), - 66 - ) + y1 <- c(1:75) * 2 + rnorm(75, 0, 10) + y2 <- c(76:100) * 4 + rnorm(25, 3, 10) + testdat <- data.frame(x = x1, y = c(y1, y2)) + expect_equal(broken_stick_stevens( + testdat, + xvar = "x", + yvar = "y", + verbose = FALSE + ), + 66) }) diff --git a/tests/testthat/test-broken_stick.R b/tests/testthat/test-piecewise_mods.R similarity index 72% rename from tests/testthat/test-broken_stick.R rename to tests/testthat/test-piecewise_mods.R index b3fa430..cbfbffd 100644 --- a/tests/testthat/test-broken_stick.R +++ b/tests/testthat/test-piecewise_mods.R @@ -1,9 +1,9 @@ -test_that("Stevens wrapper works", { +test_that("Piecewise regression wrapper works", { set.seed(123) fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2)) - expect_equal(round(broken_stick( + expect_equal(round(piecewise_mods( fc, xvar = "x", yvar = "y", diff --git a/vignettes/morphmat.Rmd b/vignettes/morphmat.Rmd new file mode 100644 index 0000000..2f5dd8c --- /dev/null +++ b/vignettes/morphmat.Rmd @@ -0,0 +1,76 @@ +--- +title: "morphmat" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{morphmat} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(morphmat) +library(ggplot2) +``` + +We will start by simulating a data set with a known SM50 value of 75 mm to demonstrate an example workflow using `morphmat`. +```{r} +#| label: generate-crabs + +set.seed(12) # set seed for reproducibility + +fc <- fake_crustaceans( + error_scale = 17, + slope = 9, + L50 = 75, + n = 800, + allo_params = c(0.9, 0.25, 1.05, 0.2), + x_mean = 85 +) + +mytheme <- theme_classic() + # define custom theme for ggplots + theme( + axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)), + axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)), + text = element_text(size = 13)) +``` + +# Plot your data + +## Original scale +```{r} +ggplot() + + geom_point(data = fc, aes(x, y), alpha = 0.4) + + labs(x = "Carapace width (mm)", y = "Claw height (mm)", ) + + mytheme +``` + +## Log-log scale +```{r} +ggplot() + + geom_point(data = fc, aes(log_x, log_y), alpha = 0.4) + + labs(x = "Log carapace width (mm)", y = "Log claw height (mm)", ) + + mytheme +``` + +# Suggested method + +## mclust for classification + +```{r} + +``` + + +## GLM for obtaining SM50 value + +# Comparing clustering approaches + +# Comparison with piecewise regression models +