fixed piecewise function, updated log transformation arguments

rmk118 · Dec 2, 2024 · 942d77b · 942d77b
1 parent f1d512b
commit 942d77b
Show file tree

Hide file tree

Showing 27 changed files with 585 additions and 314 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,9 @@ Imports:
     rlang,
     stats,
     segmented,
-    chngpt
+    chngpt,
+    mclust,
+    minpack.lm
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,18 +1,21 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(broken_stick)
 export(broken_stick_stevens)
+export(cluster_mods)
 export(fake_crustaceans)
 export(infl_pt)
+export(piecewise_mods)
 export(regrans)
 export(somerton)
 export(two_line)
 export(two_line_logistic)
 import(ggplot2)
 import(rlang)
-importFrom(ggplot2,aes)
 importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%>%")
-importFrom(rlang,.data)
+importFrom(mclust,Mclust)
+importFrom(minpack.lm,nls.lm.control)
+importFrom(minpack.lm,nlsLM)
+importFrom(splus2R,peaks)
diff --git a/R/broken_stick.R b/R/broken_stick.R
diff --git a/R/broken_stick_stevens.R b/R/broken_stick_stevens.R
@@ -1,5 +1,12 @@
 #' Broken-stick method from Bradley Stevens
 #'
+#' @description Fits a broken-stick model to estimate size at maturity. Code
+#'   adapted from Dr. Bradley Stevens at the University of Maryland Eastern
+#'   Shore. Differs from the broken-stick methods implemented in `regrans()`,
+#'   `chngpt::chngpt()`, `segmented::segmented()`, `SiZer::piecewise.linear()`,
+#'   etc. in that only values of the x-axis variable present in the data are
+#'   tested as possible SM50 values.
+#'
 #' @param dat data frame or matrix containing the data
 #' @param xvar Name of column (integer or double) of measurements for the x-axis
 #'   variable (e.g., carapace width).
@@ -11,6 +18,8 @@
 #' @param lower Integer or double; the lower bound for possible SM50 values.
 #'   Must be on the same scale of the data. Defaults to the 20th percentile of
 #'   the x-variable.
+#' @param log Boolean; should both variables be log-transformed before performing the
+#'   regression? Defaults to FALSE.
 #' @param verbose Should additional output be returned besides the SM50
 #'   estimate?
 #'
@@ -30,19 +39,26 @@ broken_stick_stevens <- function(dat,
                                  yvar,
                                  lower = NULL,
                                  upper = NULL,
+                                 log = FALSE,
                                  verbose = FALSE) {
 
-  stevens <- dat %>% dplyr::arrange(.data[[xvar]])
+  new_dat <- dat %>% dplyr::arrange(.data[[xvar]])
 
-  xraw <- stevens[[xvar]]
-  yraw <- stevens[[yvar]]
+  if (isTRUE(log)) {
+    xraw <- log(new_dat[[xvar]])
+    yraw <- log(new_dat[[yvar]])
+  }
+  else {
+    xraw <- new_dat[[xvar]]
+    yraw <- new_dat[[yvar]]
+  }
 
   if (is.null(lower)) {
-    lower <- stats::quantile(xraw, 0.2)
+    lower <- stats::quantile(xraw, 0.2, names = FALSE)
   }
 
   if (is.null(upper)) {
-    upper <- stats::quantile(xraw, 0.8)
+    upper <- stats::quantile(xraw, 0.8, names = FALSE)
   }
 
   left_x <- (xraw <= lower) # T/F vector
@@ -52,28 +68,28 @@ broken_stick_stevens <- function(dat,
   min_x <- xraw[low_ndx] # lowest T value
   min_y <- yraw[low_ndx] # lowest T value
 
-  stevens$xvar <- xraw
-  stevens$yvar <- yraw
+  new_dat$xvar <- xraw
+  new_dat$yvar <- yraw
 
   # null model - single line to describe both maturity stages
-  lm0 <- stats::lm(yvar ~ xvar, data = stevens)
+  lm0 <- stats::lm(yvar ~ xvar, data = new_dat)
   rss0 <- stats::anova(lm0)[[2, 2]] # residual sum of squares
   ms0 <- stats::anova(lm0)[[3]] # mean squared error
   F0 <- ms0[1] / ms0[2] # F value
-  n0 <- dim(stevens)[1]
+  n0 <- dim(new_dat)[1]
   rss_min <- rss0
   mse0 <- mean(lm0$residuals ^ 2)
 
   # assign group membership
   # 1 = left line, 2 = right line
-  memb <- rep(1, nrow(stevens))
+  memb <- rep(1, nrow(new_dat))
   memb_low <- (xraw <= min_x) # T/F list if less than low range
   memb_high <- (yraw > min_y) # T/F list if GT than high range
   memb[memb_low] <- 1 # assign 1 to those < low
   memb[memb_high] <- 2 # assign 2 to those > high
   memb_sum1 <- summary(as.factor(memb))
-  stevens$prior <- memb
-  stevens$group <- memb
+  new_dat$prior <- memb
+  new_dat$group <- memb
 
   run <- 0
 
@@ -83,7 +99,7 @@ broken_stick_stevens <- function(dat,
     # Left regression
     lm1 <- stats::lm(
       I(yvar[memb == 1] - min_y) ~ 0 + I(xvar[memb == 1] - min_x),
-      data = stevens
+      data = new_dat
       )
     b1 <- stats::coef(lm1)[[1]]
     a1 <- min_y - (b1 * min_x)
@@ -94,7 +110,7 @@ broken_stick_stevens <- function(dat,
     # Right regression
     lm2 <- stats::lm(
       I(yvar[memb == 2] - min_y) ~ 0 + I(xvar[memb == 2] - min_x),
-      data = stevens
+      data = new_dat
       )
     b2 <- stats::coef(lm2)[[1]]
     a2 <- min_y - (b2 * min_x)
@@ -126,26 +142,24 @@ broken_stick_stevens <- function(dat,
 
     # next point
     low_ndx <- low_ndx + 1
-    min_x <- stevens$xvar[low_ndx]
-    min_y <- stevens$yvar[low_ndx]
-    memb_low <- stevens$xvar <= min_x # T/F list if less than low range
-    memb_high <- stevens$xvar > min_x # T/F list if GT than high range
+    min_x <- new_dat$xvar[low_ndx]
+    min_y <- new_dat$yvar[low_ndx]
+    memb_low <- new_dat$xvar <= min_x # T/F list if less than low range
+    memb_high <- new_dat$xvar > min_x # T/F list if GT than high range
     memb[memb_low] <- 1 # assign 1 to those < low
     memb[memb_high] <- 2 # assign 2 to those > high
   } # end loop
 
   SM50 <- joint_x
 
-  memb_low <- stevens$xvar <= joint_x # T/F list if less than low range
-  memb_high <- stevens$xvar > joint_x # T/F list if GT than high range
+  memb_low <- new_dat$xvar <= joint_x # T/F list if less than low range
+  memb_high <- new_dat$xvar > joint_x # T/F list if GT than high range
   memb[memb_low] <- 1 # assign 1 to those < low
   memb[memb_high] <- 2 # assign 2 to those > high
-  stevens$group <- memb
-  memb_sum2 <- summary(as.factor(stevens$group))
-  n_tot <- sum(memb_sum2)
+  new_dat$group <- memb
 
   output <- list(
-    data = stevens %>% dplyr::select(-c("xvar", "yvar", "prior")),
+    data = new_dat %>% dplyr::select(-c("xvar", "yvar", "prior")),
     SM50 = SM50,
     imm_slope = b1_1,
     imm_int = a1_1,

diff --git a/R/cluster.R b/R/cluster.R
@@ -0,0 +1,40 @@
+#' Classification approaches to estimating SM50
+#'
+#' @param dat data frame or matrix containing the data
+#' @param xvar Name of column (integer or double) of measurements for the x-axis
+#'   variable (e.g., carapace width).
+#' @param yvar Name of column (integer or double) of measurements for the y-axis
+#'   variable (e.g., claw height).
+#' @param log Boolean; should both variables be log-transformed before performing the
+#'   regression? Defaults to FALSE.
+#' @param method Classification method to use. A single string or vector
+#'   containing one or more of c("mclust", "Somerton", "kmeans", "hclust", "infl_pt"), or "all" to return the results of all methods for comparison.
+#' @returns An estimate of SM50 from the specified method(s).
+#' @export
+#'
+#' @examples
+#' set.seed(12)
+#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
+#' cluster_mods(fc, xvar = "x", yvar = "y", method = c("kmeans"))
+cluster_mods <- function(dat,
+                         xvar,
+                         yvar,
+                         log = FALSE,
+                         method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all")) {
+
+  new_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]])
+
+  if (isTRUE(log)) {
+    new_dat$xvar <- log(dat[[xvar]])
+    new_dat$yvar <- log(dat[[yvar]])
+  }
+
+  if ("kmeans" %in% method | "all" %in% method) {
+
+    out <- new_dat
+
+}
+
+  return(out)
+
+}
diff --git a/R/infl_pt.R b/R/infl_pt.R
@@ -9,18 +9,18 @@
 #' transition to maturity of a population of Tanner crabs
 #' (*Chionoecetes bairdi*) was evident by an increase in the
 #' log(claw height)/log(carapace width) ratio from below 0.2 to above
-#' 0.2. `infl_pt_fun()` finds this discriminating line by creating a kernel
+#' 0.2. `infl_pt()` finds this discriminating line by creating a kernel
 #' density estimate (visually similar to a smoothed histogram) of the
 #' y-var/x-var ratio for all points, then finding the local minimum separating
 #' the two peaks representing the maturity clusters.
 #'
-#' @import ggplot2
-#' @importFrom rlang .data
 #' @param dat optional data frame or matrix containing the data
 #' @param x Name of column (or integer or double vector) containing measurements
 #'   for the x-axis variable (e.g., carapace width).
 #' @param y Name of column (or integer or double vector) containing measurements
 #'   for the y-axis variable (e.g., claw height).
+#' @param log Boolean; should both variables be log-transformed before performing the
+#'   regression? Defaults to FALSE.
 #' @param plot Boolean; should a plot of the density curve with the identified
 #'   minimum be created?
 #'
@@ -37,13 +37,20 @@
 #' z <- c(x, y)
 #' hist(z)
 #' dat1 <- data.frame(xvar = rep(1, 200), yvar = z)
-#' infl_pt(dat1, "xvar", "yvar", TRUE)
+#' infl_pt(dat1, "xvar", "yvar", plot = TRUE)
 #' fc <- fake_crustaceans(n = 100, allo_params = c(1, 0.2, 1.1, 0.2))
-#' infl_pt(fc, "x", "y", TRUE)
+#' infl_pt(fc, "x", "y", plot = TRUE)
+#' infl_pt(fc, "x", "y", log = TRUE, plot = TRUE)
 #'
-infl_pt <- function(dat, x, y, plot = FALSE) {
+infl_pt <- function(dat, x, y, log = FALSE, plot = FALSE) {
   # find the ratio between the two morphometric variables
-  ratio <- dat[[y]]/dat[[x]]
+  if (isTRUE(log)) {
+    ratio <- log(dat[[y]])/log(dat[[x]])
+  }
+  else {
+    ratio <- dat[[y]]/dat[[x]]
+  }
+
 
   # compute a kernel density estimate (essentially a smoothed histogram)
   density_test <- stats::density(ratio)

diff --git a/R/morphmat-package.R b/R/morphmat-package.R
@@ -2,9 +2,13 @@
 "_PACKAGE"
 
 ## usethis namespace: start
+#' @import ggplot2
 #' @import rlang
-#' @importFrom ggplot2 aes
 #' @importFrom glue glue
 #' @importFrom lifecycle deprecated
+#' @importFrom mclust Mclust
+#' @importFrom splus2R peaks
+#' @importFrom minpack.lm nls.lm.control
+#' @importFrom minpack.lm nlsLM
 ## usethis namespace: end
 NULL