Skip to content

Commit

Permalink
fixed piecewise function, updated log transformation arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
rmk118 committed Dec 2, 2024
1 parent f1d512b commit 942d77b
Show file tree
Hide file tree
Showing 27 changed files with 585 additions and 314 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ Imports:
rlang,
stats,
segmented,
chngpt
chngpt,
mclust,
minpack.lm
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Expand Down
9 changes: 6 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(broken_stick)
export(broken_stick_stevens)
export(cluster_mods)
export(fake_crustaceans)
export(infl_pt)
export(piecewise_mods)
export(regrans)
export(somerton)
export(two_line)
export(two_line_logistic)
import(ggplot2)
import(rlang)
importFrom(ggplot2,aes)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(mclust,Mclust)
importFrom(minpack.lm,nls.lm.control)
importFrom(minpack.lm,nlsLM)
importFrom(splus2R,peaks)
117 changes: 0 additions & 117 deletions R/broken_stick.R

This file was deleted.

62 changes: 38 additions & 24 deletions R/broken_stick_stevens.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#' Broken-stick method from Bradley Stevens
#'
#' @description Fits a broken-stick model to estimate size at maturity. Code
#' adapted from Dr. Bradley Stevens at the University of Maryland Eastern
#' Shore. Differs from the broken-stick methods implemented in `regrans()`,
#' `chngpt::chngpt()`, `segmented::segmented()`, `SiZer::piecewise.linear()`,
#' etc. in that only values of the x-axis variable present in the data are
#' tested as possible SM50 values.
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
#' variable (e.g., carapace width).
Expand All @@ -11,6 +18,8 @@
#' @param lower Integer or double; the lower bound for possible SM50 values.
#' Must be on the same scale of the data. Defaults to the 20th percentile of
#' the x-variable.
#' @param log Boolean; should both variables be log-transformed before performing the
#' regression? Defaults to FALSE.
#' @param verbose Should additional output be returned besides the SM50
#' estimate?
#'
Expand All @@ -30,19 +39,26 @@ broken_stick_stevens <- function(dat,
yvar,
lower = NULL,
upper = NULL,
log = FALSE,
verbose = FALSE) {

stevens <- dat %>% dplyr::arrange(.data[[xvar]])
new_dat <- dat %>% dplyr::arrange(.data[[xvar]])

xraw <- stevens[[xvar]]
yraw <- stevens[[yvar]]
if (isTRUE(log)) {
xraw <- log(new_dat[[xvar]])
yraw <- log(new_dat[[yvar]])
}
else {
xraw <- new_dat[[xvar]]
yraw <- new_dat[[yvar]]
}

if (is.null(lower)) {
lower <- stats::quantile(xraw, 0.2)
lower <- stats::quantile(xraw, 0.2, names = FALSE)
}

if (is.null(upper)) {
upper <- stats::quantile(xraw, 0.8)
upper <- stats::quantile(xraw, 0.8, names = FALSE)
}

left_x <- (xraw <= lower) # T/F vector
Expand All @@ -52,28 +68,28 @@ broken_stick_stevens <- function(dat,
min_x <- xraw[low_ndx] # lowest T value
min_y <- yraw[low_ndx] # lowest T value

stevens$xvar <- xraw
stevens$yvar <- yraw
new_dat$xvar <- xraw
new_dat$yvar <- yraw

# null model - single line to describe both maturity stages
lm0 <- stats::lm(yvar ~ xvar, data = stevens)
lm0 <- stats::lm(yvar ~ xvar, data = new_dat)
rss0 <- stats::anova(lm0)[[2, 2]] # residual sum of squares
ms0 <- stats::anova(lm0)[[3]] # mean squared error
F0 <- ms0[1] / ms0[2] # F value
n0 <- dim(stevens)[1]
n0 <- dim(new_dat)[1]
rss_min <- rss0
mse0 <- mean(lm0$residuals ^ 2)

# assign group membership
# 1 = left line, 2 = right line
memb <- rep(1, nrow(stevens))
memb <- rep(1, nrow(new_dat))
memb_low <- (xraw <= min_x) # T/F list if less than low range
memb_high <- (yraw > min_y) # T/F list if GT than high range
memb[memb_low] <- 1 # assign 1 to those < low
memb[memb_high] <- 2 # assign 2 to those > high
memb_sum1 <- summary(as.factor(memb))
stevens$prior <- memb
stevens$group <- memb
new_dat$prior <- memb
new_dat$group <- memb

run <- 0

Expand All @@ -83,7 +99,7 @@ broken_stick_stevens <- function(dat,
# Left regression
lm1 <- stats::lm(
I(yvar[memb == 1] - min_y) ~ 0 + I(xvar[memb == 1] - min_x),
data = stevens
data = new_dat
)
b1 <- stats::coef(lm1)[[1]]
a1 <- min_y - (b1 * min_x)
Expand All @@ -94,7 +110,7 @@ broken_stick_stevens <- function(dat,
# Right regression
lm2 <- stats::lm(
I(yvar[memb == 2] - min_y) ~ 0 + I(xvar[memb == 2] - min_x),
data = stevens
data = new_dat
)
b2 <- stats::coef(lm2)[[1]]
a2 <- min_y - (b2 * min_x)
Expand Down Expand Up @@ -126,26 +142,24 @@ broken_stick_stevens <- function(dat,

# next point
low_ndx <- low_ndx + 1
min_x <- stevens$xvar[low_ndx]
min_y <- stevens$yvar[low_ndx]
memb_low <- stevens$xvar <= min_x # T/F list if less than low range
memb_high <- stevens$xvar > min_x # T/F list if GT than high range
min_x <- new_dat$xvar[low_ndx]
min_y <- new_dat$yvar[low_ndx]
memb_low <- new_dat$xvar <= min_x # T/F list if less than low range
memb_high <- new_dat$xvar > min_x # T/F list if GT than high range
memb[memb_low] <- 1 # assign 1 to those < low
memb[memb_high] <- 2 # assign 2 to those > high
} # end loop

SM50 <- joint_x

memb_low <- stevens$xvar <= joint_x # T/F list if less than low range
memb_high <- stevens$xvar > joint_x # T/F list if GT than high range
memb_low <- new_dat$xvar <= joint_x # T/F list if less than low range
memb_high <- new_dat$xvar > joint_x # T/F list if GT than high range
memb[memb_low] <- 1 # assign 1 to those < low
memb[memb_high] <- 2 # assign 2 to those > high
stevens$group <- memb
memb_sum2 <- summary(as.factor(stevens$group))
n_tot <- sum(memb_sum2)
new_dat$group <- memb

output <- list(
data = stevens %>% dplyr::select(-c("xvar", "yvar", "prior")),
data = new_dat %>% dplyr::select(-c("xvar", "yvar", "prior")),
SM50 = SM50,
imm_slope = b1_1,
imm_int = a1_1,
Expand Down
40 changes: 40 additions & 0 deletions R/cluster.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Classification approaches to estimating SM50
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
#' variable (e.g., carapace width).
#' @param yvar Name of column (integer or double) of measurements for the y-axis
#' variable (e.g., claw height).
#' @param log Boolean; should both variables be log-transformed before performing the
#' regression? Defaults to FALSE.
#' @param method Classification method to use. A single string or vector
#' containing one or more of c("mclust", "Somerton", "kmeans", "hclust", "infl_pt"), or "all" to return the results of all methods for comparison.
#' @returns An estimate of SM50 from the specified method(s).
#' @export
#'
#' @examples
#' set.seed(12)
#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
#' cluster_mods(fc, xvar = "x", yvar = "y", method = c("kmeans"))
cluster_mods <- function(dat,
xvar,
yvar,
log = FALSE,
method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all")) {

new_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]])

if (isTRUE(log)) {
new_dat$xvar <- log(dat[[xvar]])
new_dat$yvar <- log(dat[[yvar]])
}

if ("kmeans" %in% method | "all" %in% method) {

out <- new_dat

}

return(out)

}
21 changes: 14 additions & 7 deletions R/infl_pt.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@
#' transition to maturity of a population of Tanner crabs
#' (*Chionoecetes bairdi*) was evident by an increase in the
#' log(claw height)/log(carapace width) ratio from below 0.2 to above
#' 0.2. `infl_pt_fun()` finds this discriminating line by creating a kernel
#' 0.2. `infl_pt()` finds this discriminating line by creating a kernel
#' density estimate (visually similar to a smoothed histogram) of the
#' y-var/x-var ratio for all points, then finding the local minimum separating
#' the two peaks representing the maturity clusters.
#'
#' @import ggplot2
#' @importFrom rlang .data
#' @param dat optional data frame or matrix containing the data
#' @param x Name of column (or integer or double vector) containing measurements
#' for the x-axis variable (e.g., carapace width).
#' @param y Name of column (or integer or double vector) containing measurements
#' for the y-axis variable (e.g., claw height).
#' @param log Boolean; should both variables be log-transformed before performing the
#' regression? Defaults to FALSE.
#' @param plot Boolean; should a plot of the density curve with the identified
#' minimum be created?
#'
Expand All @@ -37,13 +37,20 @@
#' z <- c(x, y)
#' hist(z)
#' dat1 <- data.frame(xvar = rep(1, 200), yvar = z)
#' infl_pt(dat1, "xvar", "yvar", TRUE)
#' infl_pt(dat1, "xvar", "yvar", plot = TRUE)
#' fc <- fake_crustaceans(n = 100, allo_params = c(1, 0.2, 1.1, 0.2))
#' infl_pt(fc, "x", "y", TRUE)
#' infl_pt(fc, "x", "y", plot = TRUE)
#' infl_pt(fc, "x", "y", log = TRUE, plot = TRUE)
#'
infl_pt <- function(dat, x, y, plot = FALSE) {
infl_pt <- function(dat, x, y, log = FALSE, plot = FALSE) {
# find the ratio between the two morphometric variables
ratio <- dat[[y]]/dat[[x]]
if (isTRUE(log)) {
ratio <- log(dat[[y]])/log(dat[[x]])
}
else {
ratio <- dat[[y]]/dat[[x]]
}


# compute a kernel density estimate (essentially a smoothed histogram)
density_test <- stats::density(ratio)
Expand Down
6 changes: 5 additions & 1 deletion R/morphmat-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
"_PACKAGE"

## usethis namespace: start
#' @import ggplot2
#' @import rlang
#' @importFrom ggplot2 aes
#' @importFrom glue glue
#' @importFrom lifecycle deprecated
#' @importFrom mclust Mclust
#' @importFrom splus2R peaks
#' @importFrom minpack.lm nls.lm.control
#' @importFrom minpack.lm nlsLM
## usethis namespace: end
NULL
Loading

0 comments on commit 942d77b

Please sign in to comment.