Skip to content

Commit

Permalink
started two-line funs
Browse files Browse the repository at this point in the history
  • Loading branch information
rmk118 committed Oct 1, 2024
1 parent 4d54483 commit 7b290ad
Show file tree
Hide file tree
Showing 7 changed files with 365 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export(broken_stick_stevens)
export(fake_crabs)
export(infl_pt)
export(regrans_fun)
export(two_line_logistic)
export(two_line_stevens)
import(ggplot2)
import(rlang)
importFrom(ggplot2,aes)
Expand Down
10 changes: 5 additions & 5 deletions R/broken_stick_stevens.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
#' @param verbose Should additional output be returned besides the SM50
#' estimate?
#'
#' @returns If verbose is FALSE (the default), an estimate of SM50 from the
#' specified method(s). Otherwise, output is a list that also includes the
#' original data with a column representing which line (immature or mature)
#' the point was assigned to, the immature amd mature slope and intercept
#' parameters, and the F and p-values for the final piecewise model.
#' @returns If verbose is FALSE (the default), an estimate of SM50. Otherwise,
#' output is a list that also includes the original data with a column
#' representing which line (immature or mature) the point was assigned to, the
#' immature amd mature slope and intercept parameters, and the F and p-values
#' for the final piecewise model.
#' @export
#'
#' @examples
Expand Down
74 changes: 74 additions & 0 deletions R/two_line_logistic.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Two-line logistic model
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
#' variable (e.g., carapace width).
#' @param yvar Name of column (integer or double) of measurements for the y-axis
#' variable (e.g., claw height).
#' @param imm_int Starting value for the immature intercept parameter when fitting the NLS model
#' @param imm_slope tarting value for the immature slope parameter when fitting the NLS model
#' @param mat_int Starting value for the mature intercept parameter when fitting the NLS model
#' @param mat_slope Starting value for the mature slope parameter when fitting the NLS model
#' @param SM50_start Starting value for SM50 parameter when fitting the NLS model. If not provided, taken to be the median of the x-variable
#' @param alpha_start Starting value for the logistic slope parameter when fitting the NLS model
#' @param verbose Should additional output be returned besides the SM50
#' estimate?
#'
#' @returns If verbose is FALSE (the default), an estimate of SM50. Otherwise,
#' output is the NLS model object.
#' @export
#'
#' @examples
#' set.seed(12)
#' fc <- fake_crabs(n=100, L50=100, allo_params=c(1, 0.2, 1.1, 0.2))
#' two_line_logistic(fc, xvar="x", yvar="y", verbose = FALSE)
two_line_logistic <- function(dat,
xvar,
yvar,
imm_int = 1,
imm_slope = 0.2,
mat_int = 1,
mat_slope = 0.3,
SM50_start = NULL,
alpha_start = 9,
verbose = FALSE) {

tll_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]])

if (is.null(SM50_start)) {
SM50_start <- stats::median(tll_dat$xvar)
}

TLL_fun <- function(xvar,
int1,
slope1,
int2,
slope2,
SM50,
alpha) {
(int1 + slope1 * xvar) * (1 - (1 / (1 + exp(
-(xvar - SM50) / alpha
)))) + (int2 + slope2 * xvar) * (1 / (1 + exp(-(xvar - SM50) / alpha)))
}

nls_out <- stats::nls(
formula = yvar ~ TLL_fun(xvar, int1, slope1, int2, slope2, SM50, alpha),
data = tll_dat,
start = list(
int1 = imm_int,
slope1 = imm_slope,
int2 = mat_int,
slope2 = mat_slope,
SM50 = SM50_start,
alpha = alpha_start
),
control=stats::nls.control(maxiter=100)
)

if (verbose == TRUE) {
return(nls_out)
}
else
return(stats::coef(nls_out)["SM50"])
}

160 changes: 160 additions & 0 deletions R/two_line_stevens.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#' Two-line methods from Bradley Stevens
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
#' variable (e.g., carapace width).
#' @param yvar Name of column (integer or double) of measurements for the y-axis
#' variable (e.g., claw height).
#' @param upper Integer or double; the upper bound for possible SM50 values.
#' Must be on the same scale of the data. Defaults to the 80th percentile of
#' the x-variable.
#' @param lower Integer or double; the lower bound for possible SM50 values.
#' Must be on the same scale of the data. Defaults to the 20th percentile of
#' the x-variable.
#' @param verbose Should additional output be returned besides the SM50
#' estimate?
#' @param bps Should the values tested as possible breakpoints be restricted to
#' only observed values of the x-variable ("obs"), or should it be a specified
#' number of evenly-spaced values between the lower and upper limits of the
#' unknown region ("even", the default)
#' @param num_bps When `bps = "even"`, how many values should be tested as
#' possible endpoints? Defaults to 100, but should be increased.
#'
#' @returns If verbose is FALSE (the default), two possible estimates of SM50:
#' the breakpoint x-value marking the transition between immature and mature
#' points/lines, and the intersection point where the two lines cross. The
#' intersection value will typically be extremely unrealistic unless
#' the slopes of the lines are drastically different. If verbose is TRUE,
#' output is a list that also includes the original data with a column
#' representing which line (immature or mature) the point was assigned to, the
#' immature amd mature slope and intercept parameters, and the intersection
#' point of the two lines.
#' @export
#'
#' @examples
#' #' set.seed(12)
#' fc <- fake_crabs(n=100, L50=100, allo_params=c(1, 0.2, 1.1, 0.2))
#' two_line_stevens(fc, xvar="x", yvar="y", verbose = FALSE)
two_line_stevens <- function(dat,
xvar,
yvar,
lower = NULL,
upper = NULL,
verbose = FALSE,
bps = "even",
num_bps = 100) {
stevens <- dat %>% dplyr::arrange(.data[[xvar]])

xraw <- stevens[[xvar]]
yraw <- stevens[[yvar]]

if (is.null(lower)) {
lower <- stats::quantile(xraw, 0.2)
}

if (is.null(upper)) {
upper <- stats::quantile(xraw, 0.8)
}

left_x <- (xraw <= lower) # T/F vector
low_ndx <- sum(left_x) # largest group 1 point
right_x <- (xraw >= upper) # T/F vector
high_ndx <- (length(xraw) - sum(right_x)) + 1 # smallest group 2 point
min_x <- xraw[low_ndx] # lowest T value
min_y <- yraw[low_ndx] # lowest T value

stevens$xvar <- xraw
stevens$yvar <- yraw

lm0 <- stats::lm(yvar ~ xvar, data = stevens)
rss0 <- stats::anova(lm0)[[2, 2]] # residual sum of squares
ms0 <- stats::anova(lm0)[[3]] # mean squared error
F0 <- ms0[1] / ms0[2] # F value
n0 <- dim(stevens)[1]
rss_min <- rss0
mse0 <- mean(lm0$residuals ^ 2)

# assign group membership
# 1 = left line, 2= right line
memb <- rep(1, nrow(stevens))
memb_low <- (xraw <= min_x) # T/F list if less than low range
memb_high <- (yraw > min_y) # T/F list if GT than high range
memb[memb_low] <- 1 # assign 1 to those < low
memb[memb_high] <- 2 # assign 2 to those > high
memb_sum1 <- summary(as.factor(memb))
stevens$group <- memb

#### Loop

if (bps == "obs") {
mse <- rep(0, n0)

for (i in 1:n0) {
piecewise1 <- stats::lm(yvar ~ xvar * (xvar < xvar[i]) + xvar * (xvar >= xvar[i]), data =
stevens)
mse[i] <- mean(piecewise1$residuals ^ 2)
}

### find breakpoint (bp) that gives lowest MSE
bp_ind <- which(mse == min(mse))
bp <- stevens$xvar[bp_ind] # this is not necessarily where the lines cross

}

if (bps == "even") {
## determine increment for loop
steps <- seq(lower, upper, l = num_bps)

#### Loop
mse <- rep(0, num_bps)
for (i in 1:num_bps) {
piecewise1 <- stats::lm(yvar ~ xvar * (xvar < steps[i]) +
xvar * (xvar >= steps[i]), data = stevens)
mse[i] <- mean(piecewise1$residuals ^ 2)
}

### find breakpoint (bp) that gives lowest MSE
bp_ind <- which(mse == min(mse))
bp <- steps[bp_ind] # this is not necessarily where the lines cross
}

if (length(bp) > 1) {
bp <- stats::median(bp)
}

## rerun piecewise regression at best bp
piecewise2 <- stats::lm(yvar ~ xvar * (xvar < bp) + xvar * (xvar > bp), data =
stevens)

pw_vals <- stats::coef(piecewise2)
pw_vals[which(is.na(pw_vals))] <- 0
a_lo <- pw_vals[1] + pw_vals[3]
b_lo <- pw_vals[2] + pw_vals[5]
a_hi <- pw_vals[1] + pw_vals[4]
b_hi <- pw_vals[2]

jx <- as.numeric((a_lo - a_hi) / (b_hi - b_lo)) #the point where 2 lines meet
jy <- a_lo + b_lo * jx

#### Reassign group membership
memb_pw <- rep(1, n0)
memb_pw[stevens$xvar >= bp] <- 2
stevens$group <- memb_pw

output <- list(
data = stevens,
bp = bp,
jx = jx,
imm_slope = b_lo,
imm_int = a_lo,
mat_slope = b_hi,
mat_int = a_hi
)

if (verbose == TRUE) {
return(output)
}
else
return(c(breakpoint = bp, intersection = jx))

}
10 changes: 5 additions & 5 deletions man/broken_stick_stevens.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 55 additions & 0 deletions man/two_line_logistic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions man/two_line_stevens.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7b290ad

Please sign in to comment.