Skip to content

Commit

Permalink
added dataframe option
Browse files Browse the repository at this point in the history
  • Loading branch information
mgimond committed Jun 5, 2023
1 parent d1ab950 commit 84f56c0
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 35 deletions.
52 changes: 33 additions & 19 deletions R/eda_dens.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
#' @description \code{eda_dens} generates overlapping density distributions for
#' two variables.
#'
#' @param x Vector for first variable.
#' @param y Vector for second variable.
#' @param x Vector for first variable or a dataframe.
#' @param y Vector for second variable or column defining the continuous
#' variable if \code{x} is a dataframe.
#' @param fac Column defining the grouping variable if \code{x} is a dataframe.
#' @param p Power transformation to apply to both sets of values.
#' @param tukey Boolean determining if a Tukey transformation should be adopted
#' (FALSE adopts a Box-Cox transformation).
Expand All @@ -21,8 +23,8 @@
#' @param alpha Fill transparency (0 = transparent, 1 = opaque). Only applicable
#' if \code{rgb()} is not used to define fill colors.
#' @param legend Boolean determining if a legend should be added to the plot.
#' @param xlab X label for output plot.
#' @param ylab Y label for output plot.
#' @param xlab X variable label. Ignored if \code{x} is a dataframe.
#' @param ylab Y variable label. Ignored if \code{x} is a dataframe.
#' @param ... Arguments passed to the \code{stats::density()} function.
#'
#' @details This function will generate overlapping density plots with the first
Expand All @@ -33,36 +35,48 @@
#'
#' @examples
#'
#' # Accepting data as two separate vector objects
#' # Passing data as two separate vector objects
#' set.seed(207)
#' x <- rbeta(1000,2,8)
#' y <- x * 1.5 + 0.1
#' eda_dens(x, y)
#'
#' # Passing data as a dataframe
#' dat <- data.frame(val = c(x, y),
#' grp = c(rep("x", length(x)), rep("y", length(y))))
#' eda_dens(dat, val, grp)



eda_dens <- function(x, y, p = 1L, tukey = FALSE, fx = NULL,
eda_dens <- function(x, y, fac = NULL, p = 1L, tukey = FALSE, fx = NULL,
fy = NULL, grey = 0.7, col = "red", size = 0.8,
alpha = 0.4, xlab = NULL, ylab = NULL, legend = TRUE, ...) {

# Parameters check
if (!is.numeric(x)) stop("X needs to be numeric")
if (!is.numeric(y)) stop("Y needs to be numeric")

# Define labels
if(is.null(xlab)){
xlab = substitute(x)
}
if(is.null(ylab)){
ylab = substitute(y)
# Extract data
if("data.frame" %in% class(x)){
val <- eval(substitute(y), x)
fac <- eval(substitute(fac), x)
g <- unique(fac)
if( length(g) != 2){
stop(paste("Column", fac, "has", length(g),
"unique values. It needs to have two exactly."))
}
x <- val[fac == g[1]]
y <- val[fac == g[2]]
xlab <- g[1]
ylab <- g[2]
} else {
if(is.null(xlab)){
xlab = substitute(x)
}
if(is.null(ylab)){
ylab = substitute(y)
}
}

# Re-express data if required
if (p != 1L) {
x <- eda_re(x, p = p, tukey = tukey)
y <- eda_re(y, p = p, tukey = tukey)
}


# Apply formula if present
if(!is.null(fx) & !is.null(fy))
Expand Down
57 changes: 41 additions & 16 deletions R/eda_qq.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
#' @description \code{eda_qq} generates an empirical QQ plot or a Tukey
#' mean-difference plot
#'
#' @param x Column assigned to the x axis.
#' @param y Column assigned to the y axis.
#' @param x Vector for first variable or a dataframe.
#' @param y Vector for second variable or column defining the continuous
#' variable if \code{x} is a dataframe.
#' @param fac Column defining the grouping variable if \code{x} is a dataframe.
#' @param p Power transformation to apply to both sets of values.
#' @param tukey Boolean determining if a Tukey transformation should be adopted
#' (FALSE adopts a Box-Cox transformation).
Expand All @@ -34,16 +36,16 @@
#' IQR. Two values are needed.
#' @param l.val Quantiles to define the quantile line parameters. Defaults to
#' the mid 75\% of values. Two values are needed.
#' @param xlab X label for output plot
#' @param ylab Y label for output plot
#' @param xlab X label for output plot. Ignored if \code{x} is a dataframe.
#' @param ylab Y label for output plot. Ignored if \code{x} is a dataframe.
#' @param ... Not used
#'
#' @details The QQ plot will displays the IQR via grey boxes for both x and y
#' values. The box widths can be changed via the \code{b.val} argument. The plot
#' will also display the mid 75\% of values via light colored dashed lines. The
#' line positions can be changed via the \code{l.val} argument. The middle
#' dashed line represents each batch's median value.
#' Console output prints the suggested multiplicative and additive offsets.
#' values. The box widths can be changed via the \code{b.val} argument. The
#' plot will also display the mid 75\% of values via light colored dashed
#' lines. The line positions can be changed via the \code{l.val} argument. The
#' middle dashed line represents each batch's median value. Console output
#' prints the suggested multiplicative and additive offsets.
#'
#'
#' @returns Returns a list with the following components:
Expand All @@ -60,8 +62,12 @@
#'
#' @examples
#'
#' # Example 1: Comparing "Tenor 1" and "Bass 2" singer height values
#' # Passing data as a dataframe
#' singer <- lattice::singer
#' dat <- singer[singer$voice.part %in% c("Bass 2", "Tenor 1"), ]
#' eda_qq(dat, height, voice.part)
#'
#' # Passing data as two separate vector objects
#' bass2 <- subset(singer, voice.part == "Bass 2", select = height, drop = TRUE )
#' tenor1 <- subset(singer, voice.part == "Tenor 1", select = height, drop = TRUE )
#'
Expand Down Expand Up @@ -99,18 +105,39 @@
#'


eda_qq <- function(x, y, p = 1L, q.type = 5, tukey = FALSE, md = FALSE,
fx = NULL, fy = NULL, plot = TRUE,
eda_qq <- function(x, y, fac = NULL, p = 1L, tukey = FALSE, md = FALSE,
q.type = 5, fx = NULL, fy = NULL, plot = TRUE,
grey = 0.6, pch = 21, p.col = "grey50", p.fill = "grey80",
size = 0.8, alpha = 0.8, q = TRUE, b.val = c(0.25,0.75),
l.val = c(0.125, 0.875), xlab = NULL, ylab = NULL, ...) {

# Parameters check
if (!is.numeric(x)) stop("X needs to be numeric")
if (!is.numeric(y)) stop("Y needs to be numeric")
if (length(b.val)!= 2) stop("The b.val argument must have two values.")
if (length(l.val)!= 2) stop("The b.val argument must have two values.")

# Extract data
if("data.frame" %in% class(x)){
val <- eval(substitute(y), x)
fac <- eval(substitute(fac), x)
g <- unique(fac)
if( length(g) != 2){
stop(paste("Column", fac, "has", length(g),
"unique values. It needs to have two exactly."))
}
x <- val[fac == g[1]]
y <- val[fac == g[2]]
xlab <- g[1]
ylab <- g[2]
} else {
if(is.null(xlab)){
xlab = substitute(x)
}
if(is.null(ylab)){
ylab = substitute(y)
}
}


# Define labels
if(is.null(xlab)){
xlab = substitute(x)
Expand All @@ -120,10 +147,8 @@ eda_qq <- function(x, y, p = 1L, q.type = 5, tukey = FALSE, md = FALSE,
}

# Re-express data if required
if (p != 1L){
x <- eda_re(x, p = p, tukey = tukey)
y <- eda_re(y, p = p, tukey = tukey)
}

# Get suggested multiplicative and additive offsets (off of original data)
zl <- qqplot(x, y, plot.it = FALSE, qtype = q.type)
Expand Down

0 comments on commit 84f56c0

Please sign in to comment.