From eb497f7b40af8b1714b05af2bf3b68af17c90837 Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 20 Jan 2021 12:23:42 -0800 Subject: [PATCH] checkHzDepthLogic: use data.table instead of profileApply #157 #174 --- R/checkHzDepthLogic.R | 139 +++++++++++++----- man/checkHzDepthLogic.Rd | 80 +++++----- misc/aqp2/man_deprecated/checkHzDepthLogic.Rd | 54 +++++++ 3 files changed, 198 insertions(+), 75 deletions(-) create mode 100644 misc/aqp2/man_deprecated/checkHzDepthLogic.Rd diff --git a/R/checkHzDepthLogic.R b/R/checkHzDepthLogic.R index 2d3ad31f7..b78cf621d 100644 --- a/R/checkHzDepthLogic.R +++ b/R/checkHzDepthLogic.R @@ -1,51 +1,116 @@ - -## related issues: -# ## https://github.com/ncss-tech/aqp/issues/65 - -## general-purpose hz depth logic check -# assumes that data are sorted ID, top ASC -# x: SoilProfileCollection object to check -checkHzDepthLogic <- function(x) { +#' Check a SoilProfileCollection object for errors in horizon depths. +#' +#' @description This function inspects a SoilProfileCollection object, looking for four common errors in horizon depths: +#' +#' 1. bottom depth shallower than top depth +#' 2. equal top and bottom depth +#' 3. missing top or bottom depth (e.g. `NA`) +#' 4. gap or overlap between adjacent horizons +#' +#' @param x SoilProfileCollection object to check +#' +#' @param fast If details about specific test results are not needed, the operation can allocate less memory and run approximately 5x faster. Default: `FALSE` +#' +#' @return A `data.frame` containing profile IDs, validity boolean (`valid`) and test results if `fast = FALSE`. +#' +#' The `data.frame` will have as many rows as profiles in `x` (`length(x)`). +#' +#' - `id` : Profile IDs, named according to `idname(x)` +#' - `valid` : boolean, profile passes all of the following tests +#' - `depthLogic` : boolean, errors related to depth logic +#' - `sameDepth` : boolean, errors related to same top/bottom depths +#' - `missingDepth` : boolean, NA in top / bottom depths +#' - `overlapOrGap` : boolean, gaps or overlap in adjacent horizons +#' +#' @export +#' @author D.E. Beaudette, A.G. Brown +#' @examples +#' +#' ## sample data +#' +#' data(sp3) +#' depths(sp3) <- id ~ top + bottom +#' +#' # these data should be clean +#' res <- checkHzDepthLogic(sp3) +#' +#' head(res) +#' +#' # less memory if only concerned about net validity +#' res <- checkHzDepthLogic(sp3, fast = TRUE) +#' +#' head(res) +#' +checkHzDepthLogic <- function(x, fast = FALSE) { - # used inside / outside of scope of .check() - htb <- horizonDepths(x) + stopifnot(inherits(x, 'SoilProfileCollection')) + + h <- data.table::as.data.table(horizons(x)) + hzd <- horizonDepths(x) idn <- idname(x) + hby <- substitute(idn) + top <- substitute(hzd[1]) + bottom <- substitute(hzd[2]) + + res <- NULL - .check <- function(i) { - # extract pieces - h <- horizons(i) + if (!fast) { - # convenience vars - ID.i <- h[[idn]][1] - .top <- h[[htb[1]]] - .bottom <- h[[htb[2]]] + res <- h[, .(tests = list(tests = data.frame(t(hzDepthTests(eval(top), eval(bottom)))))), by = c(eval(hby))][, + .(tests = tests, valid = all(!tests[[1]])), by = c(eval(hby))] - # hzTests takes two numeric vectors and returns named logical - test <- hzDepthTests(.top, .bottom) + res <- cbind(res, data.table::rbindlist(res$tests)) + res$tests <- NULL - # pack into DF, 1 row per profile - res <- data.frame( - .id=ID.i, - depthLogic=test[1], - sameDepth=test[2], - missingDepth=test[3], - overlapOrGap=test[4], - stringsAsFactors = FALSE - ) + } else { - # re-name .id -> idname(x) - names(res)[1] <- idn + res <- h[, all(!hzDepthTests(eval(top), eval(bottom))), by = c(eval(hby))] + colnames(res) <- c(idname(x), "valid") - return(res) } - # iterate over profiles, result is safely packed into a DF ready for splicing into @site - res <- profileApply(x, .check, simplify = FALSE, frameify = TRUE) + return(as.data.frame(res)) - # add 'valid' flag for simple filtering - res[['valid']] <- !apply(res[, -1], 1, any) - - return(res) + # + # # used inside / outside of scope of .check() + # htb <- horizonDepths(x) + # idn <- idname(x) + # + # .check <- function(i) { + # # extract pieces + # h <- horizons(i) + # + # # convenience vars + # ID.i <- h[[idn]][1] + # .top <- h[[htb[1]]] + # .bottom <- h[[htb[2]]] + # + # # hzTests takes two numeric vectors and returns named logical + # test <- hzDepthTests(.top, .bottom) + # + # # pack into DF, 1 row per profile + # res <- data.frame( + # .id=ID.i, + # depthLogic=test[1], + # sameDepth=test[2], + # missingDepth=test[3], + # overlapOrGap=test[4], + # stringsAsFactors = FALSE + # ) + # + # # re-name .id -> idname(x) + # names(res)[1] <- idn + # + # return(res) + # } + # + # # iterate over profiles, result is safely packed into a DF ready for splicing into @site + # res <- profileApply(x, .check, simplify = FALSE, frameify = TRUE) + # + # # add 'valid' flag for simple filtering + # res[['valid']] <- !apply(res[, -1], 1, any) + # + # return(res) } #' @title Tests of horizon depth logic diff --git a/man/checkHzDepthLogic.Rd b/man/checkHzDepthLogic.Rd index 2449321a3..e89f95298 100644 --- a/man/checkHzDepthLogic.Rd +++ b/man/checkHzDepthLogic.Rd @@ -1,54 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkHzDepthLogic.R \name{checkHzDepthLogic} \alias{checkHzDepthLogic} -\alias{test_hz_logic} - -\title{Check a \code{SoilProfileCollection} object for errors in horizon depths.} -\description{This function inspects a \code{SoilProfileCollection} object, looking for 4 common errors in horizon depths: 1) bottom depths shallower than top depths, 2) equal top and bottom depths, 3) missing top or bottom depths (e.g. NA), and, 4) gaps or overlap between adjacent horizons.} - +\title{Check a SoilProfileCollection object for errors in horizon depths.} \usage{ -checkHzDepthLogic(x) +checkHzDepthLogic(x, fast = FALSE) } - \arguments{ - \item{x}{a \code{SoilProfileCollection} object} -} - -\details{This function replaces \code{test_hz_logic}, now marked as deprecated.} - -\value{A \code{data.frame} with as many rows as profiles in \code{x}. - \describe{ - \item{id}{Profile IDs, named according to \code{idname(x)}} - - \item{depthLogic}{boolean, errors related to depth logic} +\item{x}{SoilProfileCollection object to check} - \item{sameDepth}{boolean, errors related to same top/bottom depths} - - \item{missingDepth}{boolean, NA in top / bottom depths} - - \item{overlapOrGap}{boolean, gaps or overlap in adjacent horizons} - - \item{valid}{boolean, profile passes all tests} - } +\item{fast}{If details about specific test results are not needed, the operation can allocate less memory and run approximately 5x faster. Default: \code{FALSE}} +} +\value{ +A \code{data.frame} containing profile IDs, validity boolean (\code{valid}) and test results if \code{fast = FALSE}. + +The \code{data.frame} will have as many rows as profiles in \code{x} (\code{length(x)}). +\itemize{ +\item \code{id} : Profile IDs, named according to \code{idname(x)} +\item \code{valid} : boolean, profile passes all of the following tests +\itemize{ +\item \code{depthLogic} : boolean, errors related to depth logic +\item \code{sameDepth} : boolean, errors related to same top/bottom depths +\item \code{missingDepth} : boolean, NA in top / bottom depths +\item \code{overlapOrGap} : boolean, gaps or overlap in adjacent horizons +} +} +} +\description{ +This function inspects a SoilProfileCollection object, looking for four common errors in horizon depths: +\enumerate{ +\item bottom depth shallower than top depth +\item equal top and bottom depth +\item missing top or bottom depth (e.g. \code{NA}) +\item gap or overlap between adjacent horizons +} } - - -\author{D.E. Beaudette} - -\note{There is currently no simple way to fix errors identified by this function. Stay tuned for a \code{fixHzDepthErrors()}.} - - - - \examples{ ## sample data + data(sp3) depths(sp3) <- id ~ top + bottom - + # these data should be clean -(res <- checkHzDepthLogic(sp3)) +res <- checkHzDepthLogic(sp3) -} +head(res) -\keyword{ manip } +# less memory if only concerned about net validity +res <- checkHzDepthLogic(sp3, fast = TRUE) +head(res) + +} +\author{ +D.E. Beaudette, A.G. Brown +} diff --git a/misc/aqp2/man_deprecated/checkHzDepthLogic.Rd b/misc/aqp2/man_deprecated/checkHzDepthLogic.Rd new file mode 100644 index 000000000..2449321a3 --- /dev/null +++ b/misc/aqp2/man_deprecated/checkHzDepthLogic.Rd @@ -0,0 +1,54 @@ +\name{checkHzDepthLogic} +\alias{checkHzDepthLogic} +\alias{test_hz_logic} + +\title{Check a \code{SoilProfileCollection} object for errors in horizon depths.} +\description{This function inspects a \code{SoilProfileCollection} object, looking for 4 common errors in horizon depths: 1) bottom depths shallower than top depths, 2) equal top and bottom depths, 3) missing top or bottom depths (e.g. NA), and, 4) gaps or overlap between adjacent horizons.} + +\usage{ +checkHzDepthLogic(x) +} + +\arguments{ + \item{x}{a \code{SoilProfileCollection} object} +} + +\details{This function replaces \code{test_hz_logic}, now marked as deprecated.} + +\value{A \code{data.frame} with as many rows as profiles in \code{x}. + \describe{ + \item{id}{Profile IDs, named according to \code{idname(x)}} + + \item{depthLogic}{boolean, errors related to depth logic} + + \item{sameDepth}{boolean, errors related to same top/bottom depths} + + \item{missingDepth}{boolean, NA in top / bottom depths} + + \item{overlapOrGap}{boolean, gaps or overlap in adjacent horizons} + + \item{valid}{boolean, profile passes all tests} + } +} + + +\author{D.E. Beaudette} + +\note{There is currently no simple way to fix errors identified by this function. Stay tuned for a \code{fixHzDepthErrors()}.} + + + + +\examples{ + +## sample data +data(sp3) +depths(sp3) <- id ~ top + bottom + +# these data should be clean +(res <- checkHzDepthLogic(sp3)) + +} + +\keyword{ manip } +