From 799ec1517306441786e9303fb41782509a7549b0 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Thu, 21 Jan 2021 16:04:04 -0800 Subject: [PATCH] reshape examples/demo (#193) * sim: update docs; remove reshape example #157 * sp3 example data.table patch #157 * update aqp demo to use data.table #157 * docs * fix missing comment example --- R/permute_profile.R | 32 +++---- R/sim.R | 99 ++++++++++++++++++++- demo/aqp.R | 139 +++++++++++++++++++----------- man/permute_profile.Rd | 16 +++- man/sim.Rd | 109 +++++++++++++++-------- man/sp3.Rd | 190 ++++++++++++++++++++++------------------- 6 files changed, 391 insertions(+), 194 deletions(-) diff --git a/R/permute_profile.R b/R/permute_profile.R index 2f77986a2..e476b8fc4 100644 --- a/R/permute_profile.R +++ b/R/permute_profile.R @@ -7,7 +7,11 @@ #' @param min.thickness Minimum thickness of permuted horizons (default: 1) #' @param soildepth Depth below which horizon depths are not permuted (default: NULL) #' @param new.idname New column name to contain unique profile ID (default: pID) -#' @description This method is most "believable" when used to _gently_ permute the data, on the order of moving boundaries a few centimeters in either direction. The nice thing about it is it can leverage semi-quantitative (ordered factor) levels of boundary distinctness/topography for the upper and lower boundary of individual horizons, given a set of assumptions to convert classes to a "standard deviation" (see example). +#' +#' @description "Perturbs" the **boundary between horizons** using a standard deviation thickness. The thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness." This is arguably "easier" to parameterize from something like a single profile description where boundary distinctness classes (based on vertical distance of transition) are recorded for each horizon. +#' +#' @details +#'This method is most "believable" when used to _gently_ permute the data, on the order of moving boundaries a few centimeters in either direction. The nice thing about it is it can leverage semi-quantitative (ordered factor) levels of boundary distinctness/topography for the upper and lower boundary of individual horizons, given a set of assumptions to convert classes to a "standard deviation" (see example). #' #' If you imagine a normal curve with its mean centered on the vertical (depth axis) at a RV horizon depth. By the Empirical Rule for Normal distribution, two "standard deviations" above or below that RV depth represent 95% of the "volume" of the boundary. #' @@ -18,11 +22,15 @@ #' Future implementations may use boundary topography as a second hierarchical level (e.g. trig-based random functions), but think that distinctness captures the "uncertainty" about horizon separation at a specific "point" on the ground (or line in the profile quite well, and the extra variation may be hard to interpret, in general. #' #' @return A SoilProfileCollection with n permutations of p. +#' +#' @seealso [random_profile()] [sim()] [hzDistinctnessCodeToOffset()] +#' #' @export permute_profile #' @author Andrew G. Brown #' #' @examples -#' # # example with sp1 (using boundary distinctness) +#' +#' # example with sp1 (using boundary distinctness) #' data("sp1") #' depths(sp1) <- id ~ top + bottom #' @@ -41,19 +49,13 @@ #' #' quantile(sp1$bound_sd, na.rm = TRUE) #' p <- sp1[3] - -# # example with loafercreek (no boundaries) -# library(soilDB) -# data("loafercreek") -# # -# # # assume boundary sd is 1/12 midpoint of horizon depth -# # (i.e. generally increases/less well known with depth) -# # -# loafercreek <- mutate(loafercreek, midpt = (hzdepb - hzdept) / 2 + hzdept, -# # bound_sd = midpt / 12) -# quantile(loafercreek$bound_sd) -# p <- loafercreek[1] - +#' +#' # assume boundary sd is 1/12 midpoint of horizon depth +#' # (i.e. general relationship: SD increases (less well known) with depth) +#' sp1 <- mutate(sp1, midpt = (bottom - top) / 2 + top, bound_sd = midpt / 12) +#' quantile(sp1$bound_sd) +#' p <- sp1[1] +#' permute_profile <- function(p, n = 100, id = NULL, boundary.attr, min.thickness = 1, soildepth = NULL, new.idname = 'pID') { diff --git a/R/sim.R b/R/sim.R index 35ba88d99..9597b0b4e 100644 --- a/R/sim.R +++ b/R/sim.R @@ -1,4 +1,101 @@ -# function is more useful when supplied with a meaningful sd for each horizon +# +#' Simulate Soil Profiles +#' +#' @description Simulate a collection of soil profiles based on the horizonation of a single soil profile. +#' +#' The function is most useful when supplied with a meaningful standard deviation in thickness for each horizon -- which generally implies an aggregation of **several** profiles (say, using some generalized horizon pattern). +#' +#' This contrasts with a similar approach in [permute_profile()] -- which "perturbs" the **boundary between horizons** using a standard deviation of "horizon transition zone" thickness. This thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness." +#' +#' @param x a SoilProfileCollection object containing a single profile from which to draw simulated data +#' @param n the number of requested simulations +#' @param iterations sampling iterations used to determine each horizon thickness +#' @param hz.sd standard deviation used to simulate horizon thickness, can be a vector but must divide evenly into the number of horizons found in \code{x} +#' @param min.thick minimum horizon thickness allowed in simulation results +#' +#' @return A SoilProfileCollection object with \code{n} simulated profiles, each containing the same number of horizons and same data as \code{x} +#' +#' @details This function generates a collection of simulated soil profiles based on the horizon thickness data associated with a single "template" profile. Simulation is based on sampling from a family of gaussian distribution with means defined by the "template" profile and standard deviation defined by the user. +#' +#' @export +#' +#' @seealso \code{\link{random_profile}} \code{\link{permute_profile}} +#' +#' @author D. E. Beaudette +#' +#' @examples +#' +#' # load sample data and convert into SoilProfileCollection +#' data(sp3) +#' depths(sp3) <- id ~ top + bottom +#' +#' # select a profile to use as the basis for simulation +#' s <- sp3[3,] +#' +#' # reset horizon names +#' s$name <- paste('H', seq_along(s$name), sep = '') +#' +#' # simulate 25 new profiles, using 's' and function defaults +#' sim.1 <- sim(s, n = 25) +#' +#' # simulate 25 new profiles using 's' and variable SD for each horizon +#' sim.2 <- sim(s, n = 25, hz.sd = c(1, 2, 5, 5, 5, 10, 3)) +#' +#' # plot +#' par(mfrow = c(2, 1), mar = c(0, 0, 0, 0)) +#' plot(sim.1) +#' mtext( +#' 'SD = 2', +#' side = 2, +#' line = -1.5, +#' font = 2, +#' cex = 0.75 +#' ) +#' plot(sim.2) +#' mtext( +#' 'SD = c(1, 2, 5, 5, 5, 10, 3)', +#' side = 2, +#' line = -1.5, +#' font = 2, +#' cex = 0.75 +#' ) +#' +#' # aggregate horizonation of simulated data +#' # note: set class_prob_mode=2 as profiles were not defined to a constant depth +#' sim.2$name <- factor(sim.2$name) +#' a <- slab(sim.2, ~ name, class_prob_mode = 2) +#' +#' # convert to long format for plotting simplicity +#' library(data.table) +#' a.long <- +#' melt(a, +#' id.vars = c('top', 'bottom'), +#' measure.vars = levels(sim.2$name)) +#' +#' # plot horizon probabilities derived from simulated data +#' # dashed lines are the original horizon boundaries +#' library(lattice) +#' +#' xyplot( +#' top ~ value, +#' groups = variable, +#' data = a.long, +#' subset = value > 0, +#' ylim = c(100,-5), +#' type = c('l', 'g'), +#' asp = 1.5, +#' ylab = 'Depth (cm)', +#' xlab = 'Probability', +#' auto.key = list( +#' columns = 4, +#' lines = TRUE, +#' points = FALSE +#' ), +#' panel = function(...) { +#' panel.xyplot(...) +#' panel.abline(h = s$top, lty = 2, lwd = 2) +#' } +#' ) sim <- function(x, n=1, iterations=25, hz.sd=2, min.thick=2) { hd <- horizonDepths(x) diff --git a/demo/aqp.R b/demo/aqp.R index 3c626d05d..77fed1e20 100644 --- a/demo/aqp.R +++ b/demo/aqp.R @@ -1,19 +1,20 @@ ## -## main demo for AQP package-- a work in progress, largely just material from help files +## main demo for AQP package ## -# required packages -require(aqp) -require(ape) -require(cluster) -require(lattice) -require(reshape) +# load packages +library(aqp) +library(ape) +library(cluster) +library(lattice) +library(data.table) +# Example 1: sp1: 9 soil profiles from Pinnacles National Monument, CA. +data(sp1) # # 1. basic profile aggregation and plotting # -data(sp1) depths(sp1) <- id ~ top + bottom # aggregate all profiles into 1,5,10,20 cm thick slabs, computing mean values by slab @@ -29,53 +30,69 @@ s20 <- slab(sp1, ~ prop, slab.fun=mean, na.rm=TRUE, slab.structure=20) head(s1) # variation in segment-weighted mean property: very little -round(sapply( -list(s1, s5, s10, s20), -function(i) { - with(i, sum((bottom - top) * value) / sum(bottom - top)) - } -), 1) +round(sapply(list(s1, s5, s10, s20), + function(i) { + with(i, sum((bottom - top) * value) / sum(bottom - top)) + }), 1) # combined viz -g2 <- make.groups("1cm interval"=s1, "5cm interval"=s5, -"10cm interval"=s10, "20cm interval"=s20) +g2 <- make.groups( + "1cm interval" = s1, + "5cm interval" = s5, + "10cm interval" = s10, + "20cm interval" = s20 +) # note special syntax for plotting step function -xyplot(cbind(top,bottom) ~ value, groups=which, data=g2, id=g2$which, -panel=panel.depth_function, ylim=c(250,-10), -scales=list(y=list(tick.number=10)), xlab='Property', -ylab='Depth (cm)', main='Soil Profile Aggregation by Regular Depth-slice', -auto.key=list(columns=2, points=FALSE, lines=TRUE) +xyplot( + cbind(top, bottom) ~ value, + groups = which, + data = g2, + id = g2$which, + panel = panel.depth_function, + ylim = c(250, -10), + scales = list(y = list(tick.number = 10)), + xlab = 'Property', + ylab = 'Depth (cm)', + main = 'Soil Profile Aggregation by Regular Depth-slice', + auto.key = list( + columns = 2, + points = FALSE, + lines = TRUE + ) ) - +# Example 2: sp3: 10 soil profiles from the Sierra Nevada Foothills Region of California. +data(sp3) # # 2. investigate the concept of a 'median profile' # note that this involves aggregation between two dissimilar groups of soils # -data(sp3) # generate a RGB version of soil colors # and convert to HSV for aggregation -sp3$h <- NA ; sp3$s <- NA ; sp3$v <- NA -sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets=TRUE)) -sp3[, c('h','s','v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue=1))) +sp3$h <- NA +sp3$s <- NA +sp3$v <- NA +sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets = TRUE)) +sp3[, c('h', 's', 'v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue = 1))) # promote to SoilProfileCollection depths(sp3) <- id ~ top + bottom # aggregate across entire collection -a <- slab(sp3, fm= ~ clay + cec + ph + h + s + v, slab.structure=10) +a <- slab(sp3, + fm = ~ clay + cec + ph + h + s + v, + slab.structure = 10) # check str(a) # convert back to wide format -library(reshape) -a.wide.q25 <- cast(a, top + bottom ~ variable, value=c('p.q25')) -a.wide.q50 <- cast(a, top + bottom ~ variable, value=c('p.q50')) -a.wide.q75 <- cast(a, top + bottom ~ variable, value=c('p.q75')) +a.wide.q25 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q25')) +a.wide.q50 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q50')) +a.wide.q75 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q75')) # add a new id for the 25th, 50th, and 75th percentile pedons a.wide.q25$id <- 'Q25' @@ -83,33 +100,38 @@ a.wide.q50$id <- 'Q50' a.wide.q75$id <- 'Q75' # combine original data with "mean profile" -vars <- c('top','bottom','id','clay','cec','ph','h','s','v') +vars <- c('id', 'top', 'bottom', 'clay', 'cec', 'ph', 'h', 's', 'v') + # make data.frame version of sp3 -sp3.df <- as(sp3, 'data.frame') -sp3.grouped <- rbind( -sp3.df[, vars], a.wide.q25[, vars], a.wide.q50[, vars], a.wide.q75[, vars] -) +sp3.grouped <- as.data.frame(rbind(as.data.table(horizons(sp3))[, .SD, .SDcol = vars], + a.wide.q25[, .SD, .SDcol = vars], + a.wide.q50[, .SD, .SDcol = vars], + a.wide.q75[, .SD, .SDcol = vars])) # re-constitute the soil color from HSV triplets # convert HSV back to standard R colors sp3.grouped$soil_color <- with(sp3.grouped, hsv(h, s, v)) # give each horizon a name -sp3.grouped$name <- paste(round(sp3.grouped$clay), '/' , -round(sp3.grouped$cec), '/', round(sp3.grouped$ph,1)) - +sp3.grouped$name <- paste(round(sp3.grouped$clay), '/' , + round(sp3.grouped$cec), '/', + round(sp3.grouped$ph, 1)) +plot(sp3.grouped) ## perform comparison, and convert to phylo class object ## D is rescaled to [0,] -d <- profile_compare(sp3.grouped, vars=c('clay','cec','ph'), max_d=100, -k=0.01, replace_na=TRUE, add_soil_flag=TRUE, rescale.result=TRUE) - -require(cluster) -h <- agnes(d, method='ward') +d <- profile_compare(sp3.grouped, + vars = c('clay', 'cec', 'ph'), + max_d = 100, + k = 0.01, + replace_na = TRUE, + add_soil_flag = TRUE, + rescale.result = TRUE) + +h <- agnes(d, method = 'ward') p <- ladderize(as.phylo(as.hclust(h))) - # look at distance plot-- just the median profile plot_distance_graph(d, 12) @@ -121,9 +143,14 @@ round(1 - (as.matrix(d)[12, ] / max(as.matrix(d)[12, ])), 2) depths(sp3.grouped) <- id ~ top + bottom # setup plot: note that D has a scale of [0,1] -par(mar=c(1,1,1,1)) -p.plot <- plot(p, cex=0.8, label.offset=3, direction='up', y.lim=c(2,0), -x.lim=c(1.25,length(sp3.grouped)+1), show.tip.label=FALSE) +par(mar = c(1, 1, 1, 1)) +p.plot <- plot(p, + cex = 0.8, + label.offset = 3, + direction = 'up', + y.lim = c(2, 0), + x.lim = c(1.25, length(sp3.grouped) + 1), + show.tip.label = FALSE) # get the last plot geometry lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) @@ -132,10 +159,18 @@ lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) d.labels <- attr(d, 'Labels') new_order <- sapply(1:lastPP$Ntip, -function(i) which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i)) + function(i) + which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i)) # plot the profiles, in the ordering defined by the dendrogram # with a couple fudge factors to make them fit -plot(sp3.grouped, color="soil_color", plot.order=new_order, -scaling.factor=0.01, width=0.1, cex.names=0.5, -y.offset=max(lastPP$yy)+0.1, add=TRUE) +plotSPC( + sp3.grouped, + color = "soil_color", + plot.order = new_order, + scaling.factor = 0.01, + width = 0.1, + cex.names = 0.5, + y.offset = max(lastPP$yy) + 0.1, + add = TRUE +) diff --git a/man/permute_profile.Rd b/man/permute_profile.Rd index 2c524d480..c16b899d3 100644 --- a/man/permute_profile.Rd +++ b/man/permute_profile.Rd @@ -33,6 +33,9 @@ permute_profile( A SoilProfileCollection with n permutations of p. } \description{ +"Perturbs" the \strong{boundary between horizons} using a standard deviation thickness. The thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness." This is arguably "easier" to parameterize from something like a single profile description where boundary distinctness classes (based on vertical distance of transition) are recorded for each horizon. +} +\details{ This method is most "believable" when used to \emph{gently} permute the data, on the order of moving boundaries a few centimeters in either direction. The nice thing about it is it can leverage semi-quantitative (ordered factor) levels of boundary distinctness/topography for the upper and lower boundary of individual horizons, given a set of assumptions to convert classes to a "standard deviation" (see example). If you imagine a normal curve with its mean centered on the vertical (depth axis) at a RV horizon depth. By the Empirical Rule for Normal distribution, two "standard deviations" above or below that RV depth represent 95\% of the "volume" of the boundary. @@ -44,7 +47,8 @@ Of course, boundaries are not symmetrical and this is at best an approximation f Future implementations may use boundary topography as a second hierarchical level (e.g. trig-based random functions), but think that distinctness captures the "uncertainty" about horizon separation at a specific "point" on the ground (or line in the profile quite well, and the extra variation may be hard to interpret, in general. } \examples{ -# # example with sp1 (using boundary distinctness) + +# example with sp1 (using boundary distinctness) data("sp1") depths(sp1) <- id ~ top + bottom @@ -63,6 +67,16 @@ sp1$bound_sd[is.na(sp1$bound_sd)] <- 0 quantile(sp1$bound_sd, na.rm = TRUE) p <- sp1[3] + +# assume boundary sd is 1/12 midpoint of horizon depth +# (i.e. general relationship: SD increases (less well known) with depth) +sp1 <- mutate(sp1, midpt = (bottom - top) / 2 + top, bound_sd = midpt / 12) +quantile(sp1$bound_sd) +p <- sp1[1] + +} +\seealso{ +\code{\link[=random_profile]{random_profile()}} \code{\link[=sim]{sim()}} \code{\link[=hzDistinctnessCodeToOffset]{hzDistinctnessCodeToOffset()}} } \author{ Andrew G. Brown diff --git a/man/sim.Rd b/man/sim.Rd index dc3ebe8d0..9ba13f460 100644 --- a/man/sim.Rd +++ b/man/sim.Rd @@ -1,77 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sim.R \name{sim} \alias{sim} - \title{Simulate Soil Profiles} -\description{ - Simulate a collection of soil profiles based on the horizonation of a single soil profile. -} \usage{ - sim(x, n=1, iterations=25, hz.sd=2, min.thick=2) +sim(x, n = 1, iterations = 25, hz.sd = 2, min.thick = 2) } - \arguments{ - \item{x}{a SoilProfileCollection object containing a single profile from which to draw simulated data} - \item{n}{the number of requested simulations} - \item{iterations}{sampling iterations used to determine each horizon thickness} - \item{hz.sd}{standard deviation used to simulate horizon thickness, can be a vector but must divide evenly into the number of horizons found in \code{x}} - \item{min.thick}{minimum horizon thickness allowed in simulation results} -} +\item{x}{a SoilProfileCollection object containing a single profile from which to draw simulated data} +\item{n}{the number of requested simulations} + +\item{iterations}{sampling iterations used to determine each horizon thickness} + +\item{hz.sd}{standard deviation used to simulate horizon thickness, can be a vector but must divide evenly into the number of horizons found in \code{x}} + +\item{min.thick}{minimum horizon thickness allowed in simulation results} +} \value{ - A SoilProfileCollection object with \code{n} simulated profiles, each containing the same number of horizons and same data as \code{x}. +A SoilProfileCollection object with \code{n} simulated profiles, each containing the same number of horizons and same data as \code{x} +} +\description{ +Simulate a collection of soil profiles based on the horizonation of a single soil profile. + +The function is most useful when supplied with a meaningful standard deviation in thickness for each horizon -- which generally implies an aggregation of \strong{several} profiles (say, using some generalized horizon pattern). + +This contrasts with a similar approach in \code{\link[=permute_profile]{permute_profile()}} -- which "perturbs" the \strong{boundary between horizons} using a standard deviation of "horizon transition zone" thickness. This thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness." } \details{ This function generates a collection of simulated soil profiles based on the horizon thickness data associated with a single "template" profile. Simulation is based on sampling from a family of gaussian distribution with means defined by the "template" profile and standard deviation defined by the user. } - -\author{D. E. Beaudette} -\seealso{\code{\link{random_profile}}} \examples{ + # load sample data and convert into SoilProfileCollection data(sp3) depths(sp3) <- id ~ top + bottom # select a profile to use as the basis for simulation -s <- sp3[3, ] +s <- sp3[3,] # reset horizon names -s$name <- paste('H', seq_along(s$name), sep='') +s$name <- paste('H', seq_along(s$name), sep = '') # simulate 25 new profiles, using 's' and function defaults -sim.1 <- sim(s, n=25) +sim.1 <- sim(s, n = 25) # simulate 25 new profiles using 's' and variable SD for each horizon -sim.2 <- sim(s, n=25, hz.sd=c(1, 2, 5, 5, 5, 10, 3)) +sim.2 <- sim(s, n = 25, hz.sd = c(1, 2, 5, 5, 5, 10, 3)) # plot -par(mfrow=c(2,1), mar=c(0, 0, 0, 0)) +par(mfrow = c(2, 1), mar = c(0, 0, 0, 0)) plot(sim.1) -mtext('SD = 2', side=2, line=-1.5, font=2, cex=0.75) +mtext( + 'SD = 2', + side = 2, + line = -1.5, + font = 2, + cex = 0.75 +) plot(sim.2) -mtext('SD = c(1, 2, 5, 5, 5, 10, 3)', side=2, line=-1.5, font=2, cex=0.75) +mtext( + 'SD = c(1, 2, 5, 5, 5, 10, 3)', + side = 2, + line = -1.5, + font = 2, + cex = 0.75 +) # aggregate horizonation of simulated data # note: set class_prob_mode=2 as profiles were not defined to a constant depth sim.2$name <- factor(sim.2$name) -a <- slab(sim.2, ~ name, class_prob_mode=2) +a <- slab(sim.2, ~ name, class_prob_mode = 2) # convert to long format for plotting simplicity -library(reshape) -a.long <- melt(a, id.vars=c('top','bottom'), measure.vars=levels(sim.2$name)) +library(data.table) +a.long <- + melt(a, + id.vars = c('top', 'bottom'), + measure.vars = levels(sim.2$name)) # plot horizon probabilities derived from simulated data # dashed lines are the original horizon boundaries library(lattice) -xyplot(top ~ value, groups=variable, data=a.long, subset=value > 0, -ylim=c(100, -5), type=c('l','g'), asp=1.5, -ylab='Depth (cm)', xlab='Probability', -auto.key=list(columns=4, lines=TRUE, points=FALSE), -panel=function(...) { -panel.xyplot(...) -panel.abline(h=s$top, lty=2, lwd=2) -}) +xyplot( + top ~ value, + groups = variable, + data = a.long, + subset = value > 0, + ylim = c(100,-5), + type = c('l', 'g'), + asp = 1.5, + ylab = 'Depth (cm)', + xlab = 'Probability', + auto.key = list( + columns = 4, + lines = TRUE, + points = FALSE + ), + panel = function(...) { + panel.xyplot(...) + panel.abline(h = s$top, lty = 2, lwd = 2) + } +) +} +\seealso{ +\code{\link{random_profile}} \code{\link{permute_profile}} +} +\author{ +D. E. Beaudette } -\keyword{manip} - - diff --git a/man/sp3.Rd b/man/sp3.Rd index 7ce615006..58621f161 100644 --- a/man/sp3.Rd +++ b/man/sp3.Rd @@ -42,94 +42,108 @@ These data were collected to support research funded by the Kearney Foundation o ## this example investigates the concept of a "median profile" # required packages -if(require(ape) & require(cluster)) { - -data(sp3) - -# generate a RGB version of soil colors -# and convert to HSV for aggregation -sp3$h <- NA ; sp3$s <- NA ; sp3$v <- NA -sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets=TRUE)) -sp3[, c('h','s','v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue=1))) - -# promote to SoilProfileCollection -depths(sp3) <- id ~ top + bottom - -# aggregate across entire collection -a <- slab(sp3, fm= ~ clay + cec + ph + h + s + v, slab.structure=10) - -# check -str(a) - -# convert back to wide format -library(reshape) -a.wide.q25 <- cast(a, top + bottom ~ variable, value=c('p.q25')) -a.wide.q50 <- cast(a, top + bottom ~ variable, value=c('p.q50')) -a.wide.q75 <- cast(a, top + bottom ~ variable, value=c('p.q75')) - -# add a new id for the 25th, 50th, and 75th percentile pedons -a.wide.q25$id <- 'Q25' -a.wide.q50$id <- 'Q50' -a.wide.q75$id <- 'Q75' - -# combine original data with "mean profile" -vars <- c('top','bottom','id','clay','cec','ph','h','s','v') -# make data.frame version of sp3 -sp3.df <- as(sp3, 'data.frame') -sp3.grouped <- rbind( -sp3.df[, vars], a.wide.q25[, vars], a.wide.q50[, vars], a.wide.q75[, vars] -) - -# re-constitute the soil color from HSV triplets -# convert HSV back to standard R colors -sp3.grouped$soil_color <- with(sp3.grouped, hsv(h, s, v)) - -# give each horizon a name -sp3.grouped$name <- paste(round(sp3.grouped$clay), '/' , -round(sp3.grouped$cec), '/', round(sp3.grouped$ph,1)) - - - -## perform comparison, and convert to phylo class object -## D is rescaled to [0,] -d <- profile_compare(sp3.grouped, vars=c('clay','cec','ph'), max_d=100, -k=0.01, replace_na=TRUE, add_soil_flag=TRUE, rescale.result=TRUE) - - -h <- agnes(d, method='ward') -p <- ladderize(as.phylo(as.hclust(h))) - - -# look at distance plot-- just the median profile -plot_distance_graph(d, 12) - -# similarity relative to median profile (profile #12) -round(1 - (as.matrix(d)[12, ] / max(as.matrix(d)[12, ])), 2) - -## make dendrogram + soil profiles -# first promote to SoilProfileCollection -depths(sp3.grouped) <- id ~ top + bottom - -# setup plot: note that D has a scale of [0,1] -par(mar=c(1,1,1,1)) -p.plot <- plot(p, cex=0.8, label.offset=3, direction='up', y.lim=c(2,0), -x.lim=c(1.25,length(sp3.grouped)+1), show.tip.label=FALSE) - -# get the last plot geometry -lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) - -# the original labels, and new (indexed) order of pedons in dendrogram -d.labels <- attr(d, 'Labels') - -new_order <- sapply(1:lastPP$Ntip, -function(i) which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i)) - -# plot the profiles, in the ordering defined by the dendrogram -# with a couple fudge factors to make them fit -plot(sp3.grouped, color="soil_color", plot.order=new_order, -scaling.factor=0.01, width=0.1, cex.names=0.5, -y.offset=max(lastPP$yy)+0.1, add=TRUE) - +if (require(ape) & require(cluster)) { + data(sp3) + + # generate a RGB version of soil colors + # and convert to HSV for aggregation + sp3$h <- NA + sp3$s <- NA + sp3$v <- NA + sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets = TRUE)) + + sp3[, c('h', 's', 'v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue = 1))) + + # promote to SoilProfileCollection + depths(sp3) <- id ~ top + bottom + + # aggregate across entire collection + a <- slab(sp3, fm = ~ clay + cec + ph + h + s + v, slab.structure = 10) + + # check + str(a) + + # convert back to wide format + library(data.table) + + a.wide.q25 <- dcast(a, top + bottom ~ variable, value.var = c('p.q25')) + a.wide.q50 <- dcast(a, top + bottom ~ variable, value.var = c('p.q50')) + a.wide.q75 <- dcast(a, top + bottom ~ variable, value.var = c('p.q75')) + + # add a new id for the 25th, 50th, and 75th percentile pedons + a.wide.q25$id <- 'Q25' + a.wide.q50$id <- 'Q50' + a.wide.q75$id <- 'Q75' + + # combine original data with "mean profile" + vars <- c('top', 'bottom', 'id', 'clay', 'cec', 'ph', 'h', 's', 'v') + # make data.frame version of sp3 + sp3.df <- as(sp3, 'data.frame') + sp3.grouped <- rbind(sp3.df[, vars], a.wide.q25[, vars], a.wide.q50[, vars], a.wide.q75[, vars]) + + # re-constitute the soil color from HSV triplets + # convert HSV back to standard R colors + sp3.grouped$soil_color <- with(sp3.grouped, hsv(h, s, v)) + + # give each horizon a name + sp3.grouped$name <- paste( + round(sp3.grouped$clay), + '/' , + round(sp3.grouped$cec), + '/', + round(sp3.grouped$ph, 1) + ) + + ## perform comparison, and convert to phylo class object + ## D is rescaled to [0,] + d <- profile_compare( + sp3.grouped, + vars = c('clay', 'cec', 'ph'), + max_d = 100, + k = 0.01, + replace_na = TRUE, + add_soil_flag = TRUE, + rescale.result = TRUE + ) + + h <- agnes(d, method = 'ward') + p <- ladderize(as.phylo(as.hclust(h))) + + # look at distance plot-- just the median profile + plot_distance_graph(d, 12) + + # similarity relative to median profile (profile #12) + round(1 - (as.matrix(d)[12,] / max(as.matrix(d)[12,])), 2) + + ## make dendrogram + soil profiles + # first promote to SoilProfileCollection + depths(sp3.grouped) <- id ~ top + bottom + + # setup plot: note that D has a scale of [0,1] + par(mar = c(1, 1, 1, 1)) + + # get the last plot geometry + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + + # the original labels, and new (indexed) order of pedons in dendrogram + d.labels <- attr(d, 'Labels') + + new_order <- sapply(1:lastPP$Ntip, + function(i) + which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i)) + + # plot the profiles, in the ordering defined by the dendrogram + # with a couple fudge factors to make them fit + plot( + sp3.grouped, + color = "soil_color", + plot.order = new_order, + scaling.factor = 0.01, + width = 0.1, + cex.names = 0.5, + y.offset = max(lastPP$yy) + 0.1, + add = TRUE + ) } }