diff --git a/R/aggregateColor.R b/R/aggregateColor.R index f07a8bbd2..6940b1857 100644 --- a/R/aggregateColor.R +++ b/R/aggregateColor.R @@ -1,11 +1,94 @@ -## TODO: replace dlply and ddply with base functions -# x: SPC -# groups: groups to aggregate over, can be either site/horizon attribute, need not be a factor -# col: r-compatible hex notation of color -# k: number of groups to reduce colors to (within each group), conditioned on unique colors available -# profile_wt: weighting via site-level attribute -aggregateColor <- function(x, groups='genhz', col='soil_color', colorSpace = 'CIE2000', k=NULL, profile_wt=NULL) { + +#' @title Summarize Soil Colors +#' +#' @description Summarize soil color data, weighted by occurrence and horizon thickness. +#' +#' @param x a `SoilProfileCollection` object +#' @param groups the name of a horizon or site attribute used to group horizons, see examples +#' @param col the name of a horizon-level attribute with soil color specified in hexadecimal (i.e. "#rrggbb") +#' @param colorSpace the name of color space or color distance metric to use for conversion of aggregate colors to Munsell; either CIE2000 (color distance metric), LAB, or sRGB. Default = 'CIE2000' +#' @param k single integer specifying the number of colors discretized via PAM (cluster package), see details +#' @param profile_wt the name of a site-level attribute used to modify weighting, e.g. area +#' +#' @return A list with the following components: +#' +#' \item{scaled.data}{a `list` of colors and associated weights, one item for each generalized horizon label with at least one color specified in the source data} +#' \item{aggregate.data}{a `data.frame` of weighted-mean colors, one row for each generalized horizon label with at least one color specified in the source data} +#' +#' @export +#' +#' @details Weights are computed by: +#' `w_i = sqrt(sum(thickness_i)) * n_i` +#' where `w_i` is the weight associated with color `i`, `thickness_i` is the total thickness of all horizons associated with the color `i`, and `n_i` is the number of horizons associated with color `i`. Weights are computed within groups specified by `groups`. +#' +#' @author D.E. Beaudette +#' +#' @seealso \code{\link{generalize.hz}} +#' +#' +#' @examples +#' +#' # load some example data +#' data(sp1, package='aqp') +#' +#' # upgrade to SoilProfileCollection and convert Munsell colors +#' sp1$soil_color <- with(sp1, munsell2rgb(hue, value, chroma)) +#' depths(sp1) <- id ~ top + bottom +#' site(sp1) <- ~ group +#' +#' # generalize horizon names +#' n <- c('O', 'A', 'B', 'C') +#' p <- c('O', 'A', 'B', 'C') +#' sp1$genhz <- generalize.hz(sp1$name, n, p) +#' +#' # aggregate colors over horizon-level attribute: 'genhz' +#' a <- aggregateColor(sp1, groups = 'genhz', col = 'soil_color') +#' +#' # aggregate colors over site-level attribute: 'group' +#' a <- aggregateColor(sp1, groups = 'group', col = 'soil_color') +#' +#' # aggregate colors over site-level attribute: 'group' +#' # discretize colors to 4 per group +#' a <- aggregateColor(sp1, groups = 'group', col = 'soil_color', k = 4) +#' +#' # aggregate colors over depth-slices +#' s <- slice(sp1, c(5, 10, 15, 25, 50, 100, 150) ~ soil_color) +#' s$slice <- paste0(s$top, ' cm') +#' s$slice <- factor(s$slice, levels=guessGenHzLevels(s, 'slice')$levels) +#' a <- aggregateColor(s, groups = 'slice', col = 'soil_color') +#' +#' \dontrun{ +#' # optionally plot with helper function +#' if(require(sharpshootR)) +#' aggregateColorPlot(a) +#' } +#' +#' # a more interesting example +#' \dontrun{ +#' data(loafercreek, package = 'soilDB') +#' +#' # generalize horizon names using REGEX rules +#' n <- c('Oi', 'A', 'BA','Bt1','Bt2','Bt3','Cr','R') +#' p <- c('O', '^A$|Ad|Ap|AB','BA$|Bw', +#' 'Bt1$|^B$','^Bt$|^Bt2$','^Bt3|^Bt4|CBt$|BCt$|2Bt|2CB$|^C$','Cr','R') +#' loafercreek$genhz <- generalize.hz(loafercreek$hzname, n, p) +#' +#' # remove non-matching generalized horizon names +#' loafercreek$genhz[loafercreek$genhz == 'not-used'] <- NA +#' loafercreek$genhz <- factor(loafercreek$genhz) +#' +#' a <- aggregateColor(loafercreek, 'genhz') +#' +#' # plot results with helper function +#' par(mar=c(1,4,4,1)) +#' aggregateColorPlot(a, print.n.hz = TRUE) +#' +#' # inspect aggregate data +#' a$aggregate.data +#' } +#' +aggregateColor <- function(x, groups = 'genhz', col = 'soil_color', colorSpace = 'CIE2000', k = NULL, profile_wt = NULL) { # sanity check if(!is.null(k)) { @@ -76,81 +159,104 @@ aggregateColor <- function(x, groups='genhz', col='soil_color', colorSpace = 'CI h.no.na[[groups]] <- factor(h.no.na[[groups]]) # split by genhz + ss <- split(h.no.na, h.no.na[[groups]]) + + # iterate over groups # note that some genhz will have 0 records - s <- dlply(h.no.na, groups, function(i) { - + s <- lapply(ss, function(i) { + + # current group label + this.group <- i[[groups]][1] + # optionally reduce to `k` colors via PAM, by group # this will only work if there are >= 1 unique colors n.cols <- length(unique(i[[col]])) - + if(is.numeric(k) & n.cols > 1) { - + # condition number of classes on available data k.adj <- pmin(k, n.cols - 1) - + # work with a unique subset, there is no need to compute distances / cluster all colors lut <- data.frame(col=unique(i[[col]]), stringsAsFactors = FALSE) - + # same order as LUT # convert to sRGB # NA will create bogus colors, filtered above v <- t(col2rgb(lut$col) / 255) - + # convert to LAB v <- convertColor(v, from='sRGB', to='Lab', from.ref.white='D65', to.ref.white = 'D65', clip = FALSE) - + # compute perceptually based distance matrix on unique colors dE00 <- farver::compare_colour(v, v, from_space='lab', method='CIE2000', white_from='D65') dE00 <- as.dist(dE00) - - + + # clustering from distance matrix # TODO: save clustering results for later v.pam <- pam(dE00, k = k.adj, diss = TRUE, pamonce=5) - + # put clustering vector into LUT lut$cluster <- v.pam$clustering - + # get medoid colors med.col <- lut$col[v.pam$medoids] - + # expand original colors to medoid colors based on LUT idx <- base::match(i[[col]], lut$col) - + # replace original colors with discretized colors i[[col]] <- med.col[lut$cluster[idx]] } - + # aggregate depth by unique soil color # this assumes that thickness > 0, otherwise NaN is returned - res <- ddply(i, col, summarise, weight=sqrt(sum(thick)) * length(thick), n.hz=length(thick)) - - + # convert to data.table for summary + i <- as.data.table(i) + + # not sure about most readable style + res <- i[, + list( + weight = sqrt(sum(thick)) * length(thick), + n.hz = length(thick) + ), + by = col + ] + + # convert back to data.frame + res <- as.data.frame(res) + # sort by thickness-- this is our metric for relevance res <- res[order(res$weight, decreasing=TRUE), ] - + # back-calculate the closest Munsell color - m <- rgb2munsell(t(col2rgb(res[[col]])) / 255, colorSpace=colorSpace) - + m <- rgb2munsell(t(col2rgb(res[[col]])) / 255, colorSpace = colorSpace) + # format as text res$munsell <- paste0(m[, 1], ' ', m[, 2], '/', m[, 3]) - + + # add group ID + res[['.id']] <- this.group + return(res) }) + + # rescale using the sum of the weights within the current horizon s.scaled <- lapply(s, function(i) { i$weight <- i$weight / sum(i$weight) return(i) }) - + # compute weighted mean color for each group, in CIE LAB colorspace # TODO: this is similar to soilDB::estimateColorMixture(), consider consolidation # TODO: this is the second pass of color conversion, can it be done in a single pass? # TODO: should aggregate colors be mixed from the discretized colors? probably # TODO: color mixing should be performed using reflectance spectra - s.agg <- ldply(s.scaled, function(i) { + s.agg <- lapply(s.scaled, function(i) { # convert to sRGB v <- t(col2rgb(i[[col]])) / 255 @@ -173,11 +279,20 @@ aggregateColor <- function(x, groups='genhz', col='soil_color', colorSpace = 'CI wm.munsell <- rgb2munsell(wm, colorSpace = colorSpace) # consolidate and return - res <- data.frame(munsell=wm.munsell, col=wm.col, wm, n=nrow(i)) + res <- data.frame( + .id = i[['.id']][1], + munsell = wm.munsell, + col = wm.col, + wm, + n = nrow(i) + ) + return(res) }) + + s.agg <- do.call('rbind', s.agg) names(s.agg)[1] <- groups # return scaled color data - return(list(scaled.data=s.scaled, aggregate.data=s.agg)) + return(list(scaled.data = s.scaled, aggregate.data = s.agg)) } diff --git a/man/aggregateColor.Rd b/man/aggregateColor.Rd index aeee89bde..51dca226a 100644 --- a/man/aggregateColor.Rd +++ b/man/aggregateColor.Rd @@ -1,38 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregateColor.R \name{aggregateColor} \alias{aggregateColor} - \title{Summarize Soil Colors} -\description{Summarize soil color data, weighted by occurrence and horizon thickness.} \usage{ -aggregateColor(x, groups = "genhz", col = "soil_color", - colorSpace = 'CIE2000', k=NULL, profile_wt=NULL) +aggregateColor( + x, + groups = "genhz", + col = "soil_color", + colorSpace = "CIE2000", + k = NULL, + profile_wt = NULL +) } -%- maybe also 'usage' for other objects documented here. \arguments{ - \item{x}{a \code{SoilProfileCollection} object} - \item{groups}{the name of a horizon or site attribute used to group horizons, see examples} - \item{col}{the name of a horizon-level attribute with soil color specified in hexadecimal (i.e. "#rrggbb")} - \item{colorSpace}{the name of color space to use for conversion of aggregate colors to Munsell; either CIE2000, LAB, or sRGB. Default = 'CIE2000'} - \item{k}{single integer specifying the number of colors discretized via PAM, see details} - \item{profile_wt}{the name of a site-level attribute used to modify weighting, e.g. area} -} +\item{x}{a \code{SoilProfileCollection} object} -\value{A list with the following components: +\item{groups}{the name of a horizon or site attribute used to group horizons, see examples} - \item{scaled.data}{a list of colors and associated weights, one item for each generalized horizon label with at least one color specified in the source data} - \item{aggregate.data}{a data.frame of weighted-mean colors, one row for each generalized horizon label with at least one color specified in the source data} - -} +\item{col}{the name of a horizon-level attribute with soil color specified in hexadecimal (i.e. "#rrggbb")} -\details{Weights are computed by: -w_i = sqrt(sum(thickness_i)) * n_i -where w_i is the weight associated with color i, thickness_i is the total thickness of all horizons associated with the color i, and n_i is the number of horizons associated with color i. Weights are computed within groups specified by \code{groups}.} +\item{colorSpace}{the name of color space or color distance metric to use for conversion of aggregate colors to Munsell; either CIE2000 (color distance metric), LAB, or sRGB. Default = 'CIE2000'} -\author{D.E. Beaudette} +\item{k}{single integer specifying the number of colors discretized via PAM (cluster package), see details} -\seealso{\code{\link{generalize.hz}}} +\item{profile_wt}{the name of a site-level attribute used to modify weighting, e.g. area} +} +\value{ +A list with the following components: +\item{scaled.data}{a \code{list} of colors and associated weights, one item for each generalized horizon label with at least one color specified in the source data} +\item{aggregate.data}{a \code{data.frame} of weighted-mean colors, one row for each generalized horizon label with at least one color specified in the source data} +} +\description{ +Summarize soil color data, weighted by occurrence and horizon thickness. +} +\details{ +Weights are computed by: +\code{w_i = sqrt(sum(thickness_i)) * n_i} +where \code{w_i} is the weight associated with color \code{i}, \code{thickness_i} is the total thickness of all horizons associated with the color \code{i}, and \code{n_i} is the number of horizons associated with color \code{i}. Weights are computed within groups specified by \code{groups}. +} \examples{ + # load some example data data(sp1, package='aqp') @@ -63,36 +72,39 @@ s$slice <- factor(s$slice, levels=guessGenHzLevels(s, 'slice')$levels) a <- aggregateColor(s, groups = 'slice', col = 'soil_color') \dontrun{ -# optionally plot with helper function -if(require(sharpshootR)) - aggregateColorPlot(a) + # optionally plot with helper function + if(require(sharpshootR)) + aggregateColorPlot(a) } # a more interesting example \dontrun{ -data(loafercreek, package = 'soilDB') - -# generalize horizon names using REGEX rules -n <- c('Oi', 'A', 'BA','Bt1','Bt2','Bt3','Cr','R') -p <- c('O', '^A$|Ad|Ap|AB','BA$|Bw', -'Bt1$|^B$','^Bt$|^Bt2$','^Bt3|^Bt4|CBt$|BCt$|2Bt|2CB$|^C$','Cr','R') -loafercreek$genhz <- generalize.hz(loafercreek$hzname, n, p) - -# remove non-matching generalized horizon names -loafercreek$genhz[loafercreek$genhz == 'not-used'] <- NA -loafercreek$genhz <- factor(loafercreek$genhz) - -a <- aggregateColor(loafercreek, 'genhz') - -# plot results with helper function -par(mar=c(1,4,4,1)) -aggregateColorPlot(a, print.n.hz = TRUE) + data(loafercreek, package = 'soilDB') + + # generalize horizon names using REGEX rules + n <- c('Oi', 'A', 'BA','Bt1','Bt2','Bt3','Cr','R') + p <- c('O', '^A$|Ad|Ap|AB','BA$|Bw', + 'Bt1$|^B$','^Bt$|^Bt2$','^Bt3|^Bt4|CBt$|BCt$|2Bt|2CB$|^C$','Cr','R') + loafercreek$genhz <- generalize.hz(loafercreek$hzname, n, p) + + # remove non-matching generalized horizon names + loafercreek$genhz[loafercreek$genhz == 'not-used'] <- NA + loafercreek$genhz <- factor(loafercreek$genhz) + + a <- aggregateColor(loafercreek, 'genhz') + + # plot results with helper function + par(mar=c(1,4,4,1)) + aggregateColorPlot(a, print.n.hz = TRUE) + + # inspect aggregate data + a$aggregate.data +} -# inspect aggregate data -a$aggregate.data } +\seealso{ +\code{\link{generalize.hz}} +} +\author{ +D.E. Beaudette } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory. -\keyword{manip} - diff --git a/tests/testthat/test-aggregateColor.R b/tests/testthat/test-aggregateColor.R index febf947a0..3382b487f 100644 --- a/tests/testthat/test-aggregateColor.R +++ b/tests/testthat/test-aggregateColor.R @@ -41,7 +41,7 @@ test_that("expected error conditions", { expect_error(aggregateColor(x, groups='genhz', col='soil_color', k=NA)) }) - +## TODO: add a couple more of these test_that("manual calculation using CIE2000 and LAB, single profile", { x <- sp1[1, ] @@ -69,3 +69,29 @@ test_that("manual calculation using CIE2000 and LAB, single profile", { expect_equal(test2, '10YR 3/2') } }) + + +test_that("manual calculation using CIE2000 and LAB, single profile", { + + data(sp3) + depths(sp3) <- id ~ top + bottom + + # single profile + x <- sp3[4, ] + # group all horizons together + site(x)$group <- 'A' + + # do the work + a <- aggregateColor(x, groups = 'group', col = 'soil_color', colorSpace = 'CIE2000') + + # manually verified + expect_true(a$aggregate.data$munsell.hue == '5YR') + expect_true(a$aggregate.data$munsell.value == '5') + expect_true(a$aggregate.data$munsell.chroma == '5') + + # TODO: verify weights + +}) + + +