From 0ac8b1a1105aa05cba8a9efde1d0d278f96990a7 Mon Sep 17 00:00:00 2001 From: carlesmila Date: Mon, 11 Mar 2024 15:24:48 +0100 Subject: [PATCH] NNDM modeldomain as SpatRaster (+ updates in docs) --- R/nndm.R | 43 +++++++++++++++++++++++--------------- man/nndm.Rd | 23 +++++++++----------- tests/testthat/test-nndm.R | 23 ++++++++++++++++++-- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/R/nndm.R b/R/nndm.R index 91867198..3cb45d42 100644 --- a/R/nndm.R +++ b/R/nndm.R @@ -4,7 +4,7 @@ #' indices to perform a NNDM LOO CV for map validation. #' @author Carles MilĂ  #' @param tpoints sf or sfc point object. Contains the training points samples. -#' @param modeldomain sf polygon object defining the prediction area (see Details). +#' @param modeldomain sf polygon object or SpatRaster defining the prediction area (see Details). #' @param ppoints sf or sfc point object. Contains the target prediction points. #' Optional. Alternative to modeldomain (see Details). #' @param samplesize numeric. How many points in the modeldomain should be sampled as prediction points? @@ -34,12 +34,13 @@ #' When \emph{phi} is set to "max", nearest neighbor distance matching is performed for the entire prediction area. Euclidean distances are used for projected #' and non-defined CRS, great circle distances are used for geographic CRS (units in meters). #' -#' The \emph{modeldomain} is a sf polygon that defines the prediction area. The function takes a regular point sample (amount defined by \emph{samplesize)} from the spatial extent. +#' The \emph{modeldomain} is either a sf polygon that defines the prediction area, or alternatively a SpatRaster out of which a polygon, +#' transformed into the CRS of the training points, is defined as the outline of all non-NA cells. +#' Then, the function takes a regular point sample (amount defined by \emph{samplesize)} from the spatial extent. #' As an alternative use \emph{ppoints} instead of \emph{modeldomain}, if you have already defined the prediction locations (e.g. raster pixel centroids). #' When using either \emph{modeldomain} or \emph{ppoints}, we advise to plot the study area polygon and the training/prediction points as a previous step to ensure they are aligned. #' -#' @note NNDM is a variation of LOOCV and therefore may take a long time for large training data sets. -#' A k-fold variant will be implemented shortly. +#' @note NNDM is a variation of LOOCV and therefore may take a long time for large training data sets. See \code{\link{knndm}} for a more efficient k-fold variant of the method. #' @seealso \code{\link{geodist}}, \code{\link{knndm}} #' @references #' \itemize{ @@ -97,8 +98,8 @@ #' plot(nndm_pred) #' #' ######################################################################## -#' # Example 3: Real- world example; using a modeldomain instead of previously -#' # sampled prediction locations +#' # Example 3: Real- world example; using a SpatRast modeldomain instead +#' # of previously sampled prediction locations #' ######################################################################## #' \dontrun{ #' library(sf) @@ -112,15 +113,11 @@ #' pts <- st_as_sf(pts,coords=c("Easting","Northing")) #' st_crs(pts) <- 26911 #' studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) -#' studyArea[!is.na(studyArea)] <- 1 -#' studyArea <- as.polygons(studyArea, values = FALSE, na.all = TRUE) |> -#' st_as_sf() |> -#' st_union() #' pts <- st_transform(pts, crs = st_crs(studyArea)) -#' plot(studyArea) -#' plot(st_geometry(pts), add = TRUE, col = "red") +#' terra::plot(studyArea[["DEM"]]) +#' terra::plot(vect(pts), add = T) #' -#' nndm_folds <- nndm(pts, modeldomain= studyArea) +#' nndm_folds <- nndm(pts, modeldomain = studyArea) #' plot(nndm_folds) #' #' #use for cross-validation: @@ -144,10 +141,22 @@ nndm <- function(tpoints, modeldomain = NULL, ppoints = NULL, samplesize = 1000, # create sample points from modeldomain if(is.null(ppoints)&!is.null(modeldomain)){ - # Check modeldomain is indeed a polygon - if(!any(c("sfc", "sf") %in% class(modeldomain))){ - stop("modeldomain must be a sf/sfc object.") - }else if(!any(class(sf::st_geometry(modeldomain)) %in% c("sfc_POLYGON", "sfc_MULTIPOLYGON"))){ + # Check modeldomain is indeed a sf/SpatRaster + if(!any(c("sfc", "sf", "SpatRaster") %in% class(modeldomain))){ + stop("modeldomain must be a sf/sfc object or a 'SpatRaster' object.") + } + + # If modeldomain is a SpatRaster, transform into polygon + if(any(class(modeldomain) == "SpatRaster")){ + modeldomain[!is.na(modeldomain)] <- 1 + modeldomain <- terra::as.polygons(modeldomain, values = FALSE, na.all = TRUE) |> + sf::st_as_sf() |> + sf::st_union() + modeldomain <- sf::st_transform(modeldomain, crs = sf::st_crs(tpoints)) + } + + # Check modeldomain is indeed a polygon sf + if(!any(class(sf::st_geometry(modeldomain)) %in% c("sfc_POLYGON", "sfc_MULTIPOLYGON"))){ stop("modeldomain must be a sf/sfc polygon object.") } diff --git a/man/nndm.Rd b/man/nndm.Rd index b0a2d21b..e4aacc78 100644 --- a/man/nndm.Rd +++ b/man/nndm.Rd @@ -17,7 +17,7 @@ nndm( \arguments{ \item{tpoints}{sf or sfc point object. Contains the training points samples.} -\item{modeldomain}{sf polygon object defining the prediction area (see Details).} +\item{modeldomain}{sf polygon object or SpatRaster defining the prediction area (see Details).} \item{ppoints}{sf or sfc point object. Contains the target prediction points. Optional. Alternative to modeldomain (see Details).} @@ -58,13 +58,14 @@ Distances are only matched up to \emph{phi}. Beyond that range, all data points When \emph{phi} is set to "max", nearest neighbor distance matching is performed for the entire prediction area. Euclidean distances are used for projected and non-defined CRS, great circle distances are used for geographic CRS (units in meters). -The \emph{modeldomain} is a sf polygon that defines the prediction area. The function takes a regular point sample (amount defined by \emph{samplesize)} from the spatial extent. +The \emph{modeldomain} is either a sf polygon that defines the prediction area, or alternatively a SpatRaster out of which a polygon, +transformed into the CRS of the training points, is defined as the outline of all non-NA cells. +Then, the function takes a regular point sample (amount defined by \emph{samplesize)} from the spatial extent. As an alternative use \emph{ppoints} instead of \emph{modeldomain}, if you have already defined the prediction locations (e.g. raster pixel centroids). When using either \emph{modeldomain} or \emph{ppoints}, we advise to plot the study area polygon and the training/prediction points as a previous step to ensure they are aligned. } \note{ -NNDM is a variation of LOOCV and therefore may take a long time for large training data sets. -A k-fold variant will be implemented shortly. +NNDM is a variation of LOOCV and therefore may take a long time for large training data sets. See \code{\link{knndm}} for a more efficient k-fold variant of the method. } \examples{ ######################################################################## @@ -116,8 +117,8 @@ nndm_pred plot(nndm_pred) ######################################################################## -# Example 3: Real- world example; using a modeldomain instead of previously -# sampled prediction locations +# Example 3: Real- world example; using a SpatRast modeldomain instead +# of previously sampled prediction locations ######################################################################## \dontrun{ library(sf) @@ -131,15 +132,11 @@ pts <- dat[,-1] pts <- st_as_sf(pts,coords=c("Easting","Northing")) st_crs(pts) <- 26911 studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) -studyArea[!is.na(studyArea)] <- 1 -studyArea <- as.polygons(studyArea, values = FALSE, na.all = TRUE) |> - st_as_sf() |> - st_union() pts <- st_transform(pts, crs = st_crs(studyArea)) -plot(studyArea) -plot(st_geometry(pts), add = TRUE, col = "red") +terra::plot(studyArea[["DEM"]]) +terra::plot(vect(pts), add = T) -nndm_folds <- nndm(pts, modeldomain= studyArea) +nndm_folds <- nndm(pts, modeldomain = studyArea) plot(nndm_folds) #use for cross-validation: diff --git a/tests/testthat/test-nndm.R b/tests/testthat/test-nndm.R index 3b69d4de..9d6aad08 100644 --- a/tests/testthat/test-nndm.R +++ b/tests/testthat/test-nndm.R @@ -37,7 +37,7 @@ test_that("NNDM detects wrong data and geometry types", { # model domain expect_error(suppressWarnings(nndm(tpoints_sfc, modeldomain = 1)), - "modeldomain must be a sf/sfc object.") + "modeldomain must be a sf/sfc object or a 'SpatRaster' object.") expect_error(nndm(tpoints_sfc, modeldomain = ppoints_sfc), "modeldomain must be a sf/sfc polygon object.") }) @@ -106,5 +106,24 @@ test_that("NNDM yields the expected results for all CRS", { # Geographic tpoints_sf_4326 <- sf::st_set_crs(tpoints_sfc, 4326) ppoints_sf_4326 <- sf::st_set_crs(ppoints_sfc, 4326) - expect_equal(as.numeric(nndm(tpoints_sf_4326, ppoints = ppoints_sf_4326, phi = 1000000)$Gjstar[20], 4), 355614.94) + expect_equal(as.numeric(nndm(tpoints_sf_4326, ppoints = ppoints_sf_4326, phi = 1000000)$Gjstar[20]), 355614.94) +}) + +test_that("NNDM yields the expected results with SpatRast modeldomain", { + + set.seed(1234) + + # prepare sample data + dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST")) + dat <- terra::aggregate(dat[,c("DEM","TWI", "NDRE.M", "Easting", "Northing","VW")], + by=list(as.character(dat$SOURCEID)),mean) + pts <- dat[,-1] + pts <- sf::st_as_sf(pts,coords=c("Easting","Northing")) + sf::st_crs(pts) <- 26911 + studyArea <- terra::rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) + pts <- sf::st_transform(pts, crs = sf::st_crs(studyArea)) + + nndm_folds <- nndm(pts, modeldomain = studyArea, phi = 150) + expect_equal(as.numeric(nndm(pts, modeldomain = studyArea, phi = 150)$Gjstar[5]), 63.828663) + })