diff --git a/R/knndm.R b/R/knndm.R index 8fe599c6..565d1244 100644 --- a/R/knndm.R +++ b/R/knndm.R @@ -5,7 +5,7 @@ #' #' @author Carles Milà and Jan Linnenbrink #' @param tpoints sf or sfc point object. Contains the training points samples. -#' @param modeldomain sf polygon object defining the prediction area. Optional; alternative to ppoints (see Details). +#' @param modeldomain sf polygon object or SpatRaster defining the prediction area. Optional; alternative to ppoints (see Details). #' @param ppoints sf or sfc point object. Contains the target prediction points. Optional; alternative to modeldomain (see Details). #' @param space character. Only "geographical" knndm, i.e. kNNDM in the geographical space, is currently implemented. #' @param k integer. Number of folds desired for CV. Defaults to 10. @@ -54,13 +54,12 @@ #' the `maxp` parameter; this may help to control for strong clustering (at the cost of having unbalanced folds). #' Secondly, decrease the number of final folds `k`, which may help to have larger clusters. #' -#' The `modeldomain` is a sf polygon that defines the prediction area. The function takes a regular point sample -#' (amount defined by `samplesize`) from the spatial extent. As an alternative use `ppoints` instead of -#' `modeldomain`, if you have already defined the prediction locations (e.g. raster pixel centroids). -#' When using either `modeldomain` or `ppoints`, we advise to plot the study area polygon and the -#' training/prediction points as a previous step to ensure they are aligned. +#' #' The `modeldomain` is either a sf polygon that defines the prediction area, or alternatively a SpatRaster out of which a polygon, +#' transformed into the CRS of the training points, is defined as the outline of all non-NA cells. +#' Then, the function takes a regular point sample (amount defined by `samplesize`) from the spatial extent. +#' As an alternative use `ppoints` instead of `modeldomain`, if you have already defined the prediction locations (e.g. raster pixel centroids). +#' When using either `modeldomain` or `ppoints`, we advise to plot the study area polygon and the training/prediction points as a previous step to ensure they are aligned. #' -#' @note Experimental cycle. Article describing and testing the algorithm in preparation. #' @references #' \itemize{ #' \item Linnenbrink, J., Milà, C., Ludwig, M., and Meyer, H.: kNNDM: k-fold Nearest Neighbour Distance Matching Cross-Validation for map accuracy estimation, EGUsphere [preprint], https://doi.org/10.5194/egusphere-2023-1308, 2023. @@ -139,13 +138,9 @@ #' pts <- st_as_sf(pts,coords=c("Easting","Northing")) #' st_crs(pts) <- 26911 #' studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) -#' studyArea[!is.na(studyArea)] <- 1 -#' studyArea <- as.polygons(studyArea, values = FALSE, na.all = TRUE) |> -#' st_as_sf() |> -#' st_union() #' pts <- st_transform(pts, crs = st_crs(studyArea)) -#' plot(studyArea) -#' plot(st_geometry(pts), add = TRUE, col = "red") +#' terra::plot(studyArea[["DEM"]]) +#' terra::plot(vect(pts), add = T) #' #' knndm_folds <- knndm(pts, modeldomain=studyArea, k = 5) #' knndm_folds @@ -173,12 +168,40 @@ knndm <- function(tpoints, modeldomain = NULL, ppoints = NULL, # create sample points from modeldomain if(is.null(ppoints)&!is.null(modeldomain)){ + + # Check modeldomain is indeed a sf/SpatRaster + if(!any(c("sfc", "sf", "SpatRaster") %in% class(modeldomain))){ + stop("modeldomain must be a sf/sfc object or a 'SpatRaster' object.") + } + + # If modeldomain is a SpatRaster, transform into polygon + if(any(class(modeldomain) == "SpatRaster")){ + modeldomain[!is.na(modeldomain)] <- 1 + modeldomain <- terra::as.polygons(modeldomain, values = FALSE, na.all = TRUE) |> + sf::st_as_sf() |> + sf::st_union() + modeldomain <- sf::st_transform(modeldomain, crs = sf::st_crs(tpoints)) + } + + # Check modeldomain is indeed a polygon sf + if(!any(class(sf::st_geometry(modeldomain)) %in% c("sfc_POLYGON", "sfc_MULTIPOLYGON"))){ + stop("modeldomain must be a sf/sfc polygon object.") + } + + # Check whether modeldomain has the same crs as tpoints if(!identical(sf::st_crs(tpoints), sf::st_crs(modeldomain))){ stop("tpoints and modeldomain must have the same CRS") } + + # We sample message(paste0(samplesize, " prediction points are sampled from the modeldomain")) ppoints <- sf::st_sample(x = modeldomain, size = samplesize, type = sampling) sf::st_crs(ppoints) <- sf::st_crs(modeldomain) + + }else if(!is.null(ppoints)){ + if(!identical(sf::st_crs(tpoints), sf::st_crs(ppoints))){ + stop("tpoints and ppoints must have the same CRS") + } } # Conditional preprocessing actions @@ -208,7 +231,6 @@ knndm <- function(tpoints, modeldomain = NULL, ppoints = NULL, } - # kNNDM checks check_knndm <- function(tpoints, ppoints, space, k, maxp, clustering, islonglat){ diff --git a/R/nndm.R b/R/nndm.R index 3cb45d42..194f16b1 100644 --- a/R/nndm.R +++ b/R/nndm.R @@ -4,9 +4,9 @@ #' indices to perform a NNDM LOO CV for map validation. #' @author Carles Milà #' @param tpoints sf or sfc point object. Contains the training points samples. -#' @param modeldomain sf polygon object or SpatRaster defining the prediction area (see Details). +#' @param modeldomain sf polygon object or SpatRaster defining the prediction area. Optional; alternative to ppoints (see Details). #' @param ppoints sf or sfc point object. Contains the target prediction points. -#' Optional. Alternative to modeldomain (see Details). +#' Optional. Optional; alternative to modeldomain (see Details). #' @param samplesize numeric. How many points in the modeldomain should be sampled as prediction points? #' Only required if modeldomain is used instead of ppoints. #' @param sampling character. How to draw prediction points from the modeldomain? See `sf::st_sample`. diff --git a/man/knndm.Rd b/man/knndm.Rd index 675401b5..eac89aa6 100644 --- a/man/knndm.Rd +++ b/man/knndm.Rd @@ -20,7 +20,7 @@ knndm( \arguments{ \item{tpoints}{sf or sfc point object. Contains the training points samples.} -\item{modeldomain}{sf polygon object defining the prediction area. Optional; alternative to ppoints (see Details).} +\item{modeldomain}{sf polygon object or SpatRaster defining the prediction area. Optional; alternative to ppoints (see Details).} \item{ppoints}{sf or sfc point object. Contains the target prediction points. Optional; alternative to modeldomain (see Details).} @@ -82,14 +82,11 @@ configuration still show signs of Gj* > Gij, there are several things that can b the `maxp` parameter; this may help to control for strong clustering (at the cost of having unbalanced folds). Secondly, decrease the number of final folds `k`, which may help to have larger clusters. -The `modeldomain` is a sf polygon that defines the prediction area. The function takes a regular point sample -(amount defined by `samplesize`) from the spatial extent. As an alternative use `ppoints` instead of -`modeldomain`, if you have already defined the prediction locations (e.g. raster pixel centroids). -When using either `modeldomain` or `ppoints`, we advise to plot the study area polygon and the -training/prediction points as a previous step to ensure they are aligned. -} -\note{ -Experimental cycle. Article describing and testing the algorithm in preparation. +#' The `modeldomain` is either a sf polygon that defines the prediction area, or alternatively a SpatRaster out of which a polygon, +transformed into the CRS of the training points, is defined as the outline of all non-NA cells. +Then, the function takes a regular point sample (amount defined by `samplesize`) from the spatial extent. +As an alternative use `ppoints` instead of `modeldomain`, if you have already defined the prediction locations (e.g. raster pixel centroids). +When using either `modeldomain` or `ppoints`, we advise to plot the study area polygon and the training/prediction points as a previous step to ensure they are aligned. } \examples{ ######################################################################## @@ -161,13 +158,9 @@ pts <- dat[,-1] pts <- st_as_sf(pts,coords=c("Easting","Northing")) st_crs(pts) <- 26911 studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) -studyArea[!is.na(studyArea)] <- 1 -studyArea <- as.polygons(studyArea, values = FALSE, na.all = TRUE) |> - st_as_sf() |> - st_union() pts <- st_transform(pts, crs = st_crs(studyArea)) -plot(studyArea) -plot(st_geometry(pts), add = TRUE, col = "red") +terra::plot(studyArea[["DEM"]]) +terra::plot(vect(pts), add = T) knndm_folds <- knndm(pts, modeldomain=studyArea, k = 5) knndm_folds diff --git a/man/nndm.Rd b/man/nndm.Rd index e4aacc78..dd355c4e 100644 --- a/man/nndm.Rd +++ b/man/nndm.Rd @@ -17,10 +17,10 @@ nndm( \arguments{ \item{tpoints}{sf or sfc point object. Contains the training points samples.} -\item{modeldomain}{sf polygon object or SpatRaster defining the prediction area (see Details).} +\item{modeldomain}{sf polygon object or SpatRaster defining the prediction area. Optional; alternative to ppoints (see Details).} \item{ppoints}{sf or sfc point object. Contains the target prediction points. -Optional. Alternative to modeldomain (see Details).} +Optional. Optional; alternative to modeldomain (see Details).} \item{samplesize}{numeric. How many points in the modeldomain should be sampled as prediction points? Only required if modeldomain is used instead of ppoints.} diff --git a/tests/testthat/test-knndm.R b/tests/testthat/test-knndm.R index 488730f4..8ec5b4da 100644 --- a/tests/testthat/test-knndm.R +++ b/tests/testthat/test-knndm.R @@ -186,3 +186,20 @@ test_that("kNNDM recognizes erroneous input", { clustering="kmeans")) }) +test_that("kNNDM yields the expected results with SpatRast modeldomain", { + + set.seed(1234) + + # prepare sample data + dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST")) + dat <- terra::aggregate(dat[,c("DEM","TWI", "NDRE.M", "Easting", "Northing","VW")], + by=list(as.character(dat$SOURCEID)),mean) + pts <- dat[,-1] + pts <- sf::st_as_sf(pts,coords=c("Easting","Northing")) + sf::st_crs(pts) <- 26911 + studyArea <- terra::rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST")) + pts <- sf::st_transform(pts, crs = sf::st_crs(studyArea)) + + knndm_folds <- knndm(pts, modeldomain = studyArea) + expect_equal(as.numeric(knndm(pts, modeldomain = studyArea)$Gjstar[40]), 61.935505) +})