diff --git a/NEWS.md b/NEWS.md index 9b329cf5..4d1cf9e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # Version 1.3-2 (development) +* #139 add `na.action` argument to `geary.test`, `geary.mc` and `globalG.test` + * add `style` to `sn2listw` use in `tri2nb` # Version 1.3-1 (2023-11-23) diff --git a/R/geary.R b/R/geary.R index e8093d44..383e5e05 100644 --- a/R/geary.R +++ b/R/geary.R @@ -1,4 +1,4 @@ -# Copyright 2001-9, 2021 by Roger Bivand +# Copyright 2001-24 by Roger Bivand # @@ -7,6 +7,7 @@ geary <- function(x, listw, n, n1, S0, zero.policy=attr(listw, "zero.policy")) { zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) stopifnot(is.vector(x)) + stopifnot(all(is.finite(x))) # z <- scale(x, scale=FALSE) z <- scale(x) zz <- sum(z^2) @@ -34,16 +35,24 @@ geary.intern <- function(x, listw, n, zero.policy=attr(listw, "zero.policy"), ty } geary.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "zero.policy"), - alternative="greater", spChk=NULL, adjust.n=TRUE) { + alternative="greater", spChk=NULL, adjust.n=TRUE, na.action=na.fail) { if (is.null(zero.policy)) zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) alternative <- match.arg(alternative, c("less", "greater", "two.sided")) - if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), - "is not a listw object")) - if(!is.numeric(x)) stop(paste(deparse(substitute(x)), - "is not a numeric vector")) - if (any(is.na(x))) stop("NA in X") + wname <- deparse(substitute(listw)) + if (!inherits(listw, "listw")) stop(wname, "is not a listw object") + xname <- deparse(substitute(x)) + if(!is.numeric(x)) stop(xname, + " is not a numeric vector") + if (deparse(substitute(na.action)) == "na.pass") + stop("na.pass not permitted") + x <- na.action(x) + na.act <- attr(x, "na.action") + if (!is.null(na.act)) { + subset <- !(1:length(listw$neighbours) %in% na.act) + listw <- subset(listw, subset, zero.policy=zero.policy) + } n <- length(listw$neighbours) if (n != length(x)) stop("objects of different length") if (is.null(spChk)) spChk <- get.spChkOption() @@ -52,9 +61,9 @@ geary.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "ze wc <- spweights.constants(listw, zero.policy, adjust.n=adjust.n) S02 <- wc$S0*wc$S0 - res <- geary(x, listw, wc$n, wc$n1, wc$S0, zero.policy) + res <- geary(as.vector(x), listw, wc$n, wc$n1, wc$S0, zero.policy) C <- res$C - if (is.na(C)) stop("NAs generated in geary - check zero.policy") + if (is.na(C)) stop("NAs generated in geary - check zero.policy and na.action") K <- res$K EC <- 1 if(randomisation) { @@ -85,29 +94,43 @@ geary.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "ze names(vec) <- c("Geary C statistic", "Expectation", "Variance") method <- paste("Geary C test under", ifelse(randomisation, "randomisation", "normality")) - data.name <- paste(deparse(substitute(x)), "\nweights:", - deparse(substitute(listw)), "\n") + data.name <- paste(xname, "\nweights:", wname, + ifelse(is.null(na.act), "", paste("\nomitted:", + paste(na.act, collapse=", "))), + ifelse(adjust.n && isTRUE(any(sum(card(listw$neighbours) == 0L))), + "\nn reduced by no-neighbour observations", ""), "\n") res <- list(statistic=statistic, p.value=PrC, estimate=vec, alternative=ifelse(alternative == "two.sided", alternative, paste("Expectation", alternative, "than statistic")), method=method, data.name=data.name) + if (!is.null(na.act)) attr(res, "na.action") <- na.act class(res) <- "htest" res } geary.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), - alternative="greater", spChk=NULL, adjust.n=TRUE, return_boot=FALSE) { + alternative="greater", spChk=NULL, adjust.n=TRUE, return_boot=FALSE, + na.action=na.fail) { if (is.null(zero.policy)) zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) stopifnot(is.vector(x)) alternative <- match.arg(alternative, c("less", "greater", "two.sided")) - if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), - "is not a listw object")) - if(!is.numeric(x)) stop(paste(deparse(substitute(x)), - "is not a numeric vector")) + wname <- deparse(substitute(listw)) + if (!inherits(listw, "listw")) stop(wname, "is not a listw object") + xname <- deparse(substitute(x)) + if(!is.numeric(x)) stop(xname, " is not a numeric vector") if(missing(nsim)) stop("nsim must be given") - if (any(is.na(x))) stop("NA in X") + if (deparse(substitute(na.action)) == "na.pass") + stop("na.pass not permitted") + x <- na.action(x) + na.act <- attr(x, "na.action") + if (!is.null(na.act)) { + subset <- !(1:length(listw$neighbours) %in% na.act) + listw <- subset(listw, subset, zero.policy=zero.policy) + if (return_boot) + message("NA observations omitted: ", paste(na.act, collapse=", ")) + } n <- length(listw$neighbours) if (n != length(x)) stop("objects of different length") if (is.null(spChk)) spChk <- get.spChkOption() @@ -134,7 +157,7 @@ geary.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), res <- numeric(length=nsim+1) for (i in 1:nsim) res[i] <- geary(sample(x), listw, n, wc$n1, wc$S0, zero.policy)$C - res[nsim+1] <- geary(x, listw, n, wc$n1, wc$S0, zero.policy)$C + res[nsim+1] <- geary(as.vector(x), listw, n, wc$n1, wc$S0, zero.policy)$C rankres <- rank(res) xrank <- rankres[length(res)] diff <- nsim - xrank @@ -153,12 +176,13 @@ geary.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), parameter <- xrank names(parameter) <- "observed rank" method <- "Monte-Carlo simulation of Geary C" - data.name <- paste(deparse(substitute(x)), "\nweights:", - deparse(substitute(listw)), "\nnumber of simulations + 1:", - nsim+1, "\n") + data.name <- paste(xname, "\nweights:", wname, + ifelse(is.null(na.act), "", paste("\nomitted:", paste(na.act, + collapse=", "))), "\nnumber of simulations + 1:", nsim+1, "\n") lres <- list(statistic=statistic, parameter=parameter, p.value=pval, alternative=alternative, method=method, data.name=data.name, res=res) + if (!is.null(na.act)) attr(res, "na.action") <- na.act class(lres) <- c("htest", "mc.sim") lres } diff --git a/R/globalG.R b/R/globalG.R index 7286b8f0..7fc442f5 100644 --- a/R/globalG.R +++ b/R/globalG.R @@ -1,26 +1,34 @@ -# Copyright 2002-2018 by Hisaji ONO and Roger Bivand +# Copyright 2002-2024 by Hisaji ONO and Roger Bivand # # General G Statistics # # globalG.test <- function(x, listw, zero.policy=attr(listw, "zero.policy"), - alternative="greater", spChk=NULL, adjust.n=TRUE, B1correct=TRUE, adjust.x=TRUE, Arc_all_x=FALSE) { + alternative="greater", spChk=NULL, adjust.n=TRUE, B1correct=TRUE, + adjust.x=TRUE, Arc_all_x=FALSE, na.action=na.fail) { if (is.null(zero.policy)) zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) stopifnot(is.vector(x)) alternative <- match.arg(alternative, c("greater", "less", "two.sided")) - if (!inherits(listw, "listw")) - stop(paste(deparse(substitute(listw)), "is not a listw object")) + wname <- deparse(substitute(listw)) + if (!inherits(listw, "listw")) stop(wname, "is not a listw object") if (is.na(match(listw$style, c("B", "C", "U")))) warning("Binary weights recommended (especially for distance bands)") - if (!is.numeric(x)) - stop(paste(deparse(substitute(x)), "is not a numeric vector")) - if (any(is.na(x))) stop(paste("NA in ", deparse(substitute(x)))) - if (any(x < 0.0)) - stop(paste("Negative value in ", deparse(substitute(x)))) + xname <- deparse(substitute(x)) + if(!is.numeric(x)) stop(xname, " is not a numeric vector") + if (deparse(substitute(na.action)) == "na.pass") + stop("na.pass not permitted") + x <- na.action(x) + na.act <- attr(x, "na.action") + if (!is.null(na.act)) { + subset <- !(1:length(listw$neighbours) %in% na.act) + listw <- subset(listw, subset, zero.policy=zero.policy) + } + if (any(x < 0.0)) stop("Negative value in ", xname) + n <- length(listw$neighbours) - if (n != length(x))stop("Different numbers of observations") + if (n != length(x)) stop("Different numbers of observations") if (is.null(spChk)) spChk <- get.spChkOption() if (spChk && !chkIDs(x, listw)) stop("Check of data and weights ID integrity failed") @@ -80,8 +88,10 @@ globalG.test <- function(x, listw, zero.policy=attr(listw, "zero.policy"), warning("Out-of-range p-value: reconsider test arguments") vec <- c(G, E.G, var.G) names(vec) <- c("Global G statistic", "Expectation", "Variance") - data.name <- paste(deparse(substitute(x)), "\nweights:", - deparse(substitute(listw)), "\n") + data.name <- paste(xname, "\nweights:", wname, ifelse(is.null(na.act), + "", paste("\nomitted:", paste(na.act, collapse=", "))), + ifelse(adjust.n && isTRUE(any(sum(card(listw$neighbours) == 0L))), + "\nn reduced by no-neighbour observations", ""), "\n") res <- list(statistic=statistic, p.value=PrG, estimate=vec, alternative=alternative, data.name=data.name, method=method) class(res) <- "htest" diff --git a/R/moran.R b/R/moran.R index 7e07d49f..ef8c3f39 100644 --- a/R/moran.R +++ b/R/moran.R @@ -1,4 +1,4 @@ -# Copyright 2001-18 by Roger Bivand +# Copyright 2001-24 by Roger Bivand # moran <- function(x, listw, n, S0, zero.policy=attr(listw, "zero.policy"), NAOK=FALSE) { @@ -23,10 +23,10 @@ moran.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "ze alternative="greater", rank = FALSE, na.action=na.fail, spChk=NULL, adjust.n=TRUE, drop.EI2=FALSE) { alternative <- match.arg(alternative, c("greater", "less", "two.sided")) - if (!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), - "is not a listw object")) - if (!is.numeric(x)) stop(paste(deparse(substitute(x)), - "is not a numeric vector")) + wname <- deparse(substitute(listw)) + if (!inherits(listw, "listw")) stop(wname, "is not a listw object") + xname <- deparse(substitute(x)) + if (!is.numeric(x)) stop(xname, " is not a numeric vector") if (is.null(zero.policy)) zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) @@ -34,8 +34,6 @@ moran.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "ze if (spChk && !chkIDs(x, listw)) stop("Check of data and weights ID integrity failed") # if (any(is.na(x))) stop("NA in X") - xname <- deparse(substitute(x)) - wname <- deparse(substitute(listw)) NAOK <- deparse(substitute(na.action)) == "na.pass" x <- na.action(x) na.act <- attr(x, "na.action") @@ -86,8 +84,8 @@ moran.test <- function(x, listw, randomisation=TRUE, zero.policy=attr(listw, "ze wname, ifelse(is.null(na.act), "", paste("\nomitted:", paste(na.act, collapse=", "))), ifelse(adjust.n && isTRUE(any(sum(card(listw$neighbours) == 0L))), - "n reduced by no-neighbour observations\n", ""), - ifelse(drop.EI2, "EI^2 term dropped in VI", ""), "\n") + "\nn reduced by no-neighbour observations", ""), + ifelse(drop.EI2, "\nEI^2 term dropped in VI", ""), "\n") res <- list(statistic=statistic, p.value=PrI, estimate=vec, alternative=alternative, method=method, data.name=data.name) if (!is.null(na.act)) attr(res, "na.action") <- na.act @@ -99,10 +97,10 @@ moran.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), alternative="greater", na.action=na.fail, spChk=NULL, return_boot=FALSE, adjust.n=TRUE) { alternative <- match.arg(alternative, c("greater", "less", "two.sided")) - if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), - "is not a listw object")) - if(!is.numeric(x)) stop(paste(deparse(substitute(x)), - "is not a numeric vector")) + wname <- deparse(substitute(listw)) + if(!inherits(listw, "listw")) stop(wname, "is not a listw object") + xname <- deparse(substitute(x)) + if(!is.numeric(x)) stop(xname, "is not a numeric vector") if (is.null(zero.policy)) zero.policy <- get("zeroPolicy", envir = .spdepOptions) stopifnot(is.logical(zero.policy)) @@ -114,8 +112,6 @@ moran.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), if (!zero.policy && any(cards == 0)) stop("regions with no neighbours found") # if (any(is.na(x))) stop("NA in X") - xname <- deparse(substitute(x)) - wname <- deparse(substitute(listw)) if (deparse(substitute(na.action)) == "na.pass") stop("na.pass not permitted") x <- na.action(x) @@ -123,6 +119,8 @@ moran.mc <- function(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), if (!is.null(na.act)) { subset <- !(1:length(listw$neighbours) %in% na.act) listw <- subset(listw, subset, zero.policy=zero.policy) + if (return_boot) + message("NA observations omitted: ", paste(na.act, collapse=", ")) } n <- length(listw$neighbours) if (n != length(x)) stop("objects of different length") diff --git a/man/geary.mc.Rd b/man/geary.mc.Rd index aaeb9f7b..477e3af8 100644 --- a/man/geary.mc.Rd +++ b/man/geary.mc.Rd @@ -7,7 +7,7 @@ } \usage{ geary.mc(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), alternative="greater", - spChk=NULL, adjust.n=TRUE, return_boot=FALSE) + spChk=NULL, adjust.n=TRUE, return_boot=FALSE, na.action=na.fail) } \arguments{ \item{x}{a numeric vector the same length as the neighbours list in listw} @@ -18,7 +18,7 @@ geary.mc(x, listw, nsim, zero.policy=attr(listw, "zero.policy"), alternative="gr \item{spChk}{should the data vector names be checked against the spatial objects for identity integrity, TRUE, or FALSE, default NULL to use \code{get.spChkOption()}} \item{adjust.n}{default TRUE, if FALSE the number of observations is not adjusted for no-neighbour observations, if TRUE, the number of observations is adjusted} \item{return_boot}{return an object of class \code{boot} from the equivalent permutation bootstrap rather than an object of class \code{htest}} -} + \item{na.action}{a function (default \code{na.fail}), can also be \code{na.omit} or \code{na.exclude} - in these cases the weights list will be subsetted to remove NAs in the data. It may be necessary to set zero.policy to TRUE because this subsetting may create no-neighbour observations. Note that only weights lists created without using the glist argument to \code{nb2listw} may be subsetted. \code{na.pass} is not permitted because it is meaningless in a permutation test.}} \value{ A list with class \code{htest} and \code{mc.sim} containing the following components: @@ -37,6 +37,7 @@ A list with class \code{htest} and \code{mc.sim} containing the following compon \examples{ data(oldcol) +set.seed(1) sim1 <- geary.mc(COL.OLD$CRIME, nb2listw(COL.nb, style="W"), nsim=99, alternative="less") sim1 @@ -52,5 +53,18 @@ sim3 <- geary.mc(COL.OLD$CRIME, nb2listw(colold.lags[[3]], style="W"), nsim=99) sim3 summary(sim3$res) +crime <- COL.OLD$CRIME +is.na(crime) <- sample(1:length(crime), 10) +try(geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, + na.action=na.fail)) +geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + na.action=na.omit) +geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + return_boot=TRUE, na.action=na.omit) +geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + na.action=na.exclude) +geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + return_boot=TRUE, na.action=na.exclude) +try(geary.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, na.action=na.pass)) } \keyword{spatial} diff --git a/man/geary.test.Rd b/man/geary.test.Rd index 79639384..8bb818dc 100644 --- a/man/geary.test.Rd +++ b/man/geary.test.Rd @@ -7,7 +7,7 @@ } \usage{ geary.test(x, listw, randomisation=TRUE, zero.policy=attr(listw, "zero.policy"), - alternative="greater", spChk=NULL, adjust.n=TRUE) + alternative="greater", spChk=NULL, adjust.n=TRUE, na.action=na.fail) } \arguments{ @@ -18,6 +18,7 @@ geary.test(x, listw, randomisation=TRUE, zero.policy=attr(listw, "zero.policy"), \item{alternative}{a character string specifying the alternative hypothesis, must be one of "greater" (default), "less" or "two.sided".} \item{spChk}{should the data vector names be checked against the spatial objects for identity integrity, TRUE, or FALSE, default NULL to use \code{get.spChkOption()}} \item{adjust.n}{default TRUE, if FALSE the number of observations is not adjusted for no-neighbour observations, if TRUE, the number of observations is adjusted} + \item{na.action}{a function (default \code{na.fail}), can also be \code{na.omit} or \code{na.exclude} - in these cases the weights list will be subsetted to remove NAs in the data. It may be necessary to set zero.policy to TRUE because this subsetting may create no-neighbour observations. Note that only weights lists created without using the glist argument to \code{nb2listw} may be subsetted. \code{na.pass} is not permitted.} } \value{ @@ -62,5 +63,13 @@ geary.test(COL.OLD$CRIME, listw2U(nb2listw(COL.k4.nb, style="W"))) geary.test(COL.OLD$CRIME, listw2U(nb2listw(COL.k4.nb, style="W")), randomisation=FALSE) +crime <- COL.OLD$CRIME +is.na(crime) <- sample(1:length(crime), 10) +try(geary.test(crime, nb2listw(COL.nb, style="W"), na.action=na.fail)) +geary.test(crime, nb2listw(COL.nb, style="W"), zero.policy=TRUE, + na.action=na.omit) +geary.test(crime, nb2listw(COL.nb, style="W"), zero.policy=TRUE, + na.action=na.exclude) +try(geary.test(crime, nb2listw(COL.nb, style="W"), na.action=na.pass)) } \keyword{spatial} diff --git a/man/globalG.test.Rd b/man/globalG.test.Rd index ee484cf4..67cc6f5d 100644 --- a/man/globalG.test.Rd +++ b/man/globalG.test.Rd @@ -7,7 +7,8 @@ The global G statistic for spatial autocorrelation, complementing the local Gi L } \usage{ globalG.test(x, listw, zero.policy=attr(listw, "zero.policy"), alternative="greater", - spChk=NULL, adjust.n=TRUE, B1correct=TRUE, adjust.x=TRUE, Arc_all_x=FALSE) + spChk=NULL, adjust.n=TRUE, B1correct=TRUE, adjust.x=TRUE, Arc_all_x=FALSE, + na.action=na.fail) } \arguments{ \item{x}{a numeric vector the same length as the neighbours list in listw} @@ -19,6 +20,7 @@ globalG.test(x, listw, zero.policy=attr(listw, "zero.policy"), alternative="grea \item{B1correct}{default TRUE, if TRUE, the erratum referenced below: "On page 195, the coefficient of W2 in B1, (just below center of the page) should be 6, not 3." is applied; if FALSE, 3 is used (as in CrimeStat IV)} \item{adjust.x}{default TRUE, if TRUE, x values of observations with no neighbours are omitted in the denominator of G} \item{Arc_all_x}{default FALSE, if Arc_all_x=TRUE and adjust.x=TRUE, use the full x vector in part of the denominator term for G} + \item{na.action}{a function (default \code{na.fail}), can also be \code{na.omit} or \code{na.exclude} - in these cases the weights list will be subsetted to remove NAs in the data. It may be necessary to set zero.policy to TRUE because this subsetting may create no-neighbour observations. Note that only weights lists created without using the glist argument to \code{nb2listw} may be subsetted. \code{na.pass} is not permitted.} } \value{ @@ -59,5 +61,16 @@ for (i in 1:ndists) { ZG[[i]] <- globalG.test(sidsrate79, thislw, zero.policy=TRUE, alternative="two.sided") } t(sapply(ZG, function(x) c(x$estimate[1], x$statistic, p.value=unname(x$p.value)))) +data(oldcol) +crime <- COL.OLD$CRIME +is.na(crime) <- sample(1:length(crime), 10) +res <- try(globalG.test(crime, nb2listw(COL.nb, style="B"), + na.action=na.fail)) +res +globalG.test(crime, nb2listw(COL.nb, style="B"), zero.policy=TRUE, + na.action=na.omit) +globalG.test(crime, nb2listw(COL.nb, style="B"), zero.policy=TRUE, + na.action=na.exclude) +try(globalG.test(crime, nb2listw(COL.nb, style="B"), na.action=na.pass)) } \keyword{spatial} diff --git a/man/moran.mc.Rd b/man/moran.mc.Rd index 31851a5d..7735961b 100644 --- a/man/moran.mc.Rd +++ b/man/moran.mc.Rd @@ -56,5 +56,18 @@ summary(sim2$res[1:nsim]) sim3 <- moran.mc(COL.OLD$CRIME, nb2listw(colold.lags[[3]], style="W"), nsim=nsim) summary(sim3$res[1:nsim]) +crime <- COL.OLD$CRIME +is.na(crime) <- sample(1:length(crime), 10) +try(moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, + na.action=na.fail)) +moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + na.action=na.omit) +moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + return_boot=TRUE, na.action=na.omit) +moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + na.action=na.exclude) +moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, zero.policy=TRUE, + return_boot=TRUE, na.action=na.exclude) +try(moran.mc(crime, nb2listw(COL.nb, style="W"), nsim=99, na.action=na.pass)) } \keyword{spatial}