-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathENMTools_removecol.r
75 lines (75 loc) · 2.68 KB
/
ENMTools_removecol.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
removeCollinearity.terra <- function (raster.stack, multicollinearity.cutoff = 0.7, select.variables = FALSE,
sample.points = FALSE, nb.points = 10000, plot = FALSE, method = "pearson")
{
if (sample.points) {
if (!is.numeric(nb.points)) {
stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")
}
env.df <- sampleRandom(raster.stack, size = nb.points,
na.rm = TRUE)
}
else {
env.df <- readValues(raster.stack)
if (any(is.na(env.df))) {
env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[,
1]), ]
}
}
if (!is.numeric(multicollinearity.cutoff)) {
stop("You must provide a numeric cutoff between 0 and 1 in multicollinearity.cutoff")
}
else if (multicollinearity.cutoff > 1 | multicollinearity.cutoff <
0) {
stop("You must provide a numeric cutoff between 0 and 1 in multicollinearity.cutoff")
}
cor.matrix <- matrix(data = 0, nrow = nlayers(raster.stack),
ncol = nlayers(raster.stack), dimnames = list(names(raster.stack),
names(raster.stack)))
cor.matrix <- 1 - abs(stats::cor(env.df, method = method))
dist.matrix <- stats::as.dist(cor.matrix)
ahc <- stats::hclust(dist.matrix, method = "complete")
groups <- stats::cutree(ahc, h = 1 - multicollinearity.cutoff)
if (length(groups) == max(groups)) {
message(paste(" - No multicollinearity detected in your data at threshold ",
multicollinearity.cutoff, "\n", sep = ""))
mc <- FALSE
}
else {
mc <- TRUE
}
}
if (plot) {
op <- par(no.readonly = TRUE)
graphics::par(mar = c(5.1, 5.1, 4.1, 3.1))
plot(ahc, hang = -1, xlab = "", ylab = "Distance (1 - Pearson's r)",
main = "", las = 1, sub = "", axes = F)
graphics::axis(2, at = seq(0, 1, length = 6), las = 1)
if (mc) {
graphics::title(paste("Groups of intercorrelated variables at cutoff",
multicollinearity.cutoff))
par(xpd = T)
rect.hclust(ahc, h = 1 - multicollinearity.cutoff)
}
else {
graphics::title(paste("No intercorrelation among variables at cutoff",
multicollinearity.cutoff))
}
par(op)
}
if (select.variables) {
sel.vars <- NULL
for (i in 1:max(groups)) {
sel.vars <- c(sel.vars, sample(names(groups[groups ==
i]), 1))
}
}
else {
if (mc) {
sel.vars <- list()
for (i in groups) {
sel.vars[[i]] <- names(groups)[groups == i]
}
}
else {
sel.vars <- names(raster.stack)
}