-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path5_bivariate_model_interpretation.R
266 lines (214 loc) · 17.1 KB
/
5_bivariate_model_interpretation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
########################### Opening packages
library(tidyverse) ## Version ‘2.0.0’
library(purrr) ## Version ‘1.0.1’
library(patchwork) ## Version ‘1.2.0.9000’
library(landscapemetrics) ## Version ‘2.1.4’
source("03_Analysis/modelling_adults/functions.R") #### Need of functiosn which allow the interpretation of
###########################
#########'Interpretation of the GLMMS results for the presence and abundance
###########################
#### Open results of GLMMs
glmm_univ_presence <- read.csv('02_Data/processed_data/glmm_univ_presence_PT_GDD_POL_R2.csv', stringsAsFactors = F)
glmm_univ_abundance <- read.csv('02_Data/processed_data/glmm_univ_abundance_PT_GDD_POL_R2.csv', stringsAsFactors = F)
#### Adding the indicator
glmm_univ_presence$indicator <- 'presence'
glmm_univ_abundance$indicator <- 'abundance'
###########################
#########'Macro-meteorological data preceding collection (CMM)
###########################
#### Meteorological data from the ODEE
univ_glmm_temporal_ode <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## to put together presence and abundance models
filter(grepl("RFDode|TMINode|TMAXode|TMNode|GDDjour",term)) %>% ## to select variables of interest: Minimum temperature, maximum temperayure, average temperature, GDD and Rainfall
filter(!grepl("collection|prec",term)) %>% ## to not select micro climatic data
mutate(var = sub('\\_.*', '', term)) %>%
mutate(label = case_when(var == "RFDode" ~ "Rainfall",
var == "TMINode" ~ "Minimum temperature",
var == "TMAXode" ~ "Maximum temperature",
var == "TMNode" ~ "Average temperature",
var == "GDDjour" ~ "GDD daily")) %>%
mutate(time_lag_1 = as.numeric(sub('.*\\_', '', term)), time_lag_2 = as.numeric(stringr::str_match( term, '([^_]+)(?:_[^_]+){1}$')[,2])) %>% ## adding the time lag values
arrange(var, indicator, time_lag_1, time_lag_2) %>%
rename(correlation = estimate) %>%
mutate(correlation = ifelse(p.value<=0.2,correlation,NA)) %>% ## to put NA if p-value not significant (p-value >0.2)
mutate(r2 = ifelse(p.value<=0.2,r2,NA)) %>% ## to put NA if p-value not significant (p-value >0.2)
nest(-c(indicator,var))
plots_univ_glmm_temporal_ode <- univ_glmm_temporal_ode %>% ## to use the function to put in CCMs format
arrange(rev(indicator),factor(var, levels = c("RFDode","TMNode","TMINode","TMAXode","TAMPode","GDDjour"))) %>%
mutate(univ_temporal = pmap(list(data,indicator), ~fun_ccm_plot_r2(correlation_df = ..1, var = ..1$label[1], time_frame = 1, indicator = ..2, metric_name = "glmm"))) %>%
nest(-indicator) %>%
mutate(univ_temporal = map(data, ~wrap_plots(.x$univ_temporal, nrow = 1, ncol = 8,guides = "collect"))) %>%
dplyr::select(-data)
p_meteo_ode <- wrap_plots(plots_univ_glmm_temporal_ode$univ_temporal[1][[1]],plots_univ_glmm_temporal_ode$univ_temporal[2][[1]], ncol = 1, nrow = 2, tag_level = "new") ## to put together the different plots
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/cross_correlation_maps_ode.pdf",plot = p_meteo_ode, device = "pdf",width = 11, height = 8) ## to save the plot
#### Meteorological data from Meteo france
univ_glmm_temporal_mf <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## to put together presence and abundance models
filter(grepl("RFDmf|TMINmf|TMAXmf|TMNmf|RHmf|WINDmf",term)) %>% ## to select variables of interest: Minimum temperature, maximum temperayure, average temperature, rainfall, wind speed and relative humidity
filter(!grepl("collection|prec",term)) %>% # to not select micro climatic data
mutate(var = sub('\\_.*', '', term)) %>%
mutate(label = case_when(var == "RFDmf" ~ "Rainfall",
var == "TMINmf" ~ "Minimum temperature",
var == "TMAXmf" ~ "Maximum temperature",
var == "TMNmf" ~ "Average temperature",
var == "RHmf" ~ "Relative humidity",
var == "WINDmf" ~ "Wind speed")) %>%
mutate(time_lag_1 = as.numeric(sub('.*\\_', '', term)), time_lag_2 = as.numeric(stringr::str_match( term, '([^_]+)(?:_[^_]+){1}$')[,2])) %>% ## adding the time lag values
arrange(var, indicator, time_lag_1, time_lag_2) %>%
rename(correlation = estimate) %>%
mutate(correlation = ifelse(p.value<=0.2,correlation,NA)) %>% ## to put NA if p-value not significant (p-value >0.2)
mutate(r2 = ifelse(p.value<=0.2,r2,NA)) %>% ## to put NA if p-value not significant (p-value >0.2)
nest(-c(indicator,var))
plots_univ_glmm_temporal_mf <- univ_glmm_temporal_mf %>% ## to use the function to put in CCMs format
arrange(rev(indicator),factor(var, levels = c("RFDmf","TMNmf","TMINmf","TMAXmf","RHmf","WINDmf"))) %>%
mutate(univ_temporal = pmap(list(data,indicator), ~fun_ccm_plot_r2(correlation_df = ..1, var = ..1$label[1], time_frame = 1, indicator = ..2, metric_name = "glmm"))) %>%
nest(-indicator) %>%
mutate(univ_temporal = map(data, ~wrap_plots(.x$univ_temporal, nrow = 1, ncol = 7,guides = "collect"))) %>%
dplyr::select(-data)
p_meteo_mf <- wrap_plots(plots_univ_glmm_temporal_mf$univ_temporal[1][[1]],plots_univ_glmm_temporal_mf$univ_temporal[2][[1]], ncol = 1, nrow = 2, tag_level = "new") ## to put together the different plots
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/cross_correlation_maps_meteofrance.pdf",plot = p_meteo_mf, device = "pdf", width = 11, height = 8) ## to save the plot
###########################
#########'Air quality data preceding collection (CMM)
###########################
univ_glmm_temporal_pol <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## putting together presence and abundance models
filter(grepl("NO|NOX|NO2|PM2.5|PM10|O3",term)) %>% ## to select pollluants
filter(!grepl("collection|prec",term)) %>%
mutate(var = sub('\\_.*', '', term)) %>%
mutate(label = case_when(var == "NO" ~ "Nitrogen monoxide",
var == "NO2" ~ "Nitrogen dioxide",
var == "NOX" ~ "Nitrogen oxides",
var=="PM2.5"~"Particule matter of 2.5",
var == "PM10" ~ "Particule matter of 10",
var=="O3"~"tropospheric ozone")) %>%
mutate(time_lag_1 = as.numeric(sub('.*\\_', '', term)), time_lag_2 = as.numeric(stringr::str_match( term, '([^_]+)(?:_[^_]+){1}$')[,2])) %>%
arrange(var, indicator, time_lag_1, time_lag_2) %>%
rename(correlation = estimate) %>%
mutate(correlation = ifelse(p.value<=0.2,correlation,NA)) %>% ## to select significative correlations with p-value <)=0
mutate(r2 = ifelse(p.value<=0.2,r2,NA)) %>% ## to select significative correlations with p-value <)=0
nest(-c(indicator,var))
plots_univ_glmm_temporal_pol <- univ_glmm_temporal_pol %>% ## to use the functions of CCM
arrange(rev(indicator),factor(var, levels = c("NO","NO2","NOX", "PM2.5", "PM10", "O3"))) %>%
mutate(univ_temporal = pmap(list(data,indicator), ~fun_ccm_plot_r2(correlation_df = ..1, var = ..1$label[1], time_frame = 1, indicator = ..2, metric_name = "glmm"))) %>%
nest(-indicator) %>%
mutate(univ_temporal = map(data, ~wrap_plots(.x$univ_temporal, nrow = 1, ncol = 7,guides = "collect"))) %>%
#mutate(univ_temporal = pmap(list(univ_temporal,species,country,indicator), ~..1 + plot_annotation(title = paste(..2,..3,..4, sep = " - "), subtitle = paste("CCM using spearman coefficient"), caption = "Only significant results (pval <= 0.05) are displayed (non-signficant results are greyed out)"))) %>%
dplyr::select(-data)
p_poll <- wrap_plots(plots_univ_glmm_temporal_pol$univ_temporal[1][[1]],plots_univ_glmm_temporal_pol$univ_temporal[2][[1]], ncol = 1, nrow = 2, tag_level = "new")
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/cross_correlation_maps_polluants.pdf",plot = p_poll, device = "pdf",width = 11, height = 8) ## to save
###########################
#########'Micro climatic data preceding collection
###########################
univ_glmm_microclim <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## To put together presence and abundance models
filter(grepl("RFSUM|RFMIN|RFMAX|TMEAN|TMIN|TMAX|TAMP|RHMEAN|RHMIN|RHMAX",term)) %>% ## To select the variables of interest: relative humidity, temperature, and presence of rainfall
filter(grepl("collection|prec",term)) %>%
mutate(type = sub('\\_.*', '', term)) %>%
mutate(type = gsub("RFSUM|RFMAX|RFMIN","Rainfall",type)) %>% ## to add a type for variables: variables related to rainfall
mutate(type = gsub("TMEAN|TMIN|TMAX|TAMP","Temperature",type)) %>%## to add a type for variables: variables related to temperature
mutate(type = gsub("RHMEAN|RHMIN|RHMAX","Humidity",type)) %>% ## to add a type for variables: variables related to relative humidity
mutate(buffer = case_when(grepl("collection",term) ~ "during coll.", ## to name different time lags
grepl("24h",term) ~ "24h",
grepl("48h",term) ~ "48h",
grepl("1s",term) ~ "1 week",
grepl("2s",term) ~ "2 week",
grepl("3s",term) ~ "3 week",
grepl("4s",term) ~ "4 week",
grepl("5s",term) ~ "5 week",
grepl("6s",term) ~ "6 week")) %>%
mutate(term = gsub("collection","during collection",term)) %>%
mutate(term = gsub("24hprec|24h_prec","24h preceding collection",term)) %>%
mutate(term = gsub("48hprec|48h_prec","48h preceding collection",term)) %>%
mutate(term = gsub("1s_prec|1sprec","the week preceding collection",term)) %>%
mutate(term = gsub("2s_prec|2sprec","b/w 0 and 2 weeks preceding collection",term)) %>%
mutate(term = gsub("3s_prec|3sprec","b/w 0 and 3 weeks preceding collection",term)) %>%
mutate(term = gsub("4s_prec|4sprec","b/w 0 and 4 weeks preceding collection",term)) %>%
mutate(term = gsub("5s_prec|5sprec","b/w 0 and 5 weeks preceding collection",term)) %>%
mutate(term = gsub("6s_prec|6sprec","b/w 0 and 6 weeks preceding collection",term)) %>%
mutate(label = case_when(grepl("MAX",term) ~ "Maximum",
grepl("MIN",term) ~ "Minimum",
grepl("AMP",term) ~ "Amplitude",
grepl("MEAN|SUM",term) ~ "Average")) %>%
mutate(term = gsub("RFSUM","Cumulative/average",term)) %>%
mutate(term = gsub("RFMAX","Maximum",term)) %>%
mutate(term = gsub("RFMIN","Minimum",term)) %>%
mutate(term = gsub("TMEAN","Cumulative/average",term)) %>%
mutate(term = gsub("TMIN","Minimum",term)) %>%
mutate(term = gsub("TMAX","Maximum",term)) %>%
mutate(term = gsub("TAMP","Amplitude",term)) %>%
mutate(term = gsub("RHMEAN","Cumulative/average",term)) %>%
mutate(term = gsub("RHMIN","Minimum",term)) %>%
mutate(term = gsub("RHMAX","Maximum",term)) %>%
mutate(term = gsub("_"," ",term)) %>%
mutate(correlation = ifelse(p.value<=0.2,estimate,NA)) %>% ## to put NA if p-value > 0.2
mutate(r2 = ifelse(p.value<=0.2,r2,NA)) %>% ## to put NA if p-value > 0.2
mutate(indicator = forcats::fct_relevel(indicator, c("presence","abundance"))) %>%
mutate(label = forcats::fct_relevel(label, c("Minimum","Maximum","Average","Amplitude"))) %>%
mutate(buffer = forcats::fct_relevel(buffer, c("during coll.","24h","48h","1 week","2 week","3 week","4 week","5 week","6 week")))
p_microclim <- fun_plot_tile_univ_spatial_r2(univ_glmm_microclim, metric_name = "glmm", indicator = univ_glmm_microclim$term, lc_source = "Microclimatic conditions", type = "", xlabel = "time before collection") ## to use the function to allow graphical interpretation
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/microclim.pdf",plot = p_microclim, device = "pdf", width = 11, height = 8) ## to save
###########################
#########'landscape metrics with different buffers
###########################
landcover_grouped_veget_data_dict <- read.csv("02_Data/processed_data/03_Landcover_Data/landcover_grouped_veget_data_dic.csv", stringsAsFactors = F) %>% mutate(lc_source = "LCG")
data_dic <- read.csv("02_Data/processed_data/01_Adults_Abundance/data_dictionary.csv", stringsAsFactors = F)
lsm_data_dic <- landcover_grouped_veget_data_dict ## to create data dictionnary for landscape metrics to interprate different class
mutate(class = as.character(class))
univ_glmm_lsm <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## to put together presence and abundance models
filter(grepl("lsm_c_pland|lsm_l_shdi",term)) %>% ## to select the variables related to the percentage of every class and the complewxity of the landscape
mutate(function_name = sub("\\_L.*", "", term), lc_source = word(term, -3, sep = "_"), buffer = word(term, -2, sep = "_"), class = word(term, -1, sep = "_")) %>%
mutate(buffer = forcats::fct_relevel(buffer, c("0","20","50","100","250"))) %>% ## different buffer : 20 m, 50 m, 100 and 250 m
left_join(list_lsm()) %>%
left_join(lsm_data_dic) %>% ## to join the landscape metrics interpretation
rename(correlation = estimate) %>%
mutate(label = ifelse(level=="landscape","landscape",label)) %>%
mutate(label = paste0(label, " - ", metric," - ",name)) %>%
nest(-lc_source)
plots_univ_glmm_spatial <- univ_glmm_lsm %>%
mutate(univ_spatial = pmap(list(data,lc_source), ~fun_plot_tile_univ_spatial_r2(correlation_df = ..1, metric_name = "glmm", indicator = ..1$indicator, lc_source = ..2, type = "", xlabel = "buffer radius around the collection site (meters)"))) %>% ## to use the function to interprate graphically the GLMMs
dplyr::select(-data)
pmap(list(plots_univ_glmm_spatial$lc_source,plots_univ_glmm_spatial$univ_spatial),~ggsave(filename = paste0("02_Data/processed_data/plots/modelling_adults_abundance/",..1,".pdf"),plot = ..2, device = "pdf", width = 7,height = 14)) ## to save
###########################
#########'Socio economics data
###########################
fil <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>% ## to put together presence and abundance models
filter(grepl("FIL",term)) %>% ## to select socio economics variables
mutate(indicator = forcats::fct_relevel(indicator, c("presence","abundance"))) %>%
mutate(estimate = ifelse(indicator=="abundance",estimate,estimate-1)) %>%
mutate(r2_modif=dplyr::case_when(indicator=='presence'& abs(estimate)>=1~r2, indicator=='presence'& abs(estimate)<1~r2*(-1), ## for presence /abundance model: it highlights the direction of the correlation and adapts the marginal r2: if correlation is negative, r2 is going to be negative also.
indicator=="abundance"&estimate>=0~r2, indicator=="abundance"&estimate<0~r2*(-1)))%>%
filter(p.value<0.2) ## to select only variables significtaives (<0.2)
p2 <- ggplot(fil, aes(y = term, x = r2_modif)) + ## to represent graphically
geom_point() +
facet_wrap(.~indicator) +
ggtitle("FILOSOFI")
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/filosofi.jpg",plot = p2, device = "jpg") ## to save
###########################
#########'Other spatial variables, like population density
###########################
univ_glmm_other_spat <- glmm_univ_presence %>%
bind_rows(glmm_univ_abundance) %>%## to put together presence and abundance models
filter(!grepl("lsm|RFD|TMIN|TMAX|TMN|TAMP|RH|WIND|RFSUM|RFMIN|RFMAX|TMEAN|TMIN|TMAX|RHMEAN|RHMIN|RHMAX|GDD|NO|O3|PM",term)) %>% ## to select every variables which are not pollutant, land cover, meteorological, mciro climate and socio economics
mutate(buffer = ifelse(!grepl("BATH|BATS", term), word(term, 2, sep = "_"), word(term, 3, sep = "_"))) %>%
mutate( var = word(term, 1, sep = "_")) %>%
filter(!grepl("LCZ",term)) %>%
filter(!grepl("nearest",buffer)) %>%
filter(var!="FIL") %>%
mutate(buffer = ifelse(is.na(buffer), 0, buffer)) %>% ## to indicate if a buffer is used
mutate(buffer = forcats::fct_relevel(buffer, c("0","20","50","100","250"))) %>%
left_join(data_dic) %>%
rename(correlation = estimate) %>%
mutate(type = "other spatial variables") %>%
mutate(fun_summarize = case_when(grepl("mean|mn",term) ~ "mean" ,
grepl("sd|stdev",term) ~ "sd",
grepl("nb",term) ~ "nb",
grepl("min",term) ~ "min",
grepl("max",term) ~ "max",
grepl("sum_vol|sum", term) ~ "sum",
TRUE ~ "")
) %>%
mutate(label = paste0(label," - ",fun_summarize))
p <- fun_plot_tile_univ_spatial_r2(correlation_df = univ_glmm_other_spat, metric_name = "glmm", indicator = univ_glmm_other_spat$indicator, lc_source = "Other spatial variables", type = "", xlabel = "buffer radius around the collection site (meters)") ## to use the function to interprate graphically the GLMMs
ggsave(filename = "02_Data/processed_data/plots/modelling_adults_abundance/other_spatial_var.jpg",plot = p, device = "jpg", width = 7,height = 14) ## To save