-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathibt-sem.Rmd
521 lines (440 loc) · 16.3 KB
/
ibt-sem.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
---
title: "Nutrient pathways SEM analysis"
author: "Debora Obrist"
date: "14/10/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## SEM Analysis:
The purpose of this analysis is to answer these questions:
1.) What are the dominant pathways by which nutrients travel from the sea into upper-level trophic consumers in island ecosystems?
and
2.) How far up the food chain do they travel?
First, load required packages and data:
```{r, warning = FALSE, message = FALSE}
library(tidyverse)
library(lavaan)
library(semTools)
library(mice)
set.seed(123)
dat <- read.csv("ibt-sem-data-2021.csv")
dat_d15n <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d15n, SAL.d15n, FLV.d15n, CUR.d15n, ISO.d15n, COL.d15n, feces.d15n)
dat_d13c <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d13c, SAL.d13c, FLV.d13c, CUR.d13c, ISO.d13c, COL.d13c, feces.d13c)
```
This was my first attempt at the global model.
```{r}
# Starting global model:
global_1 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ SAL.d15n + FLV.d15n
ISO.d15n ~ soil.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
SAL.d15n ~ soil.d15n
FLV.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
'
global_sem_1 <- runMI(global_1,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_sem_1,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
These diagnostics are not great. The chi-square value should be < 2x the degrees of freedom, and the p-value should not be significant.
The Comparative Fit Index (cfi) should ideally be > 0.95.
The Root Mean Square Error of Approximation (rmsea) should be < 0.08 or < 0.1 depending on the reference.
The Standardized Root Mean Residual (srmr) should also be < 0.08.
Attempt #1 Diagnostics:
chi-sq: 75.376
df: 10.000
cfi: 0.853
rmsea: 0.260
srmr: 0.073
I used the following to look at the modification indeces. These should be looked at very critically because it involves looking at the data in order to better fit the model to said data. It can get you to fit associations that make sense numerically but no sense at all biologically.
```{r}
plyr::arrange(modificationIndices.mi(global_sem_1), mi, decreasing = TRUE)
```
I decided to add in an error correlation between FLV.d15n and SAL.d15n. It seems reasonable that the error of the two plants' isotopes could covary in a way I haven't accounted for (soil moisture, slope, exposure, etc).
I refit this:
```{r}
global_2 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ SAL.d15n + FLV.d15n
ISO.d15n ~ soil.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
SAL.d15n ~ soil.d15n
FLV.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
FLV.d15n ~~ SAL.d15n
'
global_sem_2 <- runMI(global_2,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_sem_2,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
Attempt #2 Diagnostics:
chi-sq: 49.246
df: 9.000
cfi: 0.909
rmsea: 0.215
srmr: 0.061
Slightly better, but not great. I went back to the modification indeces:
```{r}
plyr::arrange(modificationIndices.mi(global_sem_2), mi, decreasing = TRUE)
```
and I added an error correlation between CUR and soil, since the herbivorous weevils we were surveying likely were not eating the plants that we had surveyed. They likely eat hemlock seedlings. Maybe hemlock seedlings more closely reflects the nutrients in the soil than the plants we had sampled.
This gives:
```{r}
global_3 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ SAL.d15n + FLV.d15n
ISO.d15n ~ soil.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
SAL.d15n ~ soil.d15n
FLV.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
FLV.d15n ~~ SAL.d15n
CUR.d15n ~~ soil.d15n
'
global_sem_3 <- runMI(global_3,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_sem_3,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
This is not bad.
Attempt #3 Diagnostics:
Chi-sq: 33.190
df: 8.000
cfi: 0.943
rmsea: 0.180
srmr: 0.066
So close! The p-value for the chi-sq is still significant, and the rmsea is still too high.
```{r}
plyr::arrange(modificationIndices.mi(global_sem_3), mi, decreasing = TRUE)
```
Okay, attempt #4!
I am now adding in a correlation between COL.d15n and ISO.d15n because ISO could be eating wrack and COL could be eating amphipods on the wrack but sqrt.wrack.std might not be giving an accurate representation of the presence of wrack over time. The samples were not taken at the same time as the wrack was measured in most cases so there could be some disconnect there. ISO and COL could correlate due to some factor (through wrack deposition) that is not accounted for in this model.
```{r}
global_4 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ SAL.d15n + FLV.d15n
ISO.d15n ~ soil.d15n + SAL.d15n + FLV.d15n + sqrt.wrack.std
SAL.d15n ~ soil.d15n
FLV.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
FLV.d15n ~~ SAL.d15n
CUR.d15n ~~ soil.d15n
COL.d15n ~~ ISO.d15n
'
global_sem_4 <- runMI(global_4,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_sem_4,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
This looks good!
Chi sq: 9.739
df: 7.000
cfi: 0.994
rmsea: 0.064
srmr: 0.052
Check for multicollinearity.
```{r}
# Based on global_mod_4:
feces.mod <- lm(feces.d15n ~ COL.d15n +
ISO.d15n +
CUR.d15n +
SAL.d15n +
FLV.d15n +
sqrt.wrack.std,
data = dat_d15n)
car::vif(feces.mod) # VIF for SAL.d15n and FLV.d15n are greater than 5.
COL.mod <- lm(COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std, data = dat_d15n)
car::vif(COL.mod) # This is fine.
CUR.mod <- lm(CUR.d15n ~ SAL.d15n + FLV.d15n, data = dat_d15n)
car::vif(CUR.mod) # SAL and FLV are just over 5.
ISO.mod <- lm(ISO.d15n ~ soil.d15n +
SAL.d15n +
FLV.d15n +
sqrt.wrack.std, data = dat_d15n)
car::vif(ISO.mod) # soil, salal, flv all over 5.
SAL.mod <- lm(SAL.d15n ~ soil.d15n, data = dat_d15n)
FLV.mod <- lm(FLV.d15n ~ soil.d15n, data = dat_d15n)
soil.mod <- lm(soil.d15n ~ sqrt.wrack.std, data = dat_d15n)
```
Decision: Too much multicollinearity - going to combine the two plant species into one plant parameter instead. Take an average plant d15N and plant d13C per island to represent primary producers overall.
# Restart with plants now combined:
```{r}
rm(list = ls())
library(tidyverse)
library(lavaan)
library(semTools)
library(mice)
set.seed(123)
dat <- read.csv("ibt-sem-data-2021-plants-combined.csv")
dat_d15n <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d15n, veg.d15n, CUR.d15n, ISO.d15n, COL.d15n, feces.d15n)
dat_d13c <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d13c, veg.d13c, CUR.d13c, ISO.d13c, COL.d13c, feces.d13c)
```
# Model - d15n:
```{r}
global_comb = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + veg.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ veg.d15n
ISO.d15n ~ soil.d15n + veg.d15n + sqrt.wrack.std
veg.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
'
global_comb_sem <- runMI(global_comb,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
```
# Check diagnostics:
Chi-sq: 47.315
df: 7.00
cfi: 0.880
rmsea: 0.244
srmr: 0.061
```{r}
fitMeasures(global_comb_sem,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
# Check multicollinearity:
```{r}
feces.mod <- lm(feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + veg.d15n + sqrt.wrack.std, data = dat_d15n)
car::vif(feces.mod) # all < 5
COL.mod <- lm(COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std, data = dat_d15n)
car::vif(COL.mod) # Good
CUR.mod <- lm(CUR.d15n ~ veg.d15n, data = dat_d15n)
ISO.mod <- lm(ISO.d15n ~ soil.d15n + veg.d15n + sqrt.wrack.std, data = dat_d15n)
car::vif(ISO.mod) # soil and veg > 5 but there is a regression fit between these two in the SEM so should be fine.
veg.mod <- lm(veg.d15n ~ soil.d15n, data = dat_d15n)
soil.mod <- lm(soil.d15n ~ sqrt.wrack.std, data = dat_d15n)
```
# Okay, check which terms are problematic for fit:
```{r}
plyr::arrange(modificationIndices.mi(global_comb_sem2), mi, decreasing = TRUE)
```
# Again adding in CUR.d15n ~~ soil.d15n:
The herbivorous weevils we were surveying likely were not eating the exact plants that we had surveyed (although apparently they do eat salal sometimes) they likely prefer to eat hemlock seedlings. Maybe hemlock seedlings more closely reflect the nutrients in the soil than the plants we had sampled.
```{r}
global_comb2 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + veg.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ veg.d15n
ISO.d15n ~ soil.d15n + veg.d15n + sqrt.wrack.std
veg.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
CUR.d15n ~~ soil.d15n
'
global_comb_sem2 <- runMI(global_comb2,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
```
# Check diagnostics again:
Chi-sq: 25.425
df: 5.000
cfi: 0.939
rmsea: 0.205
srmr: 0.056
```{r}
fitMeasures(global_comb_sem3,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
# Okay again check which terms are problematic for fit:
```{r}
plyr::arrange(modificationIndices.mi(global_comb_sem2), mi, decreasing = TRUE)
```
# I am adding back in a correlation between COL.d15n and ISO.d15n because ISO could be eating wrack and COL could be eating amphipods on the wrack but sqrt.wrack.std might not be giving an accurate representation of the presence of wrack over time. This could cause the two to correlate in a non-causative way.
```{r}
global_comb3 = '
feces.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + veg.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ veg.d15n
ISO.d15n ~ soil.d15n + veg.d15n + sqrt.wrack.std
veg.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
CUR.d15n ~~ soil.d15n
COL.d15n ~~ ISO.d15n
'
global_comb_sem3 <- runMI(global_comb3,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
```
# Check diagnostics:
Chi-sq: 10.234
df: 5.000
cfi: 0.984
rmsea: 0.104
srmr: 0.054
```{r}
fitMeasures(global_comb_sem3,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
# Save output:
```{r}
d15n_plants_combined <- summary(global_comb_sem3, ci = FALSE, fmi = TRUE, output = "data.frame") %>%
dplyr::mutate(ci.lower = est - 1.96 * se) %>%
dplyr::mutate(ci.upper = est + 1.96 * se) %>%
dplyr::filter(!is.na(pvalue)) %>%
arrange(desc(pvalue)) %>%
mutate_if("is.numeric","round", 3)
write.csv(d15n_plants_combined, "data-generated/sem_results_d15n_combined_plants.csv", row.names = FALSE)
```
# d13C model:
I'm going to use model #3 from above as a starting point for fitting the d13c model too. I took out the connections from soil.d13c -> SAL.d13c and FLV.d13c since plants get carbon from the air, not the soil:
```{r}
global_comb_c = '
feces.d13c ~ COL.d13c + ISO.d13c + CUR.d13c + veg.d13c + sqrt.wrack.std
COL.d13c ~ ISO.d13c + CUR.d13c + sqrt.wrack.std
CUR.d13c ~ veg.d13c
ISO.d13c ~ soil.d13c + veg.d13c + sqrt.wrack.std
veg.d13c ~ soil.d13c
soil.d13c ~ sqrt.wrack.std
CUR.d13c ~~ soil.d13c
COL.d13c ~~ ISO.d13c
'
global_sem_c <- runMI(global_comb_c,
data = dat_d13c,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
```
# Diagnostics:
Attempt #1:
Chisq: 8.751
df: 5.000
cfi: 0.883 <- this isn't perfect but the other diagnostics look good.
rmsea: 0.088
srmr: 0.070
```{r}
fitMeasures(global_sem_c,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
```
Save the results:
```{r}
d13c_plants_combined <- summary(global_sem_c, ci = FALSE, fmi = TRUE, output = "data.frame") %>%
dplyr::mutate(ci.lower = est - 1.96 * se) %>%
dplyr::mutate(ci.upper = est + 1.96 * se) %>%
dplyr::filter(!is.na(pvalue)) %>%
arrange(desc(pvalue)) %>%
mutate_if("is.numeric","round",3)
write.csv(d13c_plants_combined, "data-generated/sem_results_d13c_combined_plants.csv", row.names = FALSE)
```
# Rerun with feather data:
```{r}
dat_d15n <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d15n, veg.d15n, CUR.d15n, ISO.d15n, COL.d15n, feather.d15n)
dat_d13c <- dat %>%
dplyr::select(island, l.area.std, sqrt.wrack.std, soil.d13c, veg.d13c, CUR.d13c, ISO.d13c, COL.d13c, feather.d13c)
global_comb_feather = '
feather.d15n ~ COL.d15n + ISO.d15n + CUR.d15n + veg.d15n + sqrt.wrack.std
COL.d15n ~ ISO.d15n + CUR.d15n + sqrt.wrack.std
CUR.d15n ~ veg.d15n
ISO.d15n ~ soil.d15n + veg.d15n + sqrt.wrack.std
veg.d15n ~ soil.d15n
soil.d15n ~ sqrt.wrack.std
CUR.d15n ~~ soil.d15n
COL.d15n ~~ ISO.d15n
'
global_comb_feather_sem <- runMI(global_comb_feather,
data = dat_d15n,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_comb_feather_sem,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
d15n_plants_combined_feather <- summary(global_comb_feather_sem, ci = FALSE, fmi = TRUE, output = "data.frame") %>%
dplyr::mutate(ci.lower = est - 1.96 * se) %>%
dplyr::mutate(ci.upper = est + 1.96 * se) %>%
dplyr::filter(!is.na(pvalue)) %>%
arrange(desc(pvalue)) %>%
mutate_if("is.numeric","round", 3)
write.csv(d15n_plants_combined_feather, "data-generated/sem_results_d15n_combined_plants_feather.csv", row.names = FALSE)
global_comb_c_feather = '
feather.d13c ~ COL.d13c + ISO.d13c + CUR.d13c + veg.d13c + sqrt.wrack.std
COL.d13c ~ ISO.d13c + CUR.d13c + sqrt.wrack.std
CUR.d13c ~ veg.d13c
ISO.d13c ~ soil.d13c + veg.d13c + sqrt.wrack.std
veg.d13c ~ soil.d13c
soil.d13c ~ sqrt.wrack.std
CUR.d13c ~~ soil.d13c
COL.d13c ~~ ISO.d13c
'
global_comb_c_feather_sem <- runMI(global_comb_c_feather,
data = dat_d13c,
fun = "sem",
miPackage = "mice",
seed = 100,
m = 100)
fitMeasures(global_comb_c_feather_sem,
fit.measures = "all",
baseline.model = NULL,
output = "vector",
omit.imps = c("no.conv", "no.se"))
d13c_plants_combined_feather <- summary(global_comb_c_feather_sem, ci = FALSE, fmi = TRUE, output = "data.frame") %>%
dplyr::mutate(ci.lower = est - 1.96 * se) %>%
dplyr::mutate(ci.upper = est + 1.96 * se) %>%
dplyr::filter(!is.na(pvalue)) %>%
arrange(desc(pvalue)) %>%
mutate_if("is.numeric","round", 3)
write.csv(d13c_plants_combined_feather, "data-generated/sem_results_d13c_combined_plants_feather.csv", row.names = FALSE)
```