forked from ameliaritger/mbon-shiny-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
new_data.Rmd
305 lines (257 loc) · 14 KB
/
new_data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
---
title: "Untitled"
author: "Amelia Ritger"
date: "6/15/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE)
library(tidyverse)
library(janitor)
library(sf)
library(tmap)
library(vegan)
library(gt)
```
## Organize the data
```{r}
#Read in data
reef1 <- read_csv("MBON_photo_quadrat_point_cover_20200612.csv")
```
```{r}
mpa_sites <- c("Anacapa Landing", "Arroyo Quemado", "Carpinteria", "Cathedral Cove", "Gull Island", "Isla Vista", "Naples", "West End Cat Rock")
pal <- c(
"Annelida" = "#D2691E",
"Arthropoda" = "#CDCDB4",
"Chlorophyta" = "#A2CD5A",
"Chordata" = "#FFB90F",
"Cnidaria" = "#B4CDCD",
"Echinodermata" = "#FF6347",
"Ectoprocta" = "#FF8C00",
#"Fish" = "#CD3700",
#"Heterokontophyta" = "#8B814C",
"Ochrophyta" = "#8B814C",
"Mollusca" = "#708090",
"Phoronida" = "#FAFAD2",
"Porifera" = "#EEDC82",
"Rhodophyta" = "#DB7093"
)
```
```{r}
#Tidy up the data
reef1_tidy <- reef1 %>%
clean_names() %>% #standardize names
filter(!category=="no data") %>% #remove values related to data collection issue
filter(!category=="substrate") %>% #remove substrate values
filter(!common_name=="dead") %>% #remove dead organisms
rename(value = percent_cover,
latitude = lat,
longitude = lon) %>%
group_by(location) %>% #group by location to get one lat/long value for each location
mutate(latitude=head(latitude,1),
longitude=head(longitude,1)) %>% #get one value for lat/long (because that's how that works...)
ungroup() %>% #Important!
mutate(common_name = str_replace_all(common_name, pattern="\\.", " ")) %>% #replace . in common names with a space
mutate(species_new = ifelse(is.na(species)=="TRUE", common_name, species), #replace NAs with common name
genus_new = ifelse(is.na(genus)=="TRUE", common_name, genus),
order_new = ifelse(is.na(order)=="TRUE", common_name, order),
species_new = ifelse(is.na(species_new)=="TRUE", paste(genus_new, "spp."), #if no common name, make species name "Genus spp."
ifelse(species_new==genus_new, species_new, paste(genus_new,species_new)))) %>% #for those organisms with only a common name identifier for genus and species, only use the common name in the species_new column (to avoid duplication)
mutate(mpa = ifelse(location %in% c(mpa_sites), "mpa", "unprotected"), #add column for MPA versus non-MPA sites
order_new = ifelse(str_detect(species_new, pattern = "sponge"), "Other sponges",
ifelse(str_detect(species_new, pattern = "brown blade"), "Other brown algae",
ifelse(str_detect(species_new, pattern = "tunicate"), "Other tunicates",
ifelse(str_detect(species_new, pattern = "hydroid"), "Other hydroids",
ifelse(str_detect(species_new, pattern = "red filamentous algae"), "Other red algae",
ifelse(str_detect(species_new, pattern = "red turf algae"), "Other red algae",
ifelse(str_detect(species_new, pattern = "red feather a algae"), "Other red algae",
order_new))))))),
genus_new = ifelse(str_detect(species_new, pattern = "red filament worm"), "Other worms",
ifelse(str_detect(species_new, pattern = "anemone"), "Other anemones",
ifelse(str_detect(species_new, pattern = "fine bryozoan"), "Other Cyclostomatids",
ifelse(str_detect(species_new, pattern = "encrusting bryozoan"), "Other Cheilostomatids",
ifelse(str_detect(species_new, pattern = "spirorbid"), "Other Sabellids",
ifelse(str_detect(species_new, pattern = "white worm"), "Other Sabellids",
ifelse(str_detect(species_new, pattern = "red filamentous algae"), "Other red algae",
ifelse(str_detect(species_new, pattern = "red turf algae"), "Other red algae",
ifelse(str_detect(species_new, pattern = "red feather a algae"), "Other red algae",
genus_new))))))))))
# ifelse(str_detect(species_new, pattern = "green filamentous algae"), "Other green algae",
# ifelse(str_detect(species_new, pattern = "red filamentous algae"), "Other red algae",
# ifelse(str_detect(species_new, pattern = "red turf algae"), "Other red algae",
# ifelse(str_detect(species_new, pattern = "red feather a algae"), "Other red algae",
# order_new))))))))),
#
# ifelse(str_detect(species_new, pattern = "nongeniculate"), "Other coralline algae",
#mutate_at(vars(phylum, genus_new), character) %>%
#filter(str_detect(species_new, pattern="red"))
#sort(unique(reef1_tidy$species_new))
reef1_location <- reef1_tidy %>%
distinct(location, latitude, longitude)
reef1_vegan <- reef1_tidy %>% #named so because of the vegan package!
group_by(location,species_new, mpa) %>% #group by location, then lat/long
summarize(mean_count = mean(value)) %>% #get the mean count
select(location, species_new, mpa, mean_count) %>%
ungroup()
#Calculate species diversity and richness for each site
reef1_vegan_subset <- reef1_vegan %>%
pivot_wider(names_from=species_new, values_from=mean_count) %>%
replace(is.na(.), 0) %>% #replace all NA values with zeros
select(`Abeitinaria spp.`:`Zonaria farlowii`)
diversity1 <- diversity(reef1_vegan_subset)
richness1 <- specnumber(reef1_vegan_subset)
reef1_vegan <- reef1_location %>%
add_column(diversity1, richness1)
```
Diversity tab troubleshooting
```{r}
coord_sbc <- st_bbox(reef1_vegan %>%
st_as_sf(coords=c("longitude", "latitude"), crs=4326))
reef1_vegan_sf <- reef1_vegan %>%
st_as_sf(coords=c("longitude", "latitude"), crs=4326) #create sticky geometry for lat/long
#create index map
reef1_map_index <- tm_basemap("Esri.WorldImagery") +
tm_shape(reef1_vegan_sf, bbox = coord_sbc) +
tm_symbols(id="location", col = "richness1", size = "richness1", scale=2, #point size corresponds to value
palette = "inferno", contrast = c(1,0.5)) #set viridis palette to match plot
tmap_mode("view") #view = interactive
reef1_map_index
```
# Community tab troubleshooting
### Plot
```{r}
#generate reactive summary data
reef1_summary_community <- reef1_tidy %>%
filter(value > "0") %>% #filter out species not present
filter(location=="Naples", #filter for location of interest
str_detect(orientation,pattern="l")) %>% #filter for orientation of interest
group_by(phylum) %>% #group by phylum
summarize(mean_count = mean(value), #get the mean count
median_count = median(value), #get the median count
sd_count = sd(value), #get the s.d. count
iqr = IQR(value), #get the interquartile range for the count
sample_size = n())
#generate plot
ggplot(data=reef1_summary_community, aes(x=reorder(phylum, sample_size), #order bars by descending value
y=sample_size,
fill=phylum)) + #color bars by phylum identity
geom_col() +
scale_fill_manual(values=pal, limits=names(pal), guide=FALSE) + #color bars by phylum color palette, remove legend
coord_flip() +
ylab(paste("Number of plots")) +
xlab("Phylum") +
theme_minimal() +
theme(text = element_text(size = 15))
```
### Sankey diagram
```{r}
#Prep data
reef_top <- reef1_summary_community %>%
group_by(phylum) %>%
tally(sample_size) %>%
top_n(2)
reef_sankey <- reef1_tidy %>%
filter(value > "0") %>% #filter out species not present
filter(location=="Carpinteria", #filter for location of interest
str_detect(orientation,pattern="l")) %>% #filter for orientation of interest
group_by(phylum, order_new, species_new) %>%
summarize(`mean abundance` = mean(value)) %>%
ungroup() %>%
filter(phylum %in% c(reef_top$phylum)) %>%
select(phylum, order_new, species_new, `mean abundance`)
reef_names <- reef_sankey %>%
select(phylum, order_new, species_new)
node_names <- factor(sort(unique(as.character(unname(unlist(reef_names))))))
nodes <- data.frame(name = node_names)
links <- data.frame(source = match(c(reef_sankey$phylum,reef_sankey$order_new), node_names) - 1,
target = match(c(reef_sankey$order_new, reef_sankey$species_new), node_names) - 1,
value = reef_sankey$`mean abundance`,
group = c(reef_sankey$phylum, reef_sankey$order_new))
#turn palette color list into df
pal_df <- as.data.frame(pal)
pal_df$pal <- as.character(pal)
pal_df <- pal_df %>%
mutate(phylum = row.names(pal_df))
#Create a df with colors associated with phylum names, plus all other organism names
links_new <- left_join(nodes, pal_df, by=c("name"="phylum"))
#Add phylum color palette to reef_tidy df
reef_palette <- merge(reef1_tidy, pal_df, by="phylum") %>%
select(species_new, genus_new, order_new, phylum, pal) %>%
distinct(species_new, .keep_all=TRUE) %>%
pivot_longer(species_new:phylum) %>%
rename(color=pal,
group=name,
name=value) %>%
distinct(name, .keep_all=TRUE)
links_noo <- left_join(links_new, reef_palette, by = "name")
#mutate(color = ifelse(group %in% pal_noo$organism, pal_noo$pal, "random"))
#sample(pal, 60, replace=TRUE))
#Set color palette that can be recognized by sankeyNetwork
# my_color <- 'd3.scaleOrdinal() .domain(["Annelida","Arthropoda","Chlorophyta","Chordata","Cnidaria","Echinodermata","Ectoprocta","Fish","Heterokontophyta","Mollusca","Phoronida","Porifera","Rhodophyta"]) .range(["#D2691E", "#CDCDB4", "#3CB371", "#EE9A00","#6CA6CD", "#FF6347", "#F4A460", "#CD3700", "#6B8E23", "#708090", "#FAFAD2","#EEDD82", "#DB7093"])'
colors <- paste(links_noo$color, collapse = '", "')
critters <- paste(links_noo$name, collapse = '", "')
# colorJS2 <- paste('d3.scaleOrdinal(["', colors2, '"])')
colorJS <- paste('d3.scaleOrdinal() .domain(["',critters,'"]) .range(["',colors,'"])')
#Sankey diagram
sankeyNetwork(Links = links, Nodes = nodes,
Source = "source", Target = "target",
Value = "value", NodeID = "name",
fontSize = 20, nodeWidth = 5,
colourScale = colorJS,
LinkGroup="group", NodeGroup = NULL)
```
#Fix Neighbors tab table
```{r}
#Find number of times focal phylum makes an appearance
reef_focal <- reef1_tidy %>%
filter(value > "0") %>%
mutate(to_match = ifelse(phylum %in% c("Cnidaria"), filename, "FALSE")) %>% #create a column that we can subset all rows in a plot based on the presence of focal phylum in the plot at least once
filter(filename %in% to_match) %>% #if focal phylum is present, keep all observations of that plot ("filename")
filter(!str_detect(to_match, pattern="FALSE")) %>% #drop rows containing FALSE
distinct(filename) #get unique plots containing focal phylum
present <- nrow(reef_focal)
#Find number of times neighbor phyla make an appearance
reef_neighbor <- reef1_tidy %>%
filter(value > "0") %>%
mutate(to_match = ifelse(phylum %in% c("Annelida", "Ectoprocta"), filename, "FALSE")) %>%
filter(filename %in% to_match) %>%
filter(!str_detect(to_match, pattern="FALSE")) %>%
distinct(filename)
reef_neighbor2 <- reef1_tidy %>%
filter(value > "0",
phylum %in% c("Annelida", "Ectoprocta", "Arthropoda")) %>% #filter for neighbor phyla
distinct(filename, phylum, .keep_all=TRUE) %>% #filter for unique phylum values for each plot
group_by(filename) %>% #group by quadrat
summarize(sample_size = n()) %>% #get the number of times each quadrat has an observation (of any neighbor phylum)
ungroup() %>%
filter(sample_size==max(sample_size))
absent <- nrow(reef_neighbor)
#Find number of times focal phylum co-occurs with neighbor genus
reef_together <- reef1_tidy %>%
filter(value > "0") %>%
mutate(to_match = ifelse(phylum=="Rhodophyta", filename, "FALSE")) %>%
filter(filename %in% to_match) %>%
filter(phylum %in% c("Annelida", "Ectoprocta", "Arthropoda")) %>%
distinct(filename, phylum) %>% #filter for unique phylum values for each plot
group_by(filename) %>% #group by quadrat
summarize(sample_size = n()) %>%
ungroup() %>%
filter(sample_size==max(sample_size))
%>% #filter for neighbor phyla
mutate(to_match = ifelse(phylum %in% c("Arthropoda", "Annelida"), filename, "FALSE")) %>% #do the same thing again for the neighbor phyla
filter(filename %in% to_match) %>%
distinct(filename)
group_by(filename) %>% #group by quadrat
summarize(sample_size = n()) %>% #get the number of times each quadrat has an observation (of any neighbor phylum)
ungroup() %>%
filter(sample_size==max(sample_size)) #only keep quadrats containing all selected neighboring phyla (AKA the "max" sample size)
})
neighborly <- nrow(reef_together)
#Create basic table
reef_table <- as.data.frame(cbind(present, absent, neighborly))%>%
mutate(percent_occur = neighborly/present,
percent_absent = neighborly/absent)
```
# TO DO:
- group all of the common names together (like anemones, bryozoans)
- fix Neighbors tab >> picking neighbors that are not even present at location must include them in the table calcuations