fertilization-analysis-REDO.Rmd

---
title: "Fertilization Analysis (Final)"
author: "Laura H Spencer"
date: "April 29, 2020"
output: html_document
---

### Load libraries 

```{r setup, message=FALSE, warning=FALSE, results=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# Packages this notebook depends on (add new libraries here)
list.of.packages <- c("gsheet", "tidyverse", "janitor", "plotly", "glmmTMB", "metafor", "broom.mixed", "car", "ggpubr", "scales")

# Install whichever of them are not yet present in the local library
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Attach every package. library() stops with an error when a package cannot be
# loaded, whereas require() only returns FALSE and lets the notebook fail later
# with a confusing "could not find function" message.
invisible(lapply(list.of.packages, library, character.only = TRUE))
sessionInfo()
```

### Prepare data 

NOTE: data is read directly from the GoogleSheet using a share link that was set to "anyone with a link can view"
 
```{r}
# Read in data from GoogleSheet
data.fert <- as_tibble(gsheet2tbl('https://docs.google.com/spreadsheets/d/111SuH548Et6HDckjbQjtYk-B8A5VfNkUYZgcUNRPxxY/edit?usp=sharing'))

# Replace "NR" (not reported) with NA, tidy the column names, and convert
# columns to factor / numeric where needed.
# na_if() is applied column by column: calling it on the whole data frame is
# rejected by current dplyr releases, while the per-column form works in old and
# new versions alike. Only character columns can hold the literal "NR".
data.fert <- data.fert %>%
  mutate_if(is.character, ~ na_if(., "NR")) %>%
  clean_names() %>% # fill in spaces with underscores for column names
  mutate_at(c('phylum', 'study', 'taxonomic_group', 'common_name', 'latin_name', 'error_statistic'), as.factor) %>%
  mutate_at(c('p_h_experimental', 'p_h_control', 'p_co2_experimental', 'p_co2_control', 'ave_fert_percent_p_h', 'error_percent_p_h', 'number_trials_p_h', 'insemination_time', 'sperm_per_m_l', 'sperm_egg_ratio', 'number_females', 'number_males'), as.numeric) %>%
  # Back-transform pH to 10^(-pH) for the [H+]-based models further down
  mutate(H_experimental = 10^(-1 * p_h_experimental),
         H_control = 10^(-1 * p_h_control))


#data.fert %>% select(study, ave_fert_percent_p_h, error_percent_p_h, error_statistic, number_trials_p_h) %>% View()
```

### Explore data with figures 

```{r}
# Exploratory interactive scatterplots of % fertilization against experimental
# pH, one panel per phylum.
# The points use geom_jitter(): the original geom_point(width = 0.02) silently
# ignored `width`, which is a jitter parameter. height = 0 keeps the plotted
# fertilization values exact; only the pH position is nudged.
# facet_wrap() takes `scales`; the original `scale=` relied on partial matching.

# Linear fit per taxonomic group
ggplotly(
  data.fert %>%
    ggplot(mapping = aes(x = p_h_experimental, y = ave_fert_percent_p_h, group = taxonomic_group, col = taxonomic_group, text = `common_name`)) +
    geom_jitter(size = 1.5, width = 0.02, height = 0) +
    facet_wrap(~phylum, scales = "free") +
    geom_smooth(method = "lm", se = TRUE, aes(fill = taxonomic_group)) +
    ggtitle("Fertilization Rate ~ pH exposure by phylum, linear") +
    theme_minimal()
)

# Linear fit per study (ribbon fill still follows taxonomic group)
ggplotly(
  data.fert %>%
    ggplot(mapping = aes(x = p_h_experimental, y = ave_fert_percent_p_h, group = study, col = study, text = `common_name`)) +
    geom_jitter(size = 1.5, width = 0.02, height = 0) +
    facet_wrap(~phylum, scales = "free") +
    geom_smooth(method = "lm", se = TRUE, aes(fill = taxonomic_group)) +
    ggtitle("Fertilization Rate ~ pH exposure by phylum, linear") +
    theme_minimal()
)

# Second-order polynomial fit per taxonomic group
ggplotly(
  data.fert %>%
    ggplot(mapping = aes(x = p_h_experimental, y = ave_fert_percent_p_h, group = taxonomic_group, col = taxonomic_group, text = `common_name`)) +
    geom_jitter(size = 1, width = 0.02, height = 0) +
    facet_wrap(~phylum, scales = "free") +
    geom_smooth(method = "lm", se = TRUE, formula = y ~ poly(x, 2, raw = TRUE), aes(fill = taxonomic_group)) +
    ggtitle("Fertilization Rate ~ pH exposure by phylum, polynomial") +
    theme_minimal()
)
```

### Convert all error values to separate SE & SD columns 

#### 95% Confidence Interval 

Upper 95% CI = Mean + 1.96*SE;  I recorded the difference between the upper 95%CI and the mean (Upper 95%CI - Mean). To convert I will use:  

SE = (Upper 95%CI - Mean) / 1.96  
SD = ((Upper 95%CI - Mean) / 1.96) * sqrt(n)   

#### Standard Deviation / Standard Error conversions 

SE= SD/sqrt(n)   
SD = SE*sqrt(n)  
where n=sample size  

```{r}
# Derive SE and SD columns from the reported error value, depending on which
# statistic each row reported (SE, SD or the upper 95% CI half-width).
# Rows with no recorded statistic carry the reported value through unchanged
# into both columns.
data.fert <- data.fert %>%
  mutate(
    SE = case_when(
      error_statistic == "SE" ~ error_percent_p_h,
      error_statistic == "SD" ~ error_percent_p_h / sqrt(number_trials_p_h),
      error_statistic == "95% CI" ~ error_percent_p_h / 1.96,
      is.na(error_statistic) ~ error_percent_p_h
    ),
    SD = case_when(
      error_statistic == "SD" ~ error_percent_p_h,
      error_statistic == "SE" ~ error_percent_p_h * sqrt(number_trials_p_h),
      error_statistic == "95% CI" ~ (error_percent_p_h / 1.96) * sqrt(number_trials_p_h),
      is.na(error_statistic) ~ error_percent_p_h
    )
  )
```

### Calculate delta pH (pH control - pH experimental) 

```{r}
# pH_delta is control pH minus experimental pH
data.fert <- mutate(data.fert, pH_delta = p_h_control - p_h_experimental)
```

### Convert % fertilization data to proportion fertilized, and replace any values >1 (aka 100% fertilized) with 1 

```{r}
# Percent -> proportion, capping anything above 100% at 1 (missing values stay NA)
data.fert <- mutate(data.fert, ave_fert_proport = pmin(ave_fert_percent_p_h, 100) / 100)
```

### Inspect data 

```{r}
# Histogram of the proportion fertilized across all records
data.fert$ave_fert_proport %>% hist()

# Density of the proportion fertilized, one curve per phylum
ggplot(data.fert, aes(x = ave_fert_proport, group = phylum, col = phylum)) +
  geom_density()

# How many studies per ~phylum?
data.fert %>%
  distinct(phylum, study) %>%
  group_by(phylum) %>%
  count()

# How many studies per taxonomic group?
data.fert %>%
  distinct(phylum, study, taxonomic_group) %>%
  group_by(taxonomic_group) %>%
  count()
```

### Calculate weights for models 

```{r}
# Compute the per-row value used to weight the models, via metafor::escalc()
# with measure "MN" from the reported mean, SD and number of trials. The `vi`
# column of the result is stored as data.fert$w.
# NOTE(review): `vi` is presumably the sampling variance of the mean — confirm
# against the escalc() documentation.
#
# The original call passed options(na.action = "na.pass") as a positional
# argument to escalc(). That changed the global option for the rest of the
# session as a hidden side effect and handed the returned option list to an
# unrelated escalc() formal. Here the option is set explicitly for the duration
# of the call and then restored, so rows with missing inputs are kept (as NA)
# and row alignment with data.fert is preserved.
old.options <- options(na.action = "na.pass")
weights <- metafor::escalc(measure = "MN",
                           mi = data.fert$ave_fert_percent_p_h,
                           sdi = data.fert$SD,
                           ni = data.fert$number_trials_p_h)
options(old.options)
data.fert$w <- weights$vi
```

### How many studies per phylum after filtering out those w/o weight calculation? 

```{r}
# How many studies per ~phylum? 
# Studies per phylum, counting only rows that have a weight (w)
data.fert %>%
  drop_na(w) %>%
  distinct(phylum, study) %>%
  group_by(phylum) %>%
  count()
```


### Generate binomial models  

 - Include study as random effect for all models   
 - Include varying combinations of phylum & experimental pH  
 - Decided NOT to include taxa, since so few studies per group  

## Test all candidate covariates alone - are any significant? Answer: only experimental pH 

```{r}
# Screen each candidate covariate on its own: binomial GLMM (logit link) with
# study as a random intercept, fitted to the rows that have a weight, then print
# the Anova table for each fit.
# Outcome recorded by the author: p_h_experimental = yes; every other covariate = no
fert.weighted <- drop_na(data.fert, w)
single.covariates <- c("p_h_experimental", "p_h_control", "phylum", "insemination_time",
                       "sperm_per_m_l", "sperm_egg_ratio", "number_females", "number_males")
for (covariate in single.covariates) {
  single.formula <- as.formula(paste("ave_fert_proport ~", covariate, "+ (1|study)"))
  single.fit <- glmmTMB(single.formula, data = fert.weighted, binomial(link = "logit"),
                        na.action = na.exclude, weights = 1/(w+1)^2)
  print(Anova(single.fit))
}
```

## Test all candidate covariates with experimental pH - are any significant? Answer: only experimental pH 

```{r}
# Screen each candidate covariate together with experimental pH (main effects
# plus interaction), study as a random intercept; print the Anova table per fit.
# Outcome recorded by the author: "no" for every covariate
fert.weighted <- drop_na(data.fert, w)
paired.covariates <- c("pH_delta", "p_h_control", "insemination_time", "phylum",
                       "sperm_per_m_l", "sperm_egg_ratio", "number_females", "number_males")
for (covariate in paired.covariates) {
  paired.formula <- as.formula(paste0("ave_fert_proport ~ p_h_experimental*", covariate, " + (1|study)"))
  paired.fit <- glmmTMB(paired.formula, data = fert.weighted, binomial(link = "logit"),
                        na.action = na.exclude, weights = 1/(w+1)^2)
  print(Anova(paired.fit))
}
```


### Save best fit model (called "best") to object, and examine (experimental pH as sole predictor) 

```{r}
# Best-fit model: experimental pH as the sole fixed effect, study as a random
# intercept. The predictor must be p_h_experimental (not pH_delta): the
# prediction helper looks up the "p_h_experimental" coefficient, and the
# prediction grid built later only contains a p_h_experimental column. The
# pH_delta version of this model is fitted separately in the next section.
best <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
car::Anova(best)
summary(best)
```

### Re-do model using delta pH (control - exp) instead

Same sign. as with pH_experimental 

```{r}
# Same model structure as `best`, with pH_delta as the fixed effect
delta.model <- glmmTMB(ave_fert_proport ~ pH_delta + (1|study),
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
Anova(delta.model) #yes
```
## Redo model construction using [H+] instead of pH 
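[H+] (computed as 10^-pH) is multiplied by 10^6 before modelling; this is only a rescaling of the predictor, and it is not strictly a conversion to ppm (is that okay to do?)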

Significance not as strong, but p=0.05825

```{r}
# Model on 10^(-pH) scaled by 1e6, using rows that have both a weight and an
# H_experimental value
hplus.data <- drop_na(data.fert, c(w, H_experimental))
hplus.model <- glmmTMB(ave_fert_proport ~ I(H_experimental*1000000) + (1|study),
                       data = hplus.data,
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
Anova(hplus.model)
```
## Redo model construction with only pH 7.0 and greater 
[H+] converted to ppm (is that okay to do?)

No longer significant, p=0.1361 

```{r}
# Experimental-pH model restricted to weighted rows with experimental pH >= 7.0
ph7.data <- data.fert %>%
  drop_na(w) %>%
  filter(p_h_experimental >= 7.0)
ph7.model <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study),
                     data = ph7.data,
                     family = binomial(link = "logit"),
                     na.action = na.exclude,
                     weights = 1/(w+1)^2)
Anova(ph7.model)
```


### Write a function to predict %fertilization at various pH levels (and specifying the model) 

```{r}
# Predict the fertilization rate at a given pH from a fitted model.
#
# pH:    numeric experimental pH value(s) to predict at.
# model: fitted model whose summary() exposes conditional fixed effects at
#        $coefficients$cond with an "Estimate" column containing the rows
#        "(Intercept)" and "p_h_experimental" (e.g. the glmmTMB fits above).
#
# Returns a character string of the form
# "Fertilization rate predicted for pH <pH>: <percent>", with the percentage
# rounded to two decimals.
#
# The coefficients are read from `model`; the previous version ignored the
# argument and always used the global `best`.
predict.fert <- function(pH, model) {
    estimates <- summary(model)$coefficients$cond[, "Estimate"]
    linear.predictor <- as.vector(estimates["(Intercept)"] + estimates["p_h_experimental"] * pH)
    # plogis() is the inverse logit; unlike exp(x) / (1 + exp(x)) it does not
    # return NaN when exp(x) overflows
    predicted <- plogis(linear.predictor)
    paste0("Fertilization rate predicted for pH ", pH, ": ",
           scales::percent(x = predicted, accuracy = .01))
}

# Estimate % fertilization @ pH 8.0 from hand-typed coefficients
# (inverse logit of intercept + slope * pH)
plogis(-22.382104 + 3.098070*8)

# now use the function at a range of pH values
for (ph.value in c(8.0, 7.5, 7.0, 6.0)) {
  print(predict.fert(ph.value, best))
}
```

### Generate estimates & confidence intervals (log likelihood)

```{r}
# Print confidence intervals for the parameters of the `best` model, using
# confint() with its default settings
confint(best)
```

### Inspect residuals ~ fitted values 

```{r}
# Attach fitted values and residuals from `best` to the weighted rows, then plot
# residuals against fitted values with a smoother
aa5 <- augment(best, data = drop_na(data.fert, w))
#ggplotly(
resid.base <- ggplot(data = aa5, mapping = aes(x = .fitted, y = .resid))
resid.base + geom_point() + geom_smooth()
#)
```

### Generate model predictions and plot against real data 

```{r}
# Range of experimental pH observed within each phylum (weighted rows only)
ph.min.max <- drop_na(data.fert, w) %>%
  group_by(phylum) %>%
  summarize(min = min(p_h_experimental, na.rm = TRUE),
            max = max(p_h_experimental, na.rm = TRUE))

# Build a prediction grid: for every phylum, a pH sequence from its observed
# minimum to its observed maximum in steps of 0.01.
# seq_len() is safe when ph.min.max has zero rows (1:nrow() would yield 1, 0),
# and the columns are created with their final names and plain vector types
# instead of relying on a list-wrapped tibble cell plus a later rename.
phylum.list <- lapply(seq_len(nrow(ph.min.max)), function(i) {
  data.frame(p_h_experimental = seq(from = ph.min.max$min[i],
                                    to = ph.min.max$max[i],
                                    by = 0.01),
             phylum = ph.min.max$phylum[i])
})
new.data <- bind_rows(phylum.list)
# study and w appear in the model call, so the columns must exist in newdata;
# they are left as NA here
new.data$study <- NA
new.data$w <- NA

# Predicted fertilization (response scale) with standard errors over the grid,
# bound back onto the grid columns
predict.test.df <- predict(best, newdata = new.data, se.fit = TRUE, type = "response")
predict.test.df.df <- predict.test.df %>%
  as.data.frame() %>%
  cbind(new.data)

#scales::show_col(c("#e41a1c","#4daf4a","#ff7f00","#984ea3",'#377eb8'))
```

### Figure caption: 

Fertilization success (%) by experimental pH across marine taxa examined in this review. Meta-analysis was performed using a binomial regression model, and indicates that fertilization success decreases with pH across Crustacean (5 studies), Echinoderm (12 studies), and Mollusc (18 studies). Fertilization success was not significantly affected by pH in Cnidarian (4 studies). Each point reflects the average % fertilization reported by one study at an experimental pH. 

### Generate single figure with all phyla 

```{r}
# Examine pH-experimental significance as predictor using X-squared test  
# Examine pH-experimental significance as predictor using X-squared test
best.anova <- Anova(best)
best.anova

# Annotation text built from the p-value column of the Anova table above
chisq.label <- paste("χ2 p-value =", round(best.anova[, "Pr(>Chisq)"], digits = 4))

# Observed proportions (coloured by phylum) with the `best` model prediction
# line and a +/- 1 SE ribbon drawn over them
#ggplotly(
figure.all.phyla <- ggplot() +
  geom_jitter(data = drop_na(data.fert, w),
              aes(x = p_h_experimental, y = ave_fert_proport, group = phylum, col = phylum, label = study),
              size = 1.2, width = 0.03) + #, col="gray40"
  #facet_wrap(~phylum, scales="free") +
  geom_line(data = predict.test.df.df, aes(x = p_h_experimental, y = fit), col = "gray50") + #, col=phylum
  geom_ribbon(data = predict.test.df.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "gray50") + #add fill=phylum in geom_ribbon aes if color desired
  annotate(geom = "text", x = 6.4, y = 0.9, size = 5, colour = "gray40", label = chisq.label) +
  scale_color_manual(name = NULL, values = c("#ca0020","#f4a582","#92c5de",'#0571b0')) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "A. Fertilization success ~ pH with model predictions (all taxa)",
       x = "Experimental pH",
       y = "Proportion fertilization success") +
  guides(colour = guide_legend(override.aes = list(size = 4))) +
  theme_minimal() +
  theme(legend.position = "top",
        legend.text = element_text(size = 12))
figure.all.phyla
#)
```

# Examine color scheme (`show_col` is from scales library)

```{r}
# Display the four-colour palette used by the figures in this notebook
show_col(c("#ca0020","#f4a582","#92c5de",'#0571b0'))

# Hex codes listed here but not passed to show_col() above
# (NOTE(review): presumably an earlier palette — confirm):
#e41a1c = Cnidarian (red)  
#ff7f00 = Crustacean (orange)  
#4daf4a = Echinoderm (green)  
#377eb8 = Mollusc (blue)  

# Colours passed to show_col() above, in order:
#ca0020 = coral 
#f4a582 = crustacean 
#92c5de = echinoderm
#0571b0 = mollusc
```

### Generate figure: pH data by phylum, each with binomial regression model fit + CI 

```{r}
# Interactive version of the observed-vs-predicted figure, one panel per phylum.
# Points are drawn in a fixed gray; the line and ribbon come from the `best`
# model predictions (+/- 1 SE).
figure.faceted <- ggplot() +
  geom_jitter(data = drop_na(data.fert, w),
              aes(x = p_h_experimental, y = ave_fert_proport, group = phylum, col = phylum, label = study),
              size = 1.2, width = 0.03, col = "gray40") +
  geom_line(data = predict.test.df.df, aes(x = p_h_experimental, y = fit), col = "gray50") + #, col=phylum
  geom_ribbon(data = predict.test.df.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "gray50") + #add fill=phylum in geom_ribbon aes if color desired
  facet_wrap(~phylum, scales = "free") +
  #scale_color_manual(name=NULL, values=c("#e41a1c","#ff7f00","#4daf4a",'#377eb8')) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "Fertilization success ~ pH with binomial-regression model predictions",
       x = "Experimental pH",
       y = "Proportion fertilization success") +
  theme_minimal() +
  theme(legend.position = "none")
ggplotly(figure.faceted)
```


## Run GLMs on each phylum  and generate plots 

1) Are slopes significantly different from zero? 
2) If so, what is the equation? 

### Cnidarian 

```{r}
# Cnidarian rows that have a weight, and the matching slice of the prediction grid
cnidarian.fert <- data.fert %>% drop_na(w) %>% filter(phylum=="Cnidarian")
cnidarian.grid <- subset(new.data, phylum=="Cnidarian")

# Single-covariate screens (author's recorded outcome: "no" for every covariate)
for (covariate in c("p_h_experimental", "p_h_control", "insemination_time", "sperm_per_m_l",
                    "sperm_egg_ratio", "number_females", "number_males")) {
  screen.formula <- as.formula(paste("ave_fert_proport ~", covariate, "+ (1|study)"))
  print(Anova(glmmTMB(screen.formula, data = cnidarian.fert, binomial(link = "logit"),
                      na.action = na.exclude, weights = 1/(w+1)^2)))
}

# None significant, but still develop model for plot
model.cnidarian <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Cnidarian"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
anova.cnidarian <- Anova(model.cnidarian)
anova.cnidarian
summary(model.cnidarian)

paste("Experimental pH χ2 p-value =", round(anova.cnidarian[, "Pr(>Chisq)"], digits = 3))

# Response-scale predictions with standard errors over the Cnidarian pH grid
predict.cnidarian <- predict(model.cnidarian, newdata = cnidarian.grid, se.fit = TRUE, type = "response")
predict.cnidarian.df <- cbind(as.data.frame(predict.cnidarian), cnidarian.grid)

# Panel B: observed proportions with the fitted line and a +/- 1 SE ribbon
label.cnidarian <- paste("χ2 p-value =\n", round(anova.cnidarian[, "Pr(>Chisq)"], digits = 3)) #Significance of experimental pH \nas predictor:
plot.cnidarian <- ggplot() +
  geom_jitter(data = cnidarian.fert, aes(x = p_h_experimental, y = ave_fert_proport),
              size = 1.2, width = 0.03, col = "#ca0020") +
  geom_line(data = predict.cnidarian.df, aes(x = p_h_experimental, y = fit), col = "#ca0020") +
  geom_ribbon(data = predict.cnidarian.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "#ca0020") +
  annotate(geom = "text", x = 6.5, y = 0.85, size = 3.5, colour = "gray20", label = label.cnidarian) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "B. Cnidarian", x = NULL, y = "Proportion fertilization success") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 13, colour = "gray30"))
print(plot.cnidarian)


```

### Crustacean  

```{r}
# Crustacean rows that have a weight, and the matching slice of the prediction grid
crustacean.fert <- data.fert %>% drop_na(w) %>% filter(phylum=="Crustacean")
crustacean.grid <- subset(new.data, phylum=="Crustacean")

# Single-covariate screens (author's recorded outcome: "no" for all three)
for (covariate in c("p_h_experimental", "p_h_control", "insemination_time")) {
  screen.formula <- as.formula(paste("ave_fert_proport ~", covariate, "+ (1|study)"))
  print(Anova(glmmTMB(screen.formula, data = crustacean.fert, binomial(link = "logit"),
                      na.action = na.exclude, weights = 1/(w+1)^2)))
}

# Experimental-pH model used for the plot
model.crustacean <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Crustacean"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
anova.crustacean <- Anova(model.crustacean)
anova.crustacean
summary(model.crustacean)

# Response-scale predictions with standard errors over the Crustacean pH grid
predict.crustacean <- predict(model.crustacean, newdata = crustacean.grid, se.fit = TRUE, type = "response")
predict.crustacean.df <- cbind(as.data.frame(predict.crustacean), crustacean.grid)

# Panel C: observed proportions with the fitted line and a +/- 1 SE ribbon
label.crustacean <- paste("χ2 p-value =\n", round(anova.crustacean[, "Pr(>Chisq)"], digits = 3))
plot.crustacean <- ggplot() +
  geom_jitter(data = crustacean.fert, aes(x = p_h_experimental, y = ave_fert_proport),
              size = 1.2, width = 0.03, col = "#f4a582") +
  geom_line(data = predict.crustacean.df, aes(x = p_h_experimental, y = fit), col = "#f4a582") +
  geom_ribbon(data = predict.crustacean.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "#f4a582") +
  annotate(geom = "text", x = 6.5, y = 0.85, size = 3.5, colour = "gray20", label = label.crustacean) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "C. Crustacean", x = NULL, y = NULL) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 13, colour = "gray30"))
print(plot.crustacean)
```

### Echinoderm

```{r}
# Echinoderm rows that have a weight, and the matching slice of the prediction grid
echinoderm.fert <- data.fert %>% drop_na(w) %>% filter(phylum=="Echinoderm")
echinoderm.grid <- subset(new.data, phylum=="Echinoderm")

# Single-covariate screens
# (author's recorded outcome: p_h_experimental = yes; every other covariate = no)
for (covariate in c("p_h_experimental", "p_h_control", "insemination_time", "sperm_per_m_l",
                    "sperm_egg_ratio", "number_females", "number_males")) {
  screen.formula <- as.formula(paste("ave_fert_proport ~", covariate, "+ (1|study)"))
  print(Anova(glmmTMB(screen.formula, data = echinoderm.fert, binomial(link = "logit"),
                      na.action = na.exclude, weights = 1/(w+1)^2)))
}

# Experimental-pH model used for the plot
model.echinoderm <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Echinoderm"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
anova.echinoderm <- Anova(model.echinoderm)
anova.echinoderm
summary(model.echinoderm)

# Try with pH >= 7.0
echinoderm.ph7 <- filter(echinoderm.fert, p_h_experimental >= 7.0)
Anova(glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data = echinoderm.ph7,
              binomial(link = "logit"), na.action = na.exclude, weights = 1/(w+1)^2))

# Response-scale predictions with standard errors over the Echinoderm pH grid
predict.echinoderm <- predict(model.echinoderm, newdata = echinoderm.grid, se.fit = TRUE, type = "response")
predict.echinoderm.df <- cbind(as.data.frame(predict.echinoderm), echinoderm.grid)

# Panel D: observed proportions with the fitted line and a +/- 1 SE ribbon
label.echinoderm <- paste("χ2 p-value =\n", round(anova.echinoderm[, "Pr(>Chisq)"], digits = 3))
plot.echino <- ggplot() +
  geom_jitter(data = echinoderm.fert, aes(x = p_h_experimental, y = ave_fert_proport),
              size = 1.2, width = 0.03, col = "#92c5de") +
  geom_line(data = predict.echinoderm.df, aes(x = p_h_experimental, y = fit), col = "#92c5de") +
  geom_ribbon(data = predict.echinoderm.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "#92c5de") +
  annotate(geom = "text", x = 6.4, y = 0.85, size = 3.5, colour = "gray20", label = label.echinoderm) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "D. Echinoderm", x = "Experimental pH", y = "Proportion fertilization success") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 13, colour = "gray30"))
print(plot.echino)
```

### Mollusc  

```{r}
# Mollusc rows that have a weight, and the matching slice of the prediction grid
mollusc.fert <- data.fert %>% drop_na(w) %>% filter(phylum=="Mollusc")
mollusc.grid <- subset(new.data, phylum=="Mollusc")

# Single-covariate screens
# (author's recorded outcome: p_h_experimental = "meh"; every other covariate = no)
for (covariate in c("p_h_experimental", "p_h_control", "insemination_time", "sperm_per_m_l",
                    "sperm_egg_ratio", "number_females", "number_males")) {
  screen.formula <- as.formula(paste("ave_fert_proport ~", covariate, "+ (1|study)"))
  print(Anova(glmmTMB(screen.formula, data = mollusc.fert, binomial(link = "logit"),
                      na.action = na.exclude, weights = 1/(w+1)^2)))
}

# Experimental-pH model used for the plot
model.mollusc <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Mollusc"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
anova.mollusc <- Anova(model.mollusc)
anova.mollusc
summary(model.mollusc)

# Try with pH >= 7.0
mollusc.ph7 <- filter(mollusc.fert, p_h_experimental >= 7.0)
Anova(glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data = mollusc.ph7,
              binomial(link = "logit"), na.action = na.exclude, weights = 1/(w+1)^2))

# Response-scale predictions with standard errors over the Mollusc pH grid
predict.mollusc <- predict(model.mollusc, newdata = mollusc.grid, se.fit = TRUE, type = "response")
predict.mollusc.df <- cbind(as.data.frame(predict.mollusc), mollusc.grid)

# Panel E: observed proportions with the fitted line and a +/- 1 SE ribbon
label.mollusc <- paste("χ2 p-value =\n", round(anova.mollusc[, "Pr(>Chisq)"], digits = 3))
plot.mollusc <- ggplot() +
  geom_jitter(data = mollusc.fert, aes(x = p_h_experimental, y = ave_fert_proport),
              size = 1.2, width = 0.03, col = "#0571b0") +
  geom_line(data = predict.mollusc.df, aes(x = p_h_experimental, y = fit), col = "#0571b0") +
  geom_ribbon(data = predict.mollusc.df,
              aes(x = p_h_experimental, ymin = fit - se.fit, ymax = fit + se.fit),
              linetype = 2, alpha = 0.1, col = "#0571b0") +
  annotate(geom = "text", x = 6.2, y = 0.88, size = 3.5, colour = "gray20", label = label.mollusc) +
  coord_cartesian(ylim = c(0, 1), xlim = c(6, 8.5)) +
  labs(title = "E. Mollusc", x = "Experimental pH", y = NULL) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 13, colour = "gray30"))
print(plot.mollusc)
```

```{r}
# Arrange the four per-phylum panels in a 2 x 2 grid and add one shared y-axis
# title on the left
panel.list <- list(
  plot.cnidarian + ylab(NULL),
  plot.crustacean,
  plot.echino + ylab(NULL),
  plot.mollusc + rremove("y.text")
)
all.plots <- ggarrange(plotlist = panel.list, ncol = 2, nrow = 2)
annotate_figure(all.plots,
                left = text_grob("Proportion Fertilization Success", color = "gray20", rot = 90))

# to remove x and y axis labels, add: ` + rremove("x.text") + rremove("y.text")`
# to add plot labels, add: `, labels=c("A", "B", "C", "D")`
```


## Additional analysis steps, for posterity: inspect candidate covariates - are these important experimental design factors?  

### convert candidate covariates to numeric 

```{r}
# Coerce the candidate covariate columns to numeric
numeric.covariates <- c("number_trials_p_h", "insemination_time", "sperm_per_m_l",
                        "sperm_egg_ratio", "number_females", "number_males")
data.fert <- mutate_at(data.fert, numeric.covariates, as.numeric)
```

### Test Control pH

```{r}
# Control pH x experimental pH (main effects + interaction); the formula has no
# random-effect term
covariates1 <- glmmTMB(ave_fert_proport ~ p_h_control*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates1) #no interaction

# NOTE(review): the plot title says "sign. interaction" while the model comment
# above says no interaction — confirm which is intended.
control.ph.plot <- ggplot(data.fert, aes(x = p_h_control, y = ave_fert_proport)) +
  geom_jitter(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ Control pH\nsign. interaction")
ggplotly(control.ph.plot)
#    scale_color_gradient(low = "red", high = "blue"))
```

### Test Insemination time 

```{r}
# Insemination time x experimental pH (main effects + interaction); the formula
# has no random-effect term
covariates2 <- glmmTMB(ave_fert_proport ~ insemination_time*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates2) #no interaction

# NOTE(review): the plot title says "sign. interaction_" while the model comment
# above says no interaction — confirm which is intended.
insemination.plot <- ggplot(data.fert, aes(x = insemination_time, y = ave_fert_proport)) +
  geom_point(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ Insemination Time\nsign. interaction_")
ggplotly(insemination.plot)
#    scale_color_gradient(low = "red", high = "blue"))
```

### Test sperm concentration 

```{r}
# Sperm concentration x experimental pH (main effects + interaction); the
# formula has no random-effect term
covariates3 <- glmmTMB(ave_fert_proport ~ sperm_per_m_l*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates3) #not sign

sperm.conc.plot <- ggplot(data.fert, aes(x = sperm_per_m_l, y = ave_fert_proport)) +
  geom_point(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ Sperm Concentration\nnot sign.")
ggplotly(sperm.conc.plot)
#           scale_color_gradient(low = "red", high = "blue"))
```

### Test sperm:egg ratio 

```{r}
# Sperm:egg ratio x experimental pH (main effects + interaction); the formula
# has no random-effect term
covariates4 <- glmmTMB(ave_fert_proport ~ sperm_egg_ratio*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates4) #not sign

sperm.egg.plot <- ggplot(data.fert, aes(x = sperm_egg_ratio, y = ave_fert_proport)) +
  geom_point(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ Sperm:Egg Ratio\nnot sign.")
ggplotly(sperm.egg.plot)
#           scale_color_gradient(low = "red", high = "blue"))
```

### Test no. females used for experimental eggs 

```{r}
# Number of females x experimental pH (main effects + interaction); the formula
# has no random-effect term
covariates5 <- glmmTMB(ave_fert_proport ~ number_females*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates5) #not sign.

females.plot <- ggplot(data.fert, aes(x = number_females, y = ave_fert_proport)) +
  geom_point(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ No. Females\nno interaction")
ggplotly(females.plot)
#           scale_color_gradient(low = "red", high = "blue"))
```

### Test no. males used for experimental eggs 

```{r}
# Number of males x experimental pH (main effects + interaction); the formula
# has no random-effect term
covariates6 <- glmmTMB(ave_fert_proport ~ number_males*p_h_experimental,
                       data = drop_na(data.fert, w),
                       family = binomial(link = "logit"),
                       na.action = na.exclude,
                       weights = 1/(w+1)^2)
car::Anova(covariates6) #not sign.

# NOTE(review): the plot title says "sign. interaction" while the model comment
# above says not significant — confirm which is intended.
males.plot <- ggplot(data.fert, aes(x = number_males, y = ave_fert_proport)) +
  geom_point(aes(color = p_h_experimental, label = common_name), size = 2.2, pch = 19) +
  scale_color_gradientn(colours = rainbow(5)) +
  ggtitle("% Fertilization ~ No. Males\nsign. interaction")
ggplotly(males.plot)
#          scale_color_gradient(low = "red", high = "blue"))
```