HPSightings.Rmd

---
title: ""
author: ""
output: 
  word_document:
    reference_docx: mytemplate.docx

---

```{r include = FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, results = 'asis', fig.width = 5.5, fig.height = 5, citr.use_betterbiblatex = FALSE)
```

```{r load packages, include = FALSE, results = 'hide'}

library(devtools)
library(tidyr)
library(citr)
library(ggmap)
library(data.table)
library(ggfortify)
library(dplyr)
library(ggplot2)
library(cowplot)
library(scales)
library(captioner)
library(knitr)
library(reshape2)
library(stringr)
library(magrittr)
library(MASS)
library(stats) #kruskal.test
library(PMCMR) # posthoc.kruskal.nemenyi.test https://cran.r-project.org/web/packages/PMCMR/vignettes/PMCMR.pdf
library(pgirmess) #kruskalmc https://cran.r-project.org/web/packages/pgirmess/pgirmess.pdf 

#setwd("~/Documents/R/HarborPorpoiseSightings")

# Define Plot Theme
fig_theme <- function(...) {
  theme(
    #text = element_text(size = 11),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black", size = 10),
    axis.text.y = element_text(vjust = 0.5, color = "black", size = 10), 
    axis.title = element_text(size = 10),
    axis.line.y = element_line(colour = "black"), 
    axis.line.x = element_line(colour = "black"), 
    plot.background = element_rect(), 
    panel.background = element_blank(), 
    panel.grid = element_blank(), 
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = 8),
    ...)
}

pubfigure_simple_theme <- function(...) {
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(vjust = 0.5, color = "black", size = 10), 
    axis.title.x = element_blank(),
    axis.line.y = element_line(colour = "black"), 
    axis.line.x = element_line(colour = "black"), 
    plot.background = element_rect(), 
    panel.background = element_blank(), 
    panel.grid = element_blank(),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = 8),
    ...)
}

color6 <- c("#999aa7", "#f6b61c", "#4aaaa5", "#3b98ee", "#3f1d58", "#302f49", "#e45f56")
color4 <- c("#a3d39c", "#e45f56", "#f6b61c", "#6ea3ec")
color1 <- c("#a3d39c", "#6ea3ec")
color2 <- c("#4aaaa5", "#e45f56") 

case <- function(x)
  paste0(toupper(substr(x, 1, 1)), tolower(substring(x, 2)))

figs <- captioner(prefix="Figure")
tbls <- captioner(prefix="Table")

# figs("name") <-- "Figure  1: Caption."
# figs("name",display="cite") <-- "Figure  1"
# figs("name",display="num") <-- "1"

```

```{r load and clean data, include = FALSE}
setwd("~/Documents/R/HarborPorpoiseSightings")

##Data cleaning - no longer want to run each time

# #load csv derived from manual input of lat/longs from grid in arcGIS
input_lat_long <- read.csv("QuadLatLong.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE)

#sightings/strandings data
sightings_data <- read.csv("HPData.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE) %>%
  dplyr::select(-Count)
sightings_data$Count_clean <- gsub("\\~", "", sightings_data$Count_clean) #get symbols out of count column
sightings_data$Count_clean <- gsub("\\+", "", sightings_data$Count_clean)
sightings_data$Count <- as.numeric(sightings_data$Count_clean)
sightings_data$Count <- round(sightings_data$Count, 0)

#Existing lat/long by quadrant to merge back and fill with the empty ones where possible
mean_lat_long <- sightings_data %>%
  filter(ActualLatitude > 0) %>%
  group_by(Quadrant) %>%
  summarize(mean_long = mean(ActualLongitude), mean_lat = mean(ActualLatitude))

sightings_data <- sightings_data %>%
  dplyr::select(-Count_clean) %>%
  merge(mean_lat_long, by = 'Quadrant', all = T) %>%
  transform(Reliable_Source = ifelse(Source == 'Reliable', 'Trained_Observer', 'Public')) %>%
  transform(Record_Type = ifelse(is.na(Stranded) | Stranded == 'Yes', 'Stranding', ifelse(Stranded == 'No', 'Sighting', 'Other'))) %>%
  transform(Season = ifelse(Month == 12 | Month < 3, 'Winter',
                     ifelse(Month < 6 & Month > 2, 'Spring',
                     ifelse(Month < 9 & Month > 5, 'Summer', 'Fall')))) %>%
  transform(Group_size = ifelse(Count < 5, '1-5',
            ifelse(Count > 4 & Count < 10, '5-10',
            ifelse(Count > 9 & Count < 25, '10-25',
            ifelse(Count > 24 & Count < 50, '25-50',
            ifelse(Count > 49 & Count < 100, '50-100', '100+')))))) %>%
  merge(input_lat_long, by = 'Quadrant', all = T)
sightings_data$Group_size <- factor(sightings_data$Group_size, levels = c('1-5', '5-10', '10-25', '25-50', '50-100', '100+'))
sightings_data$Season = factor(sightings_data$Season, levels = c('Spring', 'Summer', 'Fall', 'Winter'))

#Use mean where lat/longs that are missing or 0.00
sightings_data$lat_applied <- ifelse(is.na(sightings_data$ActualLatitude) |
                                    sightings_data$ActualLatitude < 1,
                              sightings_data$mean_lat, sightings_data$ActualLatitude)
sightings_data$long_applied <- ifelse(is.na(sightings_data$ActualLongitude) |
                                    sightings_data$ActualLongitude < 1,
                              sightings_data$mean_long, sightings_data$ActualLongitude)
#use manual input where lat/longs are still missing
sightings_data$lat_all <- ifelse(is.na(sightings_data$lat_applied), sightings_data$Arc_Lat, sightings_data$lat_applied)
sightings_data$long_all <- ifelse(is.na(sightings_data$long_applied), sightings_data$Arc_Long, sightings_data$long_applied)

HP_data <- sightings_data %>%
  dplyr::select(-c(ActualLatitude, ActualLongitude, mean_lat, mean_long, lat_applied, long_applied, Direction, Location2, Pod, LikelyPod, ReportingParty, Stranded))

#create column where there is only one unique lat/long per quadrant for maps
mean_lat_long_final <- HP_data %>%
  #filter(lat_all > 0) %>%
  group_by(Quadrant) %>%
  summarize(mean_long = mean(long_all), mean_lat = mean(lat_all))

#merge back with main dataframe
HP_data <- HP_data %>% merge(mean_lat_long_final, by = 'Quadrant')

write.csv(HP_data, "HP_data.csv", row.names = F)

#HP_data <- read.csv("HP_data.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE)

```

```{r basic annual monthly seasonal figures, include = FALSE}

numberperyear <- HP_data %>%
  group_by(Year, Record_Type) %>%
  summarize(cnt = n_distinct(RecordID))

numberpermonth <- HP_data %>%
  group_by(Year, Month, Record_Type) %>%
  summarize(cnt = n_distinct(RecordID))

numberperseason <- HP_data %>%
  group_by(Year, Season, Record_Type) %>%
  summarize(cnt = n_distinct(RecordID)) 


# #Bar for average seasonal counts
records_seas_mean <- ggplot(data = numberperseason,
                       aes(x = factor(Season), y = cnt)) +
  geom_boxplot() +
  xlab("") + ylab("Mean Seasonal Records") +
  scale_y_continuous(labels = comma) +
  fig_theme()

```

```{r timeseries, include = FALSE}

#facet record type, linetype source
yr_type_source <- ggplot(data = HP_data %>% group_by(Year, Record_Type, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = Year, y = cnt, group = Reliable_Source, linetype = Reliable_Source)) + 
  geom_line(size = 0.6) +
  #geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
  xlab("") + ylab("Annual Records") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = c(1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015)) +
  fig_theme(legend.position = 'bottom',
            strip.background = element_blank(),
            axis.title.x = element_blank(),
            strip.text = element_text(size = 10),
            legend.key.size = unit(0.9, "cm")) + 
  scale_linetype_manual(values = c("dashed", "solid", "dashed", "solid"), labels = c("Public", "Trained Observer")) +
  facet_grid(~Record_Type)
figs("yr_type_source", caption = "Number of sightings and strandings per year according to reporting source.")

```

```{r seasonal, include = FALSE}

#seasons by type and source
records_seas_source <- ggplot(data = HP_data %>% group_by(Season, Record_Type, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Season), y = cnt, 
                          group = Reliable_Source, fill = Reliable_Source)) + 
  geom_bar(stat = 'identity', position = 'dodge') +
  #geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
  xlab("") + ylab("Total Records") +
  scale_y_continuous(labels = comma) +
  fig_theme(legend.position = 'bottom',
            strip.text = element_text(size = 10),
            strip.background = element_blank(),
            axis.title.x = element_blank()) + 
  scale_fill_manual(values = c("grey65", "grey20", "grey65", "grey20"), labels = c("Public", "Trained Observer")) +
  facet_grid(~Record_Type)
figs("records_seas_source", caption = "Sum of sightings and strandings reported in each season by trained observers (black) and other public reporting sources (grey).")

#monthly by source
records_mo_source <- ggplot(data = HP_data %>% group_by(Month, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Month), y = cnt, 
                          group = Reliable_Source, fill = Reliable_Source)) + 
  geom_bar(stat = 'identity', position = 'dodge') +
  #geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
  xlab("") + ylab("Monthly Sightings by Source") +
  scale_y_continuous(labels = comma) +
  fig_theme(legend.position = 'top') + 
  scale_fill_manual(values = color1)

#monthly by type
records_mo_type <- ggplot(data = HP_data %>% group_by(Month, Record_Type) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Month), y = cnt, 
                          group = Record_Type, fill = Record_Type)) + 
  geom_bar(stat = 'identity', position = 'dodge') +
  #geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
  xlab("") + ylab("Monthly Sightings by Type") +
  scale_y_continuous(labels = comma) +
  fig_theme(legend.position = 'top') + 
  scale_fill_manual(values = color2)

```

```{r mapping, include = FALSE}

#########MAPS#######
WAbox <- c(-124.8, 46.95, -121.7, 49.3)
pugetbox <- c(-124.1, 47.1, -122.1, 49.8)

###all records, overlapping jittered dots
# allrecords <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
#    geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), width = .1, height = .1, size = .7, alpha = .5, color = 'grey50') +
# scale_alpha(range = c(0, 0.3), guide = FALSE) +
#   xlab("") + ylab("") +
#   theme(
#     axis.line = element_blank(),
#     axis.ticks = element_blank(),
#     axis.text = element_blank(),
#     strip.text = element_text(size = 10),
#     strip.background = element_blank(),
#     panel.border = element_rect(colour = "black", fill = NA, size = 5)) 

###all records, summed by quadrant; color = more records
quadrant_counts <- HP_data %>%
  filter(Record_Type == 'Sighting') %>%
  group_by(Quadrant, mean_lat, mean_long) %>%
  summarize(Records = n_distinct(RecordID))

#all quadtrants - sightings only
map_records_size <- 
  ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
  geom_point(data = quadrant_counts, aes(x = mean_long, y = mean_lat, color = Records, 
                                         #size = Records, 
                                         alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
  xlab("") + ylab("") +
  theme(legend.text = element_text(size = 10), 
        legend.title = element_text(size = 10),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    strip.text = element_text(size = 10),
    strip.background = element_blank(),
    legend.position = 'top',
    panel.border = element_rect(colour = "black", fill = NA, size = 5)) + 
  scale_color_continuous(trans = 'reverse') + 
  guides(color = guide_legend("Records")) 
         #size = guide_legend("Records"))

figs("map_records_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with a larger number of total sightings records.")

#puget sound zoom - sightings only, darker symbols = more records
map_quadsize_zoom <- ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
   geom_point(data = quadrant_counts, aes(x = mean_long, y = mean_lat, color = Records, alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
  xlab("") + ylab("") +
  theme(
    legend.position = 'top',
    legend.text = element_text(size = 10), 
    legend.title = element_text(size = 10),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    strip.text = element_text(size = 10),
    strip.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, size = 5)) + 
  scale_color_continuous(trans = 'reverse') + 
  guides(color = guide_legend("Records"), size = guide_legend("Records"))

```

```{r maps by group sighting size, include = FALSE}

###all records; colors by group sighting size
map_group_size_cat <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
   geom_jitter(data = HP_data %>% filter(Record_Type == 'Sighting' & !is.na(Group_size)), 
               aes(x = mean_long, y = mean_lat, col = factor(Group_size)), height = 0.1, width = 0.1, alpha = 0.5) +
scale_alpha(range = c(0, 0.3), guide = FALSE) +
  xlab("") + ylab("") +
  theme(legend.text = element_text(size = 10), 
        legend.title = element_text(size = 11), 
        legend.position = 'bottom', 
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    strip.text = element_text(size = 10),
    strip.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
  scale_color_manual(values = color6, name = 'Group Size')

#symbol size and color across quadrant by mean group count size
quad_size <- HP_data %>%
  filter(Record_Type == 'Sighting' & Count != 'NA') %>%
  group_by(Quadrant, mean_lat, mean_long) %>%
  summarize(count_mean = mean(Count))

map_group_size <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
   geom_point(data = quad_size, aes(x = mean_long, y = mean_lat, color = count_mean, size = count_mean, alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
  xlab("") + ylab("") +
  theme(
    legend.position = 'top',
    legend.text = element_text(size = 10), 
    legend.title = element_text(size = 10),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    strip.text = element_text(size = 10),
    strip.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, size = 5)) + 
  scale_color_continuous(trans = 'reverse') + 
  guides(color = guide_legend("Group Size"), size = guide_legend("Group Size"))
figs("map_group_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with larger mean sighting group size.")

```

```{r maps by source, include = FALSE}

# #Source - facet

##nearly identical distribution over San Juans, extra reliable cluster over whidbey

map_source_zoom <- 
  ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
  geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), 
              width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50')  +
  stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Reliable_Source, linetype = Reliable_Source), 
              bins = 4, size = .5) +
  theme(legend.text = element_text(size = 10),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(), 
    axis.line = element_blank(),
    legend.title = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size = 10),
    legend.position = 'bottom', 
    legend.key.size = unit(0.7, "cm")) +
  facet_grid(~Record_Type) +
  scale_linetype_manual(values = c("solid", "dashed", "solid", "dashed"), labels = c("Public", "Trained Observer")) +
  scale_color_manual(values = c("grey30", "grey25", "grey30", "grey25"), labels = c("Public", "Trained Observer"))  

figs("map_source_zoom", caption = "Kernel density clusters of strandings and sightings from trained observers and other reporting sources.")

```

```{r seasonal heatmaps, include = FALSE}

#Seasonal hotspots -facet

map_seasons <-   
 ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
  geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), 
              width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50')  +
  stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Season), 
              bins = 4, size = .5) +
  theme(legend.text = element_text(size = 10),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(), 
    axis.line = element_blank(),
    legend.title = element_blank(),
    legend.position = 'bottom',
    strip.text = element_text(size = 10),
    strip.background = element_blank()) + 
  scale_color_manual(values = color4) + 
  facet_wrap(~Record_Type)

map_seasons_zoom <-   
 ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
  geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), 
              width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50')  +
  stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Season), 
              bins = 4, size = .5) +
  theme(legend.text = element_text(size = 10),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(), 
    axis.line = element_blank(),
    legend.title = element_blank(),
    legend.position = 'bottom',
    strip.text = element_text(size = 10),
    strip.background = element_blank()) + 
  scale_color_manual(values = color4) + 
  facet_wrap(~Record_Type)

figs("map_seasons_zoom", caption = "Kernel density clusters of sightings and strandings during each season of the year.")

```

```{r inline chunks, include = FALSE}
#Inline chunks

overall_counts <- HP_data %>%
  group_by(Record_Type, Reliable_Source) %>%
  summarize(cnt = n_distinct(RecordID)) %>%
  dcast(Record_Type ~ Reliable_Source, value.var = 'cnt') %>%
  dplyr::select(Record_Type, Trained_Observer, Public)

group_size_counts <- HP_data %>%
  filter(Record_Type == 'Sighting') %>%
  group_by(Group_size) %>%
  summarize(Count = n_distinct(RecordID))

quad_sight_counts <- HP_data %>%
  filter(Record_Type == 'Sighting') %>%
  group_by(Quadrant) %>%
  summarize(cnt = n_distinct(RecordID)) %>% 
  # group_by(Quadrant) %>%
  # summarize(mean = mean(cnt)) %>%
  # arrange(desc(mean))
  filter(!is.na(cnt)) %>%
  filter(cnt != 'NA' & Quadrant != 'NA') 

quad_strands_counts <- HP_data %>%
  filter(Record_Type == 'Stranding') %>%
  group_by(Quadrant) %>%
  summarize(cnt = n_distinct(RecordID)) %>%
  # group_by(Quadrant) %>%
  # summarize(mean = mean(cnt)) %>%
  # arrange(desc(mean))
  filter(!is.na(cnt)) %>%
  filter(cnt != 'NA' & Quadrant != 'NA') 
```

```{r temporal stats, include = FALSE}
#Temporal stats

#Per year - all
summary(glm.nb(cnt ~ Year, data = numberperyear %>% filter(Record_Type == 'Sighting'))) #not sig
summary(glm.nb(cnt ~ Year, data = numberperyear %>% filter(Record_Type == 'Stranding'))) #sig

#Per year - any quadrants increasing? Not significantly
agg_data <- HP_data %>% 
  filter(Record_Type == 'Stranding') %>%
  group_by(Year, Quadrant) %>%
  dplyr::summarize(cnt = n_distinct(RecordID)) %>%
  transform(Quadrant = factor(Quadrant))

summary(glm.nb(cnt ~ Year + Quadrant, data = agg_data))
summary(glm(cnt ~ Year + Quadrant, family = poisson(link = log), data = agg_data))
summary(glm(cnt ~ Year + Quadrant, family = 'quasipoisson', data = agg_data))

#Group size?
agg_data <- HP_data %>% 
  filter(Record_Type == 'Sighting') %>%
  group_by(Year, Count) %>%
  dplyr::summarize(cnt = n_distinct(RecordID))

summary(glm.nb(cnt ~ Year + Count, data = agg_data))
summary(glm(cnt ~ Year + Count, family = poisson(link = log), data = agg_data)) #both sig
summary(glm(cnt ~ Year + Count, family = 'quasipoisson', data = agg_data)) #count sig, not year

summary(lm(cnt ~ Year + Count, data = agg_data)) #decreasing by 0.02 cases per year?

#Reliable source? No, both not increasing
agg_data <- HP_data %>% 
  filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>%
  group_by(Year) %>%
  dplyr::summarize(cnt = n_distinct(RecordID))

summary(glm.nb(cnt ~ Year, data = agg_data))

#Seasonality
#Sights
mean_seas <- HP_data %>%
  filter(Record_Type == 'Sighting') %>%
  group_by(Year, Season) %>%
  summarize(cnt = n_distinct(RecordID)) %>%
  group_by(Season) %>%
  summarize(mean = mean(cnt))
  
agg_data <- HP_data %>% 
  filter(Record_Type == 'Sighting') %>%
  group_by(Year, Season) %>%
  dplyr::summarize(cnt = n_distinct(RecordID))

summary(glm.nb(cnt ~ Season, data = agg_data))
summary(lm(cnt ~ Season, contrasts = list(Season = 'contr.sum'), data = agg_data))

#Strands
mean_seas_strands <- HP_data %>%
  filter(Record_Type == 'Stranding') %>%
  group_by(Year, Season) %>%
  summarize(cnt = n_distinct(RecordID)) %>%
  group_by(Season) %>%
  summarize(mean = mean(cnt))
  
agg_data <- HP_data %>% 
  filter(Record_Type == 'Stranding') %>%
  group_by(Year, Season) %>%
  dplyr::summarize(cnt = n_distinct(RecordID))

summary(glm.nb(cnt ~ Season, data = agg_data))
summary(lm(cnt ~ Season, contrasts = list(Season = 'contr.sum'), data = agg_data))

```

**Harbor Porpoise Abundance and Distribution from Opportunistic Sightings and Strandings in the Salish Sea, Washington**  

*Abstract*  
- Abundance and distribution are essential for conservation and management.    
- Spatial modeling techniques can be used to derive abundance and distribution estimates at a lower cost using data from opportunistic sightings or "platforms of opportunity".  
- This study aims to describe and characterize `r n_distinct(HP_data$RecordID)` harbor porpoise sightings and strandings records compiled from a range of sources from 1976 to 2014 in the Salish Sea, including the inland waters of northern Washington and western British Columbia.   
- Over the study period, `r round(HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))/(n_distinct(HP_data$RecordID)), 2)*100`% of the records were sightings records, and the mean group size was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals.    
- The number of sightings was highest in the summer and the number of strandings was highest in the spring.    
- Both sightings and strandings were concentrated around the San Juan Islands, and the spatial distribution of reported sightings was similar between trained observers and other public reporting sources.  
- The high degree of agreement/spatial overlap across years, seasons, reporting sources, and record type (stranding vs. sighting) is evidence that these data should be considered reliable and could be used in future spatial modeling to examine changes in abundance, distribution, or projected human impacts over time in a region where not a lot is known about the presence of this species.  

*Introduction*  
Understanding cetacean abundance and distribution is essential for conservation and management, but deriving accurate and timely estimates can be resource intensive. Population abundance and distribution can be estimated in a number of ways, balancing funding availability, data availability, and the life history characteristics of the species. One approach involves modeling population estimates using data gathered from what have been termed "platforms of opportunity", where animals are sighted and recorded from ferries, fishing vessels, the Coast Guard, and a variety of other possible sources (Aragones et al., 1997; Hedley et al., 1999; Marques, 2001; Williams, 2003). While these data are not as statistically robust as those from design-based approaches such as aerial or ship-based line transect surveys (due to non-randomization of survey track, range of expertise, etc.), it is a cost-effective way of obtaining information in situations where little to no data would exist otherwise (Leaper et al., 1997; Silva et al., 2003; Williams et al., 2006; Thomas et al., 2007; Dawson et al., 2008; Peschko et al., 2016). 

Like all survey methods, using opportunistic sightings has advantages and disadvantages. Data from platforms of opportunity can capture seasonal trends in cetacean density and distribution more effectively than an annual ship-based or aerial survey (Seibert et al., 2006). Additionally, opportunistic sampling can be used in pilot studies to identify areas of high density cetacean habitat use that can then be prioritized in future targeted line transect surveys, mark-recapture studies, or habitat modeling (Koshure, 2012). Opportunistic sampling can also be a lower-cost way of training observers and fine-tuning distance sampling protocols, correction factors, and estimates of bias (Williams et al., 2007; Koshure, 2012). Disadvantages of the approach include the limitations of modeling applications when effort information is not systematically documented or data are not gathered from a range of habitat areas (Buckland et al., 2001; Evans & Hammond 2004; Cañadas et al., 2005; Kaschner et al, 2012; Williams et al 2011). Researchers have used this technique around the world and have developed best practice guidelines (Thomas et al., 2002; Evans & Hammond, 2004; Redfern et al., 2006).  

Most commonly, cetacean abundance and distribution estimates are modeled using multiple data sources across multiple timeframes and the resulting estimates are compared or used to create an overall range. Harbor porpoise (*Phocoena phocoena*), being one of the most widely distributed cosmopolitan cetacean species, is a common subject of using and comparing both design-based and model-based approaches that include data from line transect surveys, opportunistic sightings, and strandings. Harbor porpoise presence and distribution in German waters was examined from the 1980s into the early 2000s using data from aerial surveys, incidental sightings, and strandings, with estimates matching across modeling approaches (Siebert et al., 2006). The abundance and distribution of humpback, fin, and minke whales was estimated using cruise ships as platforms of opportunity in the Southern Ocean over two austral summers in 2001 and 2002 (Williams et al., 2006), being a prime example of the utility of cost-effective approaches in remote areas. Off the coast of Chile, researchers studying blue whales developed a broadly-applicable framework to estimate variance and uncertainty in spatial modeling aimed at deriving abundance estimates (Williams et al., 2011). Off the coast of northwestern France, data from opportunistic sightings, aerial surveys, and strandings were used to ascertain that harbor porpoises had a renewed presence in the area (Jung et al., 2009). In the eastern tropical Pacific, abundance and distribution of offshore spotted dolphins has been estimated by applying generalized additive modeling of oceanographic variables and opportunistic sightings data from commercial fishing vessels (Buckland et al., 1992; Marques, 2001; Lennert-Cody et al., 2016).

In the Pacific Northwest, harbor porpoise are delineated into two coastal stocks and an inland Washington stock. Aerial and ship-based surveys in addition to anecdotal observations suggest that harbor porpoise presence in inland Washington waters may be growing after a period of unexplained decline throughout the 1980s and 1990s (Carretta et al., 2016). Seasonal presence in the region is also poorly understood, but it is presumed that the stock is not migratory. Opportunistic data have been used to estimate harbor porpoise abundance and examine seasonal distribution patterns around Vancouver Island and inland Washington waters (Keple, 2002; Hall, 2004; Koshure, 2012). Existing harbor porpoise abundance estimates from the late 1990s and 2000s range from 675 (Hall, 2004) to just over 1,200 individuals (Calambokidis et al., 1997) in various sub-regions of inland Washington and British Columbia waters. Estimates for all inland Washington and British Columbia waters combined range from almost 2,900 (Calambokidis et al. 1997) to just over 3,000 indiviuals (Carretta et al., 2016), with a total of just over 9,000 individuals coastwide in British Columbia (Williams & Thomas, 2007).  

This study aims to characterize the spatial and temporal distribution of harbor porpoise sightings and strandings in inland Washington waters from 1976 to 2014 to examine trends over time, seasonality, and consistency between different sighting reporting sources. Similar to other uses of data compiled from platforms of opportunity, we anticipate that spatial modeling techniques could be applied to these data to vastly improve our understanding of harbor porpoise presence, habitat use, and likely impacts of anthropogenic activities in the region. 


*Methods*  
Study area - The study area includes inland waters spanning Puget Sound, the Salish Sea, and the Strait of Juan de Fuca in Washington State and British Columbia. The study area was subdivided into `r HP_data %>% summarize(n_distinct(Quadrant))` survey grid quadrants.

Data sources and characterization - Sightings records from 1976 to 2014 were compiled from [insert details]. Stranding records were compiled from [insert details].

Sighting and stranding records include the date, latitude, longitude, survey quadrant location, numerous reporting sources, and observation condition (dead or alive). Sighting records additionally contain an estimated number of individuals included in the sighting (*i.e*, group size). Group size was binned into the following categories: 1-4 individuals, 5-9, 10-24, 25-49, 50-99, 100+. Record source (*i.e.*, reporting entity) was grouped into "trained observers" (including X and Y) and "other public sources" (including X, Y, Z). All records were seasonally categorized as occurring in spring (Mar-May), summer (Jun-Aug), fall (Sep-Nov), and winter (Dec-Feb). Missing latitude and longitude data in a given quadrant were replaced with the mean values of records in that quadrant.

Statistical analyses - To examine temporal changes in the number of sightings and strandings over the study period, negative binomial generalized linear models (GLM) were examined with year as the predictor and the number of records as the response. Quadrant and reporting source were also examined as possible predictors to examine data consistency over time (whether records were increasing or decreasing in one area or another, or from one reporting source or another).
 
Kernel density maps were created using the statdensity function in R and depict sighting and stranding clusters by season and reporting source. Maps depicting the total number of records and mean sighting group size per quadrant were created using the ggplot2 function in R. 

*Results*  
From 1976 to 2014, there were a total of `r HP_data %>% filter(Record_Type == "Stranding") %>% summarize(n_distinct(RecordID))` harbor porpoise strandings and `r HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))` sightings for a combined total of `r n_distinct(HP_data$RecordID)` records reported from `r HP_data %>% summarize(n_distinct(Quadrant))` unique survey quadrants. Trained observers or stranding responders accounted for `r round(overall_counts[overall_counts$Record_Type == 'Stranding', 'Trained_Observer']/(HP_data %>% filter(Record_Type == "Stranding") %>% summarize(n_distinct(RecordID))), 2)*100`% of stranding records, and likewise accounted for `r round(overall_counts[overall_counts$Record_Type == 'Sighting', 'Trained_Observer']/(HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))), 2)*100`% of sightings records, with the remainder being reported by the public. Over the study period, the mean group size of sightings was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals. Smaller group sizes were more common, with `r HP_data %>% filter(Record_Type == 'Sighting' & Count < 5) %>% summarize(n_distinct(RecordID))` records containing less than five individuals and `r HP_data %>% filter(Record_Type == 'Sighting' & Count > 49) %>% summarize(n_distinct(RecordID))` records containing more than 50 individuals (Table). Mean group size of sightings made by trained observers was `r round(HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source == 'Trained_Observer') %>% summarize(mean(Count, na.rm = T)), 1)` individuals, compared to `r round(HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>% summarize(mean(Count, na.rm = T)), 1)` group size of sightings reported from the public. The maximum group count size of records made by trained observers was `r HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source == 'Trained_Observer') %>% summarize(max(Count, na.rm = T))` individuals, compared to `r HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>% summarize(max(Count, na.rm = T))`

Temporal patterns - The number of sightings has not significantly increased over time (p > 0.05; `r figs("yr_type_source", display = 'cite')`), though group size was a significant predictor of reported sightings (y = -0.02x, t = -2.17, p < 0.05), suggesting a slight decrease in the number of individuals per sighting over time. 

Annual strandings increased significantly over time (y = 1.084^x^, z = 4.6, p < 0.001), ranging from `r round(HP_data %>% filter(Year < 2000 & Record_Type == 'Stranding') %>% summarize(n_distinct(RecordID))/(2000-1976), 1)` individuals per year prior to 2000 and `r round(HP_data %>% filter(Year > 1999 & Record_Type == 'Stranding') %>% summarize(n_distinct(RecordID))/(2014-2000), 1)` per year after 2000 (`r figs("yr_type_source", display = 'cite')`). Quadrant (*i.e.*, stranding location) was not a significant predictor of increasing strandings, suggesting that strandings have increased uniformly across space. 

Seasonal patterns – Sightings are significantly higher in the summer (`r round(mean_seas[mean_seas$Season == 'Summer', 'mean'], 1)` per month; y = 7.7x, t = 3.6, p < 0.001) compared to `r round(mean_seas[mean_seas$Season == 'Spring', 'mean'], 1)` in spring and fall and `r round(mean_seas[mean_seas$Season == 'Winter', 'mean'], 1)` in winter (`r figs("records_seas_source", display = 'cite')`). Strandings are significantly higher in the spring (specifically May) (`r round(mean_seas_strands[mean_seas_strands$Season == 'Spring', 'mean'], 1)` per month; y = 7.7x, t = 3.6, p < 0.001) compared to the other seasons `r round(mean_seas_strands[mean_seas_strands$Season == 'Summer', 'mean'], 1)` in summer and fall and `r round(mean_seas_strands[mean_seas_strands$Season == 'Winter', 'mean'], 1)` in winter (`r figs("records_seas_source", display = 'cite')`). 

Spatial patterns – Sightings were distributed across `r quad_sight_counts %>% summarize(n_distinct(Quadrant))` quadrants. The total number of sightings was highest on the western side of San Juan Island (quadrant 181, 48.504N -123.134W, `r quad_sight_counts[quad_sight_counts$Quadrant == '181', 'cnt']` reports) and the northeastern tip of San Juan Island (quadrant 167, 48.624N, -123.076W, `r quad_sight_counts[quad_sight_counts$Quadrant == '167', 'cnt']` reports; and quadrant 192, 48.609N, -123.030W, `r quad_sight_counts[quad_sight_counts$Quadrant == '192', 'cnt']` reports) (`r figs("map_records_size", display = 'cite')`). Only `r round(quad_sight_counts %>% filter(cnt > 9) %>% summarize(n_distinct(Quadrant))/(quad_sight_counts %>% summarize(n_distinct(Quadrant))), 2)*100`% of quadrants had 10 or more sightings over the study period. Quadrants with the larger reported mean sighting group size do not overlap with quadrants with the largest number of records, as some quadrants may have had one very large sighting while others have numerous sightings of less than five individuals (`r figs("map_group_size", display = 'cite')`). 

Strandings were distributed across `r quad_strands_counts %>% summarize(n_distinct(Quadrant))` quadrants. The total number of strandings was highest in waters between San Juan and Lopez Islands (quadrant 187, 48.506N, -122.964W, `r quad_strands_counts[quad_strands_counts$Quadrant == '187', 'cnt']` reports; and quadrant 186, 48.448N, -122.942W, `r quad_strands_counts[quad_strands_counts$Quadrant == '186', 'cnt']` reports), and the western side of San Juan Island (quadrant 181, 48.504N -123.134W, `r quad_strands_counts[quad_strands_counts$Quadrant == '181', 'cnt']` reports). Only `r round(quad_strands_counts %>% filter(cnt > 5) %>% summarize(n_distinct(Quadrant))/(quad_strands_counts %>% summarize(n_distinct(Quadrant))), 2)*100`% of quadrants had more than five strandings over the study period.

The distribution of sightings and strandings is nearly identical from both trained observers and other public reporting sources, with primary clusters centered in the San Juan Islands and an additional cluster near Whidbey Island reported from trained observers (`r figs("map_source_zoom", display = 'cite')`). Sightings clusters were similar across seasons with the exception of winter, where there were fewer total records and an additional cluster around Whidbey Island (`r figs("map_source_zoom", display = 'cite')`), possibly due to the presence of a dedicated survey or other research project. Stranding records were most broadly distributed in the fall and most constricted in winter, though clusters all overlapped in San Juan Island waters (`r figs("map_source_zoom", display = 'cite')`). The spatial distribution of sightings and strandings did not significantly change over time, as suggested above.

*Discussion*  

Harbor porpoise sightings and strandings records in the Salish Sea from 1976 to 2014 were concentrated around the San Juan Islands, the majority of which were sightings. Annual sightings remained constant over time while strandings increased over the study period. The number of sightings was significantly higher in the summer while strandings were higher during spring. The distribution of sightings was consistent across reporting sources and remained constant over time and throughout the seasons of the year. 

Over the study period, the mean sighting group size was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals. The mean and maximum group sizes in this study was higher than those estimated by others after correcting for detectability and survey effort, ranging from 1.8 to 2.5 individuals per group and a maximum group size of 40 (Calambokidis et al., 2004; Siebert et al., 2006; Koshure, 2012). The highest number of sightings records in our study occurred June through August, while others have found higher encounter rates or higher relative abundance August through October around Vancounter Island, though most studies consistently report lowest density and abundance during winter (Keple, 2002; Olesiuk et al., 2002). [Insert other seasonal results comparisons?]. 

Spatial modeling: 
- Buckland et al. 2004 (line transect + animal rel to env factors + detection probability)  
- Data could be used in more complex habitat spatial prediction modeling (Hedley et al., 1999; Hedley & Buckland, 2004; Redfern et al., 2006; Gomez de Segura et al., 2007)  
- Williams et al 2006 - can further use information to understand WHY animals are there (spatial modeling)

Williams et al 2011 - framework for variance estimator

On opportunistic data:  
- Success of opportunistic somewhat has to do with behavior of species (Jung suggest non-distinct/unremarkable HP behavior reason why only 9% sightings in opp = hp)

- Often matched up with survey-based; lots of examples - striped dolphins in mediterranean (Gomez de Segura et al 2007)

Importance/Future:  
- Existing data from line transect surveys around the world is patchy - majority of effort concentrated in relatively small areas - need to combine what we have to scale up to broader spatial scales (Kaschner et al., 2012).  
 - Could also be used to gather useful information when monitoring other impacts, so as to reduce/consolidate impacts to animals and research effort (Koshure, 2012).  
- Predictive spatial habitat use models used to project estimated impacts of anthropogenic activities.  


*References* 

Aragones, L.V., Jefferson, T.A., and Marsh, H. 1997. Marine mammal survey techniques applicable in developing countries. Asian Marine Biology 14: 15-39.

Borchers DL, Buckland ST, Zucchini W. 2002. Estimating Animal Abundance. Springer: London, UK

Brereton, T., Williams, A.D. & Williams, R. (2000) A low-cost method to determine bathypelagic and seasonal occupancy of fin whale *Balaenoptera physalus* in the Bay of Biscay, In: European Research on Cetaceans – 14 (Ed. by P.G.H. Evans), pp. 14–18. European Cetacean Society, Rome, Italy.

Buckland, S.T., Cattanach, K.L., & Anganuzzi, A.A. 1992. Estimating trends in abundance of dolphins associated with tuna in the eastern tropical Pacific Ocean, using sightings data on commercial tuna vessels. Fishery Bulletin 90: 1-12.

Buckland, S.T., Anderson, D.R., Burnham, K.P., Laake, J.L., Borchers, D.L., & Thomas, L. 2001. Introduction to distance sampling: estimating abundance of biological populations. Oxford, UK: Oxford University Press.

Calambokidis, J., J.L. Laake, & Osmek S.D. 1997. Aerial surveys for marine mammals in Washington and British Columbia inside waters. Final report to the National Marine Mammal Laboratory, Seattle, WA 98115. 

Calambokidis, J., Steiger, G.H., Ellifrit, D.K., Troutman, B., & Bowlby, C.E. 2004. Distribution and abundance of humpback whales and other marine mammals off the northern Washington coast. Fish. Bull. 102:563–580.

Cañadas A, Sagarminaga R, De Stephanis R, Urquiola E, Hammond PS. 2005. Habitat preference modelling as a conservation tool: proposals for marine protected areas for cetaceans in southern Spanish waters. Aquat Conserv 15: 495–521.

Carretta, J., Oleson, E., Baker, J., Weller, D., Lang, A., Forney, K.,... & Brownell, R. 2016. U.S. Pacific Marine Mammal Stock Assessments: 2015. NOAA-TM-NMFS-SWFSC-561, U.S.Dept of Commerce, doi:10.7289/V5/TM-SWFSC-561.

Dawson, S., Wade, P., Slooten, E. and Barlow, J. 2008. Design and field methods for sighting surveys of cetaceans in coastal and riverine habitats. Publications, Agencies and Staff of the U.S. Department of Commerce. Paper 255. http://digitalcommons.unl.edu/usdeptcommercepub/255.

Evans, P.G. and Hammond, P.S. 2004. Monitoring cetaceans in European waters. Mammal Review 34(1), 131 – 156.

Gomez de Segura, A.G, Hammond, P.S., Canadas, A., & Raga, J.A. 2007. Comparing cetacean abundance estimates derived from spatial models and design-based line transect methods. Marine Ecology Progress Series 329: 289-299.

Hall, A. 2004. Seasonal abundance, distribution and prey species of harbour porpoise (*Phocoena phocoena*) in southern Vancouver Island waters. Unpublished Master's thesis, University of British Columbia. Vancouver, British Columbia, Canada. 

Hedley, S.L., Buckland, S.T., & Borchers, D.L. 1999. Spatial modelling from line transect data: Journal of Cetacean Research and Management 1(3): 255-264.

Hedley, S.L. & Buckland, S.T. 2004. Spatial models for line transect sampling. Journal of Agricultural, Biological, and Environmental Statistics, 9(2): 181–199. DOI: 10.1198/1085711043578.

Jung, J.L., Stephan, E., Louis, M., Alfonsi, E., Liret, C., Carpentier, F.G., & Hassani, S. 2009. Harbour porpoises (*Phocoena phocoena*) in north-western France: aerial survey, opportunistic sightings and strandings monitoring. Journal of the Marine Biological Association of the United Kingdom, 89(5): 1045-1050.

Kaschner K, Quick NJ, Jewell R, Williams R, Harris CM. 2012. Global Coverage of Cetacean Line-Transect Surveys: Status Quo, Data Gaps and Future Challenges. PLoS ONE 7(9): e44075. DOI: 10.1371/journal.pone.0044075

Keple, A.R. 2002. Seasonal abundance and distribution of marine mammals in the Southern Strait of Georgia, British Columbia.  Unpublished Master's thesis, University of British Columbia. Vancouver, British Columbia, Canada.  

Koshure, N. 2012. Estimating cetacean abundanc and distribution around Vancouver Island, B.C., using data collected from an opportunistic platform. Unpublished Master's thesis, Simon Fraser University, Canada.

Leaper, R., Fairbairns, R., Gordon, J., Hiby, A., Lovell, P. and Papastavrou, V. (1997). Analysis of data collected from a whalewatching operation to assess relative abundance and distribution of the minke whale (*Balaenoptera acutorostrata*) around the Isle of Mull, Scotland. Reports of the International Whaling Commission 47: 505-511.

Lennert-Cody, C.E., Maunder, M.N., Fiedler, P.C., Minami, M., Gerrodette, T., Rusin, J.,... & Buckland, S.T. 2016. Purse-seine vessels as platforms for monitoring the population status of dolphin species in the eastern tropical Pacific Ocean. Fisheries Research 178: 101-113. 

Marques, F.F.C. (2001). Estimating Wildlife Distribution and Abundance from Line Transect Surveys Conducted from Platforms of Opportunity. Unpublished Ph.D. thesis, University of St. Andrews, Scotland.

<!-- Mikkelsen L, Rigét FF, Kyhn LA, Sveegaard S, Dietz R, Tougaard J, et al. (2016) Comparing Distribution of Harbour Porpoises (*Phocoena phocoena*) Derived from Satellite Telemetry and Passive Acoustic Monitoring. PLoS ONE 11(7): e0158788. doi:10.1371/journal.pone.0158788. -->

Olesiuk, P.F.,  L.M. Nichol,  M.J. Sowden, and J.K.B. Ford. 2002. Effect of the Sound Generated by an Acoustic Harassment Device on the Relative Abundance and Distribution of Harbor Porpoises  (*Phocoena  phocoena*)  in  Retreat  Passage,  British  Columbia. Marine Mammal Science 18:843-862. 

Peschko, V., Ronnenberg, K., Siebert, U., & Gilles, A. 2016. Trends of harbour porpoise (*Phocoena phocoena*) density in the southern North Sea. Ecological Indicators 60: 174–183.

Redfern et al. 2006. Techniques for cetacean– habitat modeling. Marine Ecology Progress Series 310: 271-295.

Siebert U., Gilles A., Lucke K., Ludwig M., Benke H., Kock K.H. and Scheidat M. (2006). A decade of harbour porpoise occurrence in German waters – analyses of aerial surveys, incidental sightings and strandings. Journal of Sea Research 56, 65–80.

Silva, et. al. 2003. Occurrence and distribution of cetaceans in the waters around the Azores (Portugal), Summer and Autumn 1999–2000. Aquatic Mammals, 29.1: 77–83.

Thomas, L., Williams, R., & Sandilands, D. 2007. Designing line transect surveys for complex survey regions. Journal of Cetacean Resource Management 9(1): 1-13.

Williams, R. 2003. Cetacean studies using platforms of opportunity. Unpublished Doctoral thesis, University of St. Andrews. St. Andrews, Scotland.

Williams, R., S. L. Hedley, and P. S. Hammond. 2006. Modeling distribution and abundance of Antarctic baleen whales using ships of opportunity. Ecology and Society 11 (1): 1.

Williams, R., Leaper, R., Zerbini, A.N., & Hammond, P.S. 2007. Methods for investigating measurement error in cetacean line-transect surveys. Journal of the Marine Biological Association of the United Kingdom, 87: 313–320.

Williams, R. & Thomas, L. 2007. Distribution and abundance of marine mammals in coastal waters of British Columbia, Canada. Journal of Cetacean Resource Management 9(1): 15-28.

Williams, R. & Thomas, L. 2009. Cost-effective abundance estimation of rare animals: Testing performance of small-boat surveys for killer whales in British Columbia. Biological Conservation 142(7): 1542-1547.

Williams, R., Hedley, S.L., Branch, T.A., Bravington, M.V., Zerbini, A.N. and Findlay, K.P. 2011. Chilean Blue Whales as a Case Study to Illustrate Methods to Estimate Abundance and Evaluate Conservation Status of Rare Species. Conservation Biology, 25: 526–535. doi:10.1111/j.1523-1739.2011.01656.x


*Figures*    

```{r}
yr_type_source
```

`r figs("yr_type_source", caption = "Number of sightings and strandings per year from trained observers (solid) and other public reporting sources (dashed).")`

```{r}
records_seas_source
```

`r figs("records_seas_source", caption = "Sum of sightings and strandings reported in each season by trained observers (black) and other public reporting sources (grey).")`


```{r}
records_mo_type
```

Do we like month better than season? Combined rather than broken into sightings vs strandings?


```{r}
records_seas_mean
```

Do we like mean boxplots rather than total bar chars?


```{r}
map_records_size
```

`r figs("map_records_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with a larger number of total sightings records.")`

```{r}
map_group_size
```

`r figs("map_group_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with larger mean sighting group size.")`

```{r}
map_source_zoom
```

`r figs("map_source_zoom", caption = "Kernel density clusters of strandings and sightings from trained observers and other reporting sources.")`


```{r}
map_seasons_zoom
```
`r figs("map_seasons_zoom", caption = "Kernel density clusters of sightings and strandings during each season of the year.")`

```{r archive figures, include = FALSE}

# ###all records jitter, facet - record type
# map_type <-
#   ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
#   geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), 
#               width = .1, height = .1, size = .7, alpha = .5, color = 'grey50') +
# scale_alpha(range = c(0, 0.3), guide = FALSE) +
#   xlab("") + ylab("") +
#   theme(
#     axis.line = element_blank(),
#     axis.ticks = element_blank(),
#     axis.text = element_blank(),
#     strip.text = element_text(size = 10),
#     strip.background = element_blank(),
#     panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
#  facet_wrap(~ Record_Type) 
# 
# #Record type - heatmap color
# map_type_zoom <- 
#   ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
#   geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), 
#               width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50')  +
#   stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Record_Type), 
#               bins = 4, size = .8) + scale_color_manual(values = color2) +
#   theme(legend.text = element_text(size = 11),
#     axis.title = element_blank(),
#     axis.ticks = element_blank(),
#     axis.text = element_blank(), 
#     axis.line = element_blank(),
#     legend.title = element_blank(),
#     legend.position = 'bottom')
#figs("map_type", caption = "Kernel density clusters of sightings and strandings over the study period.")

# #counts per year
# records_yr <- ggplot(data = numberperyear, aes(x = Year, y = cnt)) + 
#   geom_line() +
#   geom_smooth( method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
#   xlab("") + ylab("Annual Sightings") +
#   scale_y_continuous(labels = comma) +
#   scale_x_continuous(breaks = c(1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015)) +
#   fig_theme() 
# 
# #Line plot for sum of monthly counts
# records_month_tot <- ggplot(data = numberpermonth %>% group_by(Month) %>% summarize(cnt = sum(cnt)), 
#                        aes(x = factor(Month), y = cnt)) + 
#   geom_bar(stat = 'identity') +
#   xlab("") + ylab("Total Monthly Sightings") +
#   scale_y_continuous(labels = comma) +
#   fig_theme() 
# 
# #Bar for sum of seasonal counts
# records_seas_tot <- ggplot(data = numberperseason %>% group_by(Season) %>% summarize(cnt = sum(cnt)), 
#                        aes(x = factor(Season), y = cnt)) + 
#   geom_bar(stat = 'identity') +
#   xlab("") + ylab("Total Seasonal Sightings") +
#   scale_y_continuous(labels = comma) +
#   fig_theme() 
# 
  

```