-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHPSightings.Rmd
796 lines (619 loc) · 51.3 KB
/
HPSightings.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
---
title: ""
author: ""
output:
word_document:
reference_docx: mytemplate.docx
---
```{r include = FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, results = 'asis', fig.width = 5.5, fig.height = 5, citr.use_betterbiblatex = FALSE)
```
```{r load packages, include = FALSE, results = 'hide'}
library(devtools)
library(tidyr)
library(citr)
library(ggmap)
library(data.table)
library(ggfortify)
library(dplyr)
library(ggplot2)
library(cowplot)
library(scales)
library(captioner)
library(knitr)
library(reshape2)
library(stringr)
library(magrittr)
library(MASS)
library(stats) #kruskal.test
library(PMCMR) # posthoc.kruskal.nemenyi.test https://cran.r-project.org/web/packages/PMCMR/vignettes/PMCMR.pdf
library(pgirmess) #kruskalmc https://cran.r-project.org/web/packages/pgirmess/pgirmess.pdf
#setwd("~/Documents/R/HarborPorpoiseSightings")
# Define Plot Theme
fig_theme <- function(...) {
theme(
#text = element_text(size = 11),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black", size = 10),
axis.text.y = element_text(vjust = 0.5, color = "black", size = 10),
axis.title = element_text(size = 10),
axis.line.y = element_line(colour = "black"),
axis.line.x = element_line(colour = "black"),
plot.background = element_rect(),
panel.background = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
legend.key = element_blank(),
legend.text = element_text(size = 8),
...)
}
pubfigure_simple_theme <- function(...) {
theme(
axis.text.x = element_blank(),
axis.text.y = element_text(vjust = 0.5, color = "black", size = 10),
axis.title.x = element_blank(),
axis.line.y = element_line(colour = "black"),
axis.line.x = element_line(colour = "black"),
plot.background = element_rect(),
panel.background = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
legend.key = element_blank(),
legend.text = element_text(size = 8),
...)
}
color6 <- c("#999aa7", "#f6b61c", "#4aaaa5", "#3b98ee", "#3f1d58", "#302f49", "#e45f56")
color4 <- c("#a3d39c", "#e45f56", "#f6b61c", "#6ea3ec")
color1 <- c("#a3d39c", "#6ea3ec")
color2 <- c("#4aaaa5", "#e45f56")
case <- function(x)
paste0(toupper(substr(x, 1, 1)), tolower(substring(x, 2)))
figs <- captioner(prefix="Figure")
tbls <- captioner(prefix="Table")
# figs("name") <-- "Figure 1: Caption."
# figs("name",display="cite") <-- "Figure 1"
# figs("name",display="num") <-- "1"
```
```{r load and clean data, include = FALSE}
setwd("~/Documents/R/HarborPorpoiseSightings")
##Data cleaning - no longer want to run each time
# #load csv derived from manual input of lat/longs from grid in arcGIS
input_lat_long <- read.csv("QuadLatLong.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE)
#sightings/strandings data
sightings_data <- read.csv("HPData.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE) %>%
dplyr::select(-Count)
sightings_data$Count_clean <- gsub("\\~", "", sightings_data$Count_clean) #get symbols out of count column
sightings_data$Count_clean <- gsub("\\+", "", sightings_data$Count_clean)
sightings_data$Count <- as.numeric(sightings_data$Count_clean)
sightings_data$Count <- round(sightings_data$Count, 0)
#Existing lat/long by quadrant to merge back and fill with the empty ones where possible
mean_lat_long <- sightings_data %>%
filter(ActualLatitude > 0) %>%
group_by(Quadrant) %>%
summarize(mean_long = mean(ActualLongitude), mean_lat = mean(ActualLatitude))
sightings_data <- sightings_data %>%
dplyr::select(-Count_clean) %>%
merge(mean_lat_long, by = 'Quadrant', all = T) %>%
transform(Reliable_Source = ifelse(Source == 'Reliable', 'Trained_Observer', 'Public')) %>%
transform(Record_Type = ifelse(is.na(Stranded) | Stranded == 'Yes', 'Stranding', ifelse(Stranded == 'No', 'Sighting', 'Other'))) %>%
transform(Season = ifelse(Month == 12 | Month < 3, 'Winter',
ifelse(Month < 6 & Month > 2, 'Spring',
ifelse(Month < 9 & Month > 5, 'Summer', 'Fall')))) %>%
transform(Group_size = ifelse(Count < 5, '1-5',
ifelse(Count > 4 & Count < 10, '5-10',
ifelse(Count > 9 & Count < 25, '10-25',
ifelse(Count > 24 & Count < 50, '25-50',
ifelse(Count > 49 & Count < 100, '50-100', '100+')))))) %>%
merge(input_lat_long, by = 'Quadrant', all = T)
sightings_data$Group_size <- factor(sightings_data$Group_size, levels = c('1-5', '5-10', '10-25', '25-50', '50-100', '100+'))
sightings_data$Season = factor(sightings_data$Season, levels = c('Spring', 'Summer', 'Fall', 'Winter'))
#Use mean where lat/longs that are missing or 0.00
sightings_data$lat_applied <- ifelse(is.na(sightings_data$ActualLatitude) |
sightings_data$ActualLatitude < 1,
sightings_data$mean_lat, sightings_data$ActualLatitude)
sightings_data$long_applied <- ifelse(is.na(sightings_data$ActualLongitude) |
sightings_data$ActualLongitude < 1,
sightings_data$mean_long, sightings_data$ActualLongitude)
#use manual input where lat/longs are still missing
sightings_data$lat_all <- ifelse(is.na(sightings_data$lat_applied), sightings_data$Arc_Lat, sightings_data$lat_applied)
sightings_data$long_all <- ifelse(is.na(sightings_data$long_applied), sightings_data$Arc_Long, sightings_data$long_applied)
HP_data <- sightings_data %>%
dplyr::select(-c(ActualLatitude, ActualLongitude, mean_lat, mean_long, lat_applied, long_applied, Direction, Location2, Pod, LikelyPod, ReportingParty, Stranded))
#create column where there is only one unique lat/long per quadrant for maps
mean_lat_long_final <- HP_data %>%
#filter(lat_all > 0) %>%
group_by(Quadrant) %>%
summarize(mean_long = mean(long_all), mean_lat = mean(lat_all))
#merge back with main dataframe
HP_data <- HP_data %>% merge(mean_lat_long_final, by = 'Quadrant')
write.csv(HP_data, "HP_data.csv", row.names = F)
#HP_data <- read.csv("HP_data.csv", header = TRUE, na.strings = "", stringsAsFactors = FALSE)
```
```{r basic annual monthly seasonal figures, include = FALSE}
numberperyear <- HP_data %>%
group_by(Year, Record_Type) %>%
summarize(cnt = n_distinct(RecordID))
numberpermonth <- HP_data %>%
group_by(Year, Month, Record_Type) %>%
summarize(cnt = n_distinct(RecordID))
numberperseason <- HP_data %>%
group_by(Year, Season, Record_Type) %>%
summarize(cnt = n_distinct(RecordID))
# #Bar for average seasonal counts
records_seas_mean <- ggplot(data = numberperseason,
aes(x = factor(Season), y = cnt)) +
geom_boxplot() +
xlab("") + ylab("Mean Seasonal Records") +
scale_y_continuous(labels = comma) +
fig_theme()
```
```{r timeseries, include = FALSE}
#facet record type, linetype source
yr_type_source <- ggplot(data = HP_data %>% group_by(Year, Record_Type, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = Year, y = cnt, group = Reliable_Source, linetype = Reliable_Source)) +
geom_line(size = 0.6) +
#geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
xlab("") + ylab("Annual Records") +
scale_y_continuous(labels = comma) +
scale_x_continuous(breaks = c(1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015)) +
fig_theme(legend.position = 'bottom',
strip.background = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(size = 10),
legend.key.size = unit(0.9, "cm")) +
scale_linetype_manual(values = c("dashed", "solid", "dashed", "solid"), labels = c("Public", "Trained Observer")) +
facet_grid(~Record_Type)
figs("yr_type_source", caption = "Number of sightings and strandings per year according to reporting source.")
```
```{r seasonal, include = FALSE}
#seasons by type and source
records_seas_source <- ggplot(data = HP_data %>% group_by(Season, Record_Type, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Season), y = cnt,
group = Reliable_Source, fill = Reliable_Source)) +
geom_bar(stat = 'identity', position = 'dodge') +
#geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
xlab("") + ylab("Total Records") +
scale_y_continuous(labels = comma) +
fig_theme(legend.position = 'bottom',
strip.text = element_text(size = 10),
strip.background = element_blank(),
axis.title.x = element_blank()) +
scale_fill_manual(values = c("grey65", "grey20", "grey65", "grey20"), labels = c("Public", "Trained Observer")) +
facet_grid(~Record_Type)
figs("records_seas_source", caption = "Sum of sightings and strandings reported in each season by trained observers (black) and other public reporting sources (grey).")
#monthly by source
records_mo_source <- ggplot(data = HP_data %>% group_by(Month, Reliable_Source) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Month), y = cnt,
group = Reliable_Source, fill = Reliable_Source)) +
geom_bar(stat = 'identity', position = 'dodge') +
#geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
xlab("") + ylab("Monthly Sightings by Source") +
scale_y_continuous(labels = comma) +
fig_theme(legend.position = 'top') +
scale_fill_manual(values = color1)
#monthly by type
records_mo_type <- ggplot(data = HP_data %>% group_by(Month, Record_Type) %>% summarize(cnt = n_distinct(RecordID)), aes(x = factor(Month), y = cnt,
group = Record_Type, fill = Record_Type)) +
geom_bar(stat = 'identity', position = 'dodge') +
#geom_smooth(method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
xlab("") + ylab("Monthly Sightings by Type") +
scale_y_continuous(labels = comma) +
fig_theme(legend.position = 'top') +
scale_fill_manual(values = color2)
```
```{r mapping, include = FALSE}
#########MAPS#######
WAbox <- c(-124.8, 46.95, -121.7, 49.3)
pugetbox <- c(-124.1, 47.1, -122.1, 49.8)
###all records, overlapping jittered dots
# allrecords <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
# geom_jitter(data = HP_data, aes(x = long_all, y = lat_all), width = .1, height = .1, size = .7, alpha = .5, color = 'grey50') +
# scale_alpha(range = c(0, 0.3), guide = FALSE) +
# xlab("") + ylab("") +
# theme(
# axis.line = element_blank(),
# axis.ticks = element_blank(),
# axis.text = element_blank(),
# strip.text = element_text(size = 10),
# strip.background = element_blank(),
# panel.border = element_rect(colour = "black", fill = NA, size = 5))
###all records, summed by quadrant; color = more records
quadrant_counts <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Quadrant, mean_lat, mean_long) %>%
summarize(Records = n_distinct(RecordID))
#all quadtrants - sightings only
map_records_size <-
ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
geom_point(data = quadrant_counts, aes(x = mean_long, y = mean_lat, color = Records,
#size = Records,
alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
xlab("") + ylab("") +
theme(legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
strip.text = element_text(size = 10),
strip.background = element_blank(),
legend.position = 'top',
panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
scale_color_continuous(trans = 'reverse') +
guides(color = guide_legend("Records"))
#size = guide_legend("Records"))
figs("map_records_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with a larger number of total sightings records.")
#puget sound zoom - sightings only, darker symbols = more records
map_quadsize_zoom <- ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
geom_point(data = quadrant_counts, aes(x = mean_long, y = mean_lat, color = Records, alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
xlab("") + ylab("") +
theme(
legend.position = 'top',
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
strip.text = element_text(size = 10),
strip.background = element_blank(),
panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
scale_color_continuous(trans = 'reverse') +
guides(color = guide_legend("Records"), size = guide_legend("Records"))
```
```{r maps by group sighting size, include = FALSE}
###all records; colors by group sighting size
map_group_size_cat <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
geom_jitter(data = HP_data %>% filter(Record_Type == 'Sighting' & !is.na(Group_size)),
aes(x = mean_long, y = mean_lat, col = factor(Group_size)), height = 0.1, width = 0.1, alpha = 0.5) +
scale_alpha(range = c(0, 0.3), guide = FALSE) +
xlab("") + ylab("") +
theme(legend.text = element_text(size = 10),
legend.title = element_text(size = 11),
legend.position = 'bottom',
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
strip.text = element_text(size = 10),
strip.background = element_blank(),
panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
scale_color_manual(values = color6, name = 'Group Size')
#symbol size and color across quadrant by mean group count size
quad_size <- HP_data %>%
filter(Record_Type == 'Sighting' & Count != 'NA') %>%
group_by(Quadrant, mean_lat, mean_long) %>%
summarize(count_mean = mean(Count))
map_group_size <- ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
geom_point(data = quad_size, aes(x = mean_long, y = mean_lat, color = count_mean, size = count_mean, alpha = .9)) +
scale_alpha(range = c(0.6, 0.9), guide = FALSE) +
xlab("") + ylab("") +
theme(
legend.position = 'top',
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
strip.text = element_text(size = 10),
strip.background = element_blank(),
panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
scale_color_continuous(trans = 'reverse') +
guides(color = guide_legend("Group Size"), size = guide_legend("Group Size"))
figs("map_group_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with larger mean sighting group size.")
```
```{r maps by source, include = FALSE}
# #Source - facet
##nearly identical distribution over San Juans, extra reliable cluster over whidbey
map_source_zoom <-
ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
geom_jitter(data = HP_data, aes(x = long_all, y = lat_all),
width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50') +
stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Reliable_Source, linetype = Reliable_Source),
bins = 4, size = .5) +
theme(legend.text = element_text(size = 10),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.line = element_blank(),
legend.title = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 10),
legend.position = 'bottom',
legend.key.size = unit(0.7, "cm")) +
facet_grid(~Record_Type) +
scale_linetype_manual(values = c("solid", "dashed", "solid", "dashed"), labels = c("Public", "Trained Observer")) +
scale_color_manual(values = c("grey30", "grey25", "grey30", "grey25"), labels = c("Public", "Trained Observer"))
figs("map_source_zoom", caption = "Kernel density clusters of strandings and sightings from trained observers and other reporting sources.")
```
```{r seasonal heatmaps, include = FALSE}
#Seasonal hotspots -facet
map_seasons <-
ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
geom_jitter(data = HP_data, aes(x = long_all, y = lat_all),
width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50') +
stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Season),
bins = 4, size = .5) +
theme(legend.text = element_text(size = 10),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.line = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
strip.text = element_text(size = 10),
strip.background = element_blank()) +
scale_color_manual(values = color4) +
facet_wrap(~Record_Type)
map_seasons_zoom <-
ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
geom_jitter(data = HP_data, aes(x = long_all, y = lat_all),
width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50') +
stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Season),
bins = 4, size = .5) +
theme(legend.text = element_text(size = 10),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.line = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
strip.text = element_text(size = 10),
strip.background = element_blank()) +
scale_color_manual(values = color4) +
facet_wrap(~Record_Type)
figs("map_seasons_zoom", caption = "Kernel density clusters of sightings and strandings during each season of the year.")
```
```{r inline chunks, include = FALSE}
#Inline chunks
overall_counts <- HP_data %>%
group_by(Record_Type, Reliable_Source) %>%
summarize(cnt = n_distinct(RecordID)) %>%
dcast(Record_Type ~ Reliable_Source, value.var = 'cnt') %>%
dplyr::select(Record_Type, Trained_Observer, Public)
group_size_counts <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Group_size) %>%
summarize(Count = n_distinct(RecordID))
quad_sight_counts <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Quadrant) %>%
summarize(cnt = n_distinct(RecordID)) %>%
# group_by(Quadrant) %>%
# summarize(mean = mean(cnt)) %>%
# arrange(desc(mean))
filter(!is.na(cnt)) %>%
filter(cnt != 'NA' & Quadrant != 'NA')
quad_strands_counts <- HP_data %>%
filter(Record_Type == 'Stranding') %>%
group_by(Quadrant) %>%
summarize(cnt = n_distinct(RecordID)) %>%
# group_by(Quadrant) %>%
# summarize(mean = mean(cnt)) %>%
# arrange(desc(mean))
filter(!is.na(cnt)) %>%
filter(cnt != 'NA' & Quadrant != 'NA')
```
```{r temporal stats, include = FALSE}
#Temporal stats
#Per year - all
summary(glm.nb(cnt ~ Year, data = numberperyear %>% filter(Record_Type == 'Sighting'))) #not sig
summary(glm.nb(cnt ~ Year, data = numberperyear %>% filter(Record_Type == 'Stranding'))) #sig
#Per year - any quadrants increasing? Not significantly
agg_data <- HP_data %>%
filter(Record_Type == 'Stranding') %>%
group_by(Year, Quadrant) %>%
dplyr::summarize(cnt = n_distinct(RecordID)) %>%
transform(Quadrant = factor(Quadrant))
summary(glm.nb(cnt ~ Year + Quadrant, data = agg_data))
summary(glm(cnt ~ Year + Quadrant, family = poisson(link = log), data = agg_data))
summary(glm(cnt ~ Year + Quadrant, family = 'quasipoisson', data = agg_data))
#Group size?
agg_data <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Year, Count) %>%
dplyr::summarize(cnt = n_distinct(RecordID))
summary(glm.nb(cnt ~ Year + Count, data = agg_data))
summary(glm(cnt ~ Year + Count, family = poisson(link = log), data = agg_data)) #both sig
summary(glm(cnt ~ Year + Count, family = 'quasipoisson', data = agg_data)) #count sig, not year
summary(lm(cnt ~ Year + Count, data = agg_data)) #decreasing by 0.02 cases per year?
#Reliable source? No, both not increasing
agg_data <- HP_data %>%
filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>%
group_by(Year) %>%
dplyr::summarize(cnt = n_distinct(RecordID))
summary(glm.nb(cnt ~ Year, data = agg_data))
#Seasonality
#Sights
mean_seas <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Year, Season) %>%
summarize(cnt = n_distinct(RecordID)) %>%
group_by(Season) %>%
summarize(mean = mean(cnt))
agg_data <- HP_data %>%
filter(Record_Type == 'Sighting') %>%
group_by(Year, Season) %>%
dplyr::summarize(cnt = n_distinct(RecordID))
summary(glm.nb(cnt ~ Season, data = agg_data))
summary(lm(cnt ~ Season, contrasts = list(Season = 'contr.sum'), data = agg_data))
#Strands
mean_seas_strands <- HP_data %>%
filter(Record_Type == 'Stranding') %>%
group_by(Year, Season) %>%
summarize(cnt = n_distinct(RecordID)) %>%
group_by(Season) %>%
summarize(mean = mean(cnt))
agg_data <- HP_data %>%
filter(Record_Type == 'Stranding') %>%
group_by(Year, Season) %>%
dplyr::summarize(cnt = n_distinct(RecordID))
summary(glm.nb(cnt ~ Season, data = agg_data))
summary(lm(cnt ~ Season, contrasts = list(Season = 'contr.sum'), data = agg_data))
```
**Harbor Porpoise Abundance and Distribution from Opportunistic Sightings and Strandings in the Salish Sea, Washington**
*Abstract*
- Abundance and distribution are essential for conservation and management.
- Spatial modeling techniques can be used to derive abundance and distribution estimates at a lower cost using data from opportunistic sightings or "platforms of opportunity".
- This study aims to describe and characterize `r n_distinct(HP_data$RecordID)` harbor porpoise sightings and strandings records compiled from a range of sources from 1976 to 2014 in the Salish Sea, including the inland waters of northern Washington and western British Columbia.
- Over the study period, `r round(HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))/(n_distinct(HP_data$RecordID)), 2)*100`% of the records were sightings records, and the mean group size was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals.
- The number of sightings was highest in the summer and the number of strandings was highest in the spring.
- Both sightings and strandings were concentrated around the San Juan Islands, and the spatial distribution of reported sightings was similar between trained observers and other public reporting sources.
- The high degree of agreement/spatial overlap across years, seasons, reporting sources, and record type (stranding vs. sighting) is evidence that these data should be considered reliable and could be used in future spatial modeling to examine changes in abundance, distribution, or projected human impacts over time in a region where not a lot is known about the presence of this species.
*Introduction*
Understanding cetacean abundance and distribution is essential for conservation and management, but deriving accurate and timely estimates can be resource intensive. Population abundance and distribution can be estimated in a number of ways, balancing funding availability, data availability, and the life history characteristics of the species. One approach involves modeling population estimates using data gathered from what have been termed "platforms of opportunity", where animals are sighted and recorded from ferries, fishing vessels, the Coast Guard, and a variety of other possible sources (Aragones et al., 1997; Hedley et al., 1999; Marques, 2001; Williams, 2003). While these data are not as statistically robust as those from design-based approaches such as aerial or ship-based line transect surveys (due to non-randomization of survey track, range of expertise, etc.), it is a cost-effective way of obtaining information in situations where little to no data would exist otherwise (Leaper et al., 1997; Silva et al., 2003; Williams et al., 2006; Thomas et al., 2007; Dawson et al., 2008; Peschko et al., 2016).
Like all survey methods, using opportunistic sightings has advantages and disadvantages. Data from platforms of opportunity can capture seasonal trends in cetacean density and distribution more effectively than an annual ship-based or aerial survey (Seibert et al., 2006). Additionally, opportunistic sampling can be used in pilot studies to identify areas of high density cetacean habitat use that can then be prioritized in future targeted line transect surveys, mark-recapture studies, or habitat modeling (Koshure, 2012). Opportunistic sampling can also be a lower-cost way of training observers and fine-tuning distance sampling protocols, correction factors, and estimates of bias (Williams et al., 2007; Koshure, 2012). Disadvantages of the approach include the limitations of modeling applications when effort information is not systematically documented or data are not gathered from a range of habitat areas (Buckland et al., 2001; Evans & Hammond 2004; Cañadas et al., 2005; Kaschner et al, 2012; Williams et al 2011). Researchers have used this technique around the world and have developed best practice guidelines (Thomas et al., 2002; Evans & Hammond, 2004; Redfern et al., 2006).
Most commonly, cetacean abundance and distribution estimates are modeled using multiple data sources across multiple timeframes and the resulting estimates are compared or used to create an overall range. Harbor porpoise (*Phocoena phocoena*), being one of the most widely distributed cosmopolitan cetacean species, is a common subject of using and comparing both design-based and model-based approaches that include data from line transect surveys, opportunistic sightings, and strandings. Harbor porpoise presence and distribution in German waters was examined from the 1980s into the early 2000s using data from aerial surveys, incidental sightings, and strandings, with estimates matching across modeling approaches (Siebert et al., 2006). The abundance and distribution of humpback, fin, and minke whales was estimated using cruise ships as platforms of opportunity in the Southern Ocean over two austral summers in 2001 and 2002 (Williams et al., 2006), being a prime example of the utility of cost-effective approaches in remote areas. Off the coast of Chile, researchers studying blue whales developed a broadly-applicable framework to estimate variance and uncertainty in spatial modeling aimed at deriving abundance estimates (Williams et al., 2011). Off the coast of northwestern France, data from opportunistic sightings, aerial surveys, and strandings were used to ascertain that harbor porpoises had a renewed presence in the area (Jung et al., 2009). In the eastern tropical Pacific, abundance and distribution of offshore spotted dolphins has been estimated by applying generalized additive modeling of oceanographic variables and opportunistic sightings data from commercial fishing vessels (Buckland et al., 1992; Marques, 2001; Lennert-Cody et al., 2016).
In the Pacific Northwest, harbor porpoise are delineated into two coastal stocks and an inland Washington stock. Aerial and ship-based surveys in addition to anecdotal observations suggest that harbor porpoise presence in inland Washington waters may be growing after a period of unexplained decline throughout the 1980s and 1990s (Carretta et al., 2016). Seasonal presence in the region is also poorly understood, but it is presumed that the stock is not migratory. Opportunistic data have been used to estimate harbor porpoise abundance and examine seasonal distribution patterns around Vancouver Island and inland Washington waters (Keple, 2002; Hall, 2004; Koshure, 2012). Existing harbor porpoise abundance estimates from the late 1990s and 2000s range from 675 (Hall, 2004) to just over 1,200 individuals (Calambokidis et al., 1997) in various sub-regions of inland Washington and British Columbia waters. Estimates for all inland Washington and British Columbia waters combined range from almost 2,900 (Calambokidis et al. 1997) to just over 3,000 indiviuals (Carretta et al., 2016), with a total of just over 9,000 individuals coastwide in British Columbia (Williams & Thomas, 2007).
This study aims to characterize the spatial and temporal distribution of harbor porpoise sightings and strandings in inland Washington waters from 1976 to 2014 to examine trends over time, seasonality, and consistency between different sighting reporting sources. Similar to other uses of data compiled from platforms of opportunity, we anticipate that spatial modeling techniques could be applied to these data to vastly improve our understanding of harbor porpoise presence, habitat use, and likely impacts of anthropogenic activities in the region.
*Methods*
Study area - The study area includes inland waters spanning Puget Sound, the Salish Sea, and the Strait of Juan de Fuca in Washington State and British Columbia. The study area was subdivided into `r HP_data %>% summarize(n_distinct(Quadrant))` survey grid quadrants.
Data sources and characterization - Sightings records from 1976 to 2014 were compiled from [insert details]. Stranding records were compiled from [insert details].
Sighting and stranding records include the date, latitude, longitude, survey quadrant location, numerous reporting sources, and observation condition (dead or alive). Sighting records additionally contain an estimated number of individuals included in the sighting (*i.e*, group size). Group size was binned into the following categories: 1-4 individuals, 5-9, 10-24, 25-49, 50-99, 100+. Record source (*i.e.*, reporting entity) was grouped into "trained observers" (including X and Y) and "other public sources" (including X, Y, Z). All records were seasonally categorized as occurring in spring (Mar-May), summer (Jun-Aug), fall (Sep-Nov), and winter (Dec-Feb). Missing latitude and longitude data in a given quadrant were replaced with the mean values of records in that quadrant.
Statistical analyses - To examine temporal changes in the number of sightings and strandings over the study period, negative binomial generalized linear models (GLM) were examined with year as the predictor and the number of records as the response. Quadrant and reporting source were also examined as possible predictors to examine data consistency over time (whether records were increasing or decreasing in one area or another, or from one reporting source or another).
Kernel density maps were created using the statdensity function in R and depict sighting and stranding clusters by season and reporting source. Maps depicting the total number of records and mean sighting group size per quadrant were created using the ggplot2 function in R.
*Results*
From 1976 to 2014, there were a total of `r HP_data %>% filter(Record_Type == "Stranding") %>% summarize(n_distinct(RecordID))` harbor porpoise strandings and `r HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))` sightings for a combined total of `r n_distinct(HP_data$RecordID)` records reported from `r HP_data %>% summarize(n_distinct(Quadrant))` unique survey quadrants. Trained observers or stranding responders accounted for `r round(overall_counts[overall_counts$Record_Type == 'Stranding', 'Trained_Observer']/(HP_data %>% filter(Record_Type == "Stranding") %>% summarize(n_distinct(RecordID))), 2)*100`% of stranding records, and likewise accounted for `r round(overall_counts[overall_counts$Record_Type == 'Sighting', 'Trained_Observer']/(HP_data %>% filter(Record_Type == "Sighting") %>% summarize(n_distinct(RecordID))), 2)*100`% of sightings records, with the remainder being reported by the public. Over the study period, the mean group size of sightings was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals. Smaller group sizes were more common, with `r HP_data %>% filter(Record_Type == 'Sighting' & Count < 5) %>% summarize(n_distinct(RecordID))` records containing less than five individuals and `r HP_data %>% filter(Record_Type == 'Sighting' & Count > 49) %>% summarize(n_distinct(RecordID))` records containing more than 50 individuals (Table). Mean group size of sightings made by trained observers was `r round(HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source == 'Trained_Observer') %>% summarize(mean(Count, na.rm = T)), 1)` individuals, compared to `r round(HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>% summarize(mean(Count, na.rm = T)), 1)` group size of sightings reported from the public. The maximum group count size of records made by trained observers was `r HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source == 'Trained_Observer') %>% summarize(max(Count, na.rm = T))` individuals, compared to `r HP_data %>% filter(Record_Type == 'Sighting' & Reliable_Source != 'Trained_Observer') %>% summarize(max(Count, na.rm = T))`
Temporal patterns - The number of sightings has not significantly increased over time (p > 0.05; `r figs("yr_type_source", display = 'cite')`), though group size was a significant predictor of reported sightings (y = -0.02x, t = -2.17, p < 0.05), suggesting a slight decrease in the number of individuals per sighting over time.
Annual strandings increased significantly over time (y = 1.084^x^, z = 4.6, p < 0.001), ranging from `r round(HP_data %>% filter(Year < 2000 & Record_Type == 'Stranding') %>% summarize(n_distinct(RecordID))/(2000-1976), 1)` individuals per year prior to 2000 and `r round(HP_data %>% filter(Year > 1999 & Record_Type == 'Stranding') %>% summarize(n_distinct(RecordID))/(2014-2000), 1)` per year after 2000 (`r figs("yr_type_source", display = 'cite')`). Quadrant (*i.e.*, stranding location) was not a significant predictor of increasing strandings, suggesting that strandings have increased uniformly across space.
Seasonal patterns – Sightings are significantly higher in the summer (`r round(mean_seas[mean_seas$Season == 'Summer', 'mean'], 1)` per month; y = 7.7x, t = 3.6, p < 0.001) compared to `r round(mean_seas[mean_seas$Season == 'Spring', 'mean'], 1)` in spring and fall and `r round(mean_seas[mean_seas$Season == 'Winter', 'mean'], 1)` in winter (`r figs("records_seas_source", display = 'cite')`). Strandings are significantly higher in the spring (specifically May) (`r round(mean_seas_strands[mean_seas_strands$Season == 'Spring', 'mean'], 1)` per month; y = 7.7x, t = 3.6, p < 0.001) compared to the other seasons `r round(mean_seas_strands[mean_seas_strands$Season == 'Summer', 'mean'], 1)` in summer and fall and `r round(mean_seas_strands[mean_seas_strands$Season == 'Winter', 'mean'], 1)` in winter (`r figs("records_seas_source", display = 'cite')`).
Spatial patterns – Sightings were distributed across `r quad_sight_counts %>% summarize(n_distinct(Quadrant))` quadrants. The total number of sightings was highest on the western side of San Juan Island (quadrant 181, 48.504N -123.134W, `r quad_sight_counts[quad_sight_counts$Quadrant == '181', 'cnt']` reports) and the northeastern tip of San Juan Island (quadrant 167, 48.624N, -123.076W, `r quad_sight_counts[quad_sight_counts$Quadrant == '167', 'cnt']` reports; and quadrant 192, 48.609N, -123.030W, `r quad_sight_counts[quad_sight_counts$Quadrant == '192', 'cnt']` reports) (`r figs("map_records_size", display = 'cite')`). Only `r round(quad_sight_counts %>% filter(cnt > 9) %>% summarize(n_distinct(Quadrant))/(quad_sight_counts %>% summarize(n_distinct(Quadrant))), 2)*100`% of quadrants had 10 or more sightings over the study period. Quadrants with the larger reported mean sighting group size do not overlap with quadrants with the largest number of records, as some quadrants may have had one very large sighting while others have numerous sightings of less than five individuals (`r figs("map_group_size", display = 'cite')`).
Strandings were distributed across `r quad_strands_counts %>% summarize(n_distinct(Quadrant))` quadrants. The total number of strandings was highest in waters between San Juan and Lopez Islands (quadrant 187, 48.506N, -122.964W, `r quad_strands_counts[quad_strands_counts$Quadrant == '187', 'cnt']` reports; and quadrant 186, 48.448N, -122.942W, `r quad_strands_counts[quad_strands_counts$Quadrant == '186', 'cnt']` reports), and the western side of San Juan Island (quadrant 181, 48.504N -123.134W, `r quad_strands_counts[quad_strands_counts$Quadrant == '181', 'cnt']` reports). Only `r round(quad_strands_counts %>% filter(cnt > 5) %>% summarize(n_distinct(Quadrant))/(quad_strands_counts %>% summarize(n_distinct(Quadrant))), 2)*100`% of quadrants had more than five strandings over the study period.
The distribution of sightings and strandings is nearly identical from both trained observers and other public reporting sources, with primary clusters centered in the San Juan Islands and an additional cluster near Whidbey Island reported from trained observers (`r figs("map_source_zoom", display = 'cite')`). Sightings clusters were similar across seasons with the exception of winter, where there were fewer total records and an additional cluster around Whidbey Island (`r figs("map_source_zoom", display = 'cite')`), possibly due to the presence of a dedicated survey or other research project. Stranding records were most broadly distributed in the fall and most constricted in winter, though clusters all overlapped in San Juan Island waters (`r figs("map_source_zoom", display = 'cite')`). The spatial distribution of sightings and strandings did not significantly change over time, as suggested above.
*Discussion*
Harbor porpoise sightings and strandings records in the Salish Sea from 1976 to 2014 were concentrated around the San Juan Islands, the majority of which were sightings. Annual sightings remained constant over time while strandings increased over the study period. The number of sightings was significantly higher in the summer while strandings were higher during spring. The distribution of sightings was consistent across reporting sources and remained constant over time and throughout the seasons of the year.
Over the study period, the mean sighting group size was `r round(mean(HP_data[HP_data$Record_Type == 'Sighting', "Count"], na.rm = T), 1)` individuals. The mean and maximum group sizes in this study was higher than those estimated by others after correcting for detectability and survey effort, ranging from 1.8 to 2.5 individuals per group and a maximum group size of 40 (Calambokidis et al., 2004; Siebert et al., 2006; Koshure, 2012). The highest number of sightings records in our study occurred June through August, while others have found higher encounter rates or higher relative abundance August through October around Vancounter Island, though most studies consistently report lowest density and abundance during winter (Keple, 2002; Olesiuk et al., 2002). [Insert other seasonal results comparisons?].
Spatial modeling:
- Buckland et al. 2004 (line transect + animal rel to env factors + detection probability)
- Data could be used in more complex habitat spatial prediction modeling (Hedley et al., 1999; Hedley & Buckland, 2004; Redfern et al., 2006; Gomez de Segura et al., 2007)
- Williams et al 2006 - can further use information to understand WHY animals are there (spatial modeling)
Williams et al 2011 - framework for variance estimator
On opportunistic data:
- Success of opportunistic somewhat has to do with behavior of species (Jung suggest non-distinct/unremarkable HP behavior reason why only 9% sightings in opp = hp)
- Often matched up with survey-based; lots of examples - striped dolphins in mediterranean (Gomez de Segura et al 2007)
Importance/Future:
- Existing data from line transect surveys around the world is patchy - majority of effort concentrated in relatively small areas - need to combine what we have to scale up to broader spatial scales (Kaschner et al., 2012).
- Could also be used to gather useful information when monitoring other impacts, so as to reduce/consolidate impacts to animals and research effort (Koshure, 2012).
- Predictive spatial habitat use models used to project estimated impacts of anthropogenic activities.
*References*
Aragones, L.V., Jefferson, T.A., and Marsh, H. 1997. Marine mammal survey techniques applicable in developing countries. Asian Marine Biology 14: 15-39.
Borchers DL, Buckland ST, Zucchini W. 2002. Estimating Animal Abundance. Springer: London, UK
Brereton, T., Williams, A.D. & Williams, R. (2000) A low-cost method to determine bathypelagic and seasonal occupancy of fin whale *Balaenoptera physalus* in the Bay of Biscay, In: European Research on Cetaceans – 14 (Ed. by P.G.H. Evans), pp. 14–18. European Cetacean Society, Rome, Italy.
Buckland, S.T., Cattanach, K.L., & Anganuzzi, A.A. 1992. Estimating trends in abundance of dolphins associated with tuna in the eastern tropical Pacific Ocean, using sightings data on commercial tuna vessels. Fishery Bulletin 90: 1-12.
Buckland, S.T., Anderson, D.R., Burnham, K.P., Laake, J.L., Borchers, D.L., & Thomas, L. 2001. Introduction to distance sampling: estimating abundance of biological populations. Oxford, UK: Oxford University Press.
Calambokidis, J., J.L. Laake, & Osmek S.D. 1997. Aerial surveys for marine mammals in Washington and British Columbia inside waters. Final report to the National Marine Mammal Laboratory, Seattle, WA 98115.
Calambokidis, J., Steiger, G.H., Ellifrit, D.K., Troutman, B., & Bowlby, C.E. 2004. Distribution and abundance of humpback whales and other marine mammals off the northern Washington coast. Fish. Bull. 102:563–580.
Cañadas A, Sagarminaga R, De Stephanis R, Urquiola E, Hammond PS. 2005. Habitat preference modelling as a conservation tool: proposals for marine protected areas for cetaceans in southern Spanish waters. Aquat Conserv 15: 495–521.
Carretta, J., Oleson, E., Baker, J., Weller, D., Lang, A., Forney, K.,... & Brownell, R. 2016. U.S. Pacific Marine Mammal Stock Assessments: 2015. NOAA-TM-NMFS-SWFSC-561, U.S.Dept of Commerce, doi:10.7289/V5/TM-SWFSC-561.
Dawson, S., Wade, P., Slooten, E. and Barlow, J. 2008. Design and field methods for sighting surveys of cetaceans in coastal and riverine habitats. Publications, Agencies and Staff of the U.S. Department of Commerce. Paper 255. http://digitalcommons.unl.edu/usdeptcommercepub/255.
Evans, P.G. and Hammond, P.S. 2004. Monitoring cetaceans in European waters. Mammal Review 34(1), 131 – 156.
Gomez de Segura, A.G, Hammond, P.S., Canadas, A., & Raga, J.A. 2007. Comparing cetacean abundance estimates derived from spatial models and design-based line transect methods. Marine Ecology Progress Series 329: 289-299.
Hall, A. 2004. Seasonal abundance, distribution and prey species of harbour porpoise (*Phocoena phocoena*) in southern Vancouver Island waters. Unpublished Master's thesis, University of British Columbia. Vancouver, British Columbia, Canada.
Hedley, S.L., Buckland, S.T., & Borchers, D.L. 1999. Spatial modelling from line transect data: Journal of Cetacean Research and Management 1(3): 255-264.
Hedley, S.L. & Buckland, S.T. 2004. Spatial models for line transect sampling. Journal of Agricultural, Biological, and Environmental Statistics, 9(2): 181–199. DOI: 10.1198/1085711043578.
Jung, J.L., Stephan, E., Louis, M., Alfonsi, E., Liret, C., Carpentier, F.G., & Hassani, S. 2009. Harbour porpoises (*Phocoena phocoena*) in north-western France: aerial survey, opportunistic sightings and strandings monitoring. Journal of the Marine Biological Association of the United Kingdom, 89(5): 1045-1050.
Kaschner K, Quick NJ, Jewell R, Williams R, Harris CM. 2012. Global Coverage of Cetacean Line-Transect Surveys: Status Quo, Data Gaps and Future Challenges. PLoS ONE 7(9): e44075. DOI: 10.1371/journal.pone.0044075
Keple, A.R. 2002. Seasonal abundance and distribution of marine mammals in the Southern Strait of Georgia, British Columbia. Unpublished Master's thesis, University of British Columbia. Vancouver, British Columbia, Canada.
Koshure, N. 2012. Estimating cetacean abundanc and distribution around Vancouver Island, B.C., using data collected from an opportunistic platform. Unpublished Master's thesis, Simon Fraser University, Canada.
Leaper, R., Fairbairns, R., Gordon, J., Hiby, A., Lovell, P. and Papastavrou, V. (1997). Analysis of data collected from a whalewatching operation to assess relative abundance and distribution of the minke whale (*Balaenoptera acutorostrata*) around the Isle of Mull, Scotland. Reports of the International Whaling Commission 47: 505-511.
Lennert-Cody, C.E., Maunder, M.N., Fiedler, P.C., Minami, M., Gerrodette, T., Rusin, J.,... & Buckland, S.T. 2016. Purse-seine vessels as platforms for monitoring the population status of dolphin species in the eastern tropical Pacific Ocean. Fisheries Research 178: 101-113.
Marques, F.F.C. (2001). Estimating Wildlife Distribution and Abundance from Line Transect Surveys Conducted from Platforms of Opportunity. Unpublished Ph.D. thesis, University of St. Andrews, Scotland.
<!-- Mikkelsen L, Rigét FF, Kyhn LA, Sveegaard S, Dietz R, Tougaard J, et al. (2016) Comparing Distribution of Harbour Porpoises (*Phocoena phocoena*) Derived from Satellite Telemetry and Passive Acoustic Monitoring. PLoS ONE 11(7): e0158788. doi:10.1371/journal.pone.0158788. -->
Olesiuk, P.F., L.M. Nichol, M.J. Sowden, and J.K.B. Ford. 2002. Effect of the Sound Generated by an Acoustic Harassment Device on the Relative Abundance and Distribution of Harbor Porpoises (*Phocoena phocoena*) in Retreat Passage, British Columbia. Marine Mammal Science 18:843-862.
Peschko, V., Ronnenberg, K., Siebert, U., & Gilles, A. 2016. Trends of harbour porpoise (*Phocoena phocoena*) density in the southern North Sea. Ecological Indicators 60: 174–183.
Redfern et al. 2006. Techniques for cetacean– habitat modeling. Marine Ecology Progress Series 310: 271-295.
Siebert U., Gilles A., Lucke K., Ludwig M., Benke H., Kock K.H. and Scheidat M. (2006). A decade of harbour porpoise occurrence in German waters – analyses of aerial surveys, incidental sightings and strandings. Journal of Sea Research 56, 65–80.
Silva, et. al. 2003. Occurrence and distribution of cetaceans in the waters around the Azores (Portugal), Summer and Autumn 1999–2000. Aquatic Mammals, 29.1: 77–83.
Thomas, L., Williams, R., & Sandilands, D. 2007. Designing line transect surveys for complex survey regions. Journal of Cetacean Resource Management 9(1): 1-13.
Williams, R. 2003. Cetacean studies using platforms of opportunity. Unpublished Doctoral thesis, University of St. Andrews. St. Andrews, Scotland.
Williams, R., S. L. Hedley, and P. S. Hammond. 2006. Modeling distribution and abundance of Antarctic baleen whales using ships of opportunity. Ecology and Society 11 (1): 1.
Williams, R., Leaper, R., Zerbini, A.N., & Hammond, P.S. 2007. Methods for investigating measurement error in cetacean line-transect surveys. Journal of the Marine Biological Association of the United Kingdom, 87: 313–320.
Williams, R. & Thomas, L. 2007. Distribution and abundance of marine mammals in coastal waters of British Columbia, Canada. Journal of Cetacean Resource Management 9(1): 15-28.
Williams, R. & Thomas, L. 2009. Cost-effective abundance estimation of rare animals: Testing performance of small-boat surveys for killer whales in British Columbia. Biological Conservation 142(7): 1542-1547.
Williams, R., Hedley, S.L., Branch, T.A., Bravington, M.V., Zerbini, A.N. and Findlay, K.P. 2011. Chilean Blue Whales as a Case Study to Illustrate Methods to Estimate Abundance and Evaluate Conservation Status of Rare Species. Conservation Biology, 25: 526–535. doi:10.1111/j.1523-1739.2011.01656.x
*Figures*
```{r}
yr_type_source
```
`r figs("yr_type_source", caption = "Number of sightings and strandings per year from trained observers (solid) and other public reporting sources (dashed).")`
```{r}
records_seas_source
```
`r figs("records_seas_source", caption = "Sum of sightings and strandings reported in each season by trained observers (black) and other public reporting sources (grey).")`
```{r}
records_mo_type
```
Do we like month better than season? Combined rather than broken into sightings vs strandings?
```{r}
records_seas_mean
```
Do we like mean boxplots rather than total bar chars?
```{r}
map_records_size
```
`r figs("map_records_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with a larger number of total sightings records.")`
```{r}
map_group_size
```
`r figs("map_group_size", caption = "Symbols representing quadrant locations where sightings have occurred over the study period, with darker colors representing quadrants with larger mean sighting group size.")`
```{r}
map_source_zoom
```
`r figs("map_source_zoom", caption = "Kernel density clusters of strandings and sightings from trained observers and other reporting sources.")`
```{r}
map_seasons_zoom
```
`r figs("map_seasons_zoom", caption = "Kernel density clusters of sightings and strandings during each season of the year.")`
```{r archive figures, include = FALSE}
# ###all records jitter, facet - record type
# map_type <-
# ggmap(get_map(location = WAbox, source = "google", maptype = "terrain")) +
# geom_jitter(data = HP_data, aes(x = long_all, y = lat_all),
# width = .1, height = .1, size = .7, alpha = .5, color = 'grey50') +
# scale_alpha(range = c(0, 0.3), guide = FALSE) +
# xlab("") + ylab("") +
# theme(
# axis.line = element_blank(),
# axis.ticks = element_blank(),
# axis.text = element_blank(),
# strip.text = element_text(size = 10),
# strip.background = element_blank(),
# panel.border = element_rect(colour = "black", fill = NA, size = 5)) +
# facet_wrap(~ Record_Type)
#
# #Record type - heatmap color
# map_type_zoom <-
# ggmap(get_map(location = pugetbox, source = "google", maptype = "terrain")) +
# geom_jitter(data = HP_data, aes(x = long_all, y = lat_all),
# width = .1, height = .1, size = 0.3, alpha = .3, col = 'grey50') +
# stat_density2d(data = HP_data, aes(x = long_all, y = lat_all, color = Record_Type),
# bins = 4, size = .8) + scale_color_manual(values = color2) +
# theme(legend.text = element_text(size = 11),
# axis.title = element_blank(),
# axis.ticks = element_blank(),
# axis.text = element_blank(),
# axis.line = element_blank(),
# legend.title = element_blank(),
# legend.position = 'bottom')
#figs("map_type", caption = "Kernel density clusters of sightings and strandings over the study period.")
# #counts per year
# records_yr <- ggplot(data = numberperyear, aes(x = Year, y = cnt)) +
# geom_line() +
# geom_smooth( method = "loess", se = FALSE, col = 'black', size = 0.8, linetype = 3) +
# xlab("") + ylab("Annual Sightings") +
# scale_y_continuous(labels = comma) +
# scale_x_continuous(breaks = c(1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015)) +
# fig_theme()
#
# #Line plot for sum of monthly counts
# records_month_tot <- ggplot(data = numberpermonth %>% group_by(Month) %>% summarize(cnt = sum(cnt)),
# aes(x = factor(Month), y = cnt)) +
# geom_bar(stat = 'identity') +
# xlab("") + ylab("Total Monthly Sightings") +
# scale_y_continuous(labels = comma) +
# fig_theme()
#
# #Bar for sum of seasonal counts
# records_seas_tot <- ggplot(data = numberperseason %>% group_by(Season) %>% summarize(cnt = sum(cnt)),
# aes(x = factor(Season), y = cnt)) +
# geom_bar(stat = 'identity') +
# xlab("") + ylab("Total Seasonal Sightings") +
# scale_y_continuous(labels = comma) +
# fig_theme()
#
```