-
Notifications
You must be signed in to change notification settings - Fork 1
/
comment_sort.Rmd
156 lines (122 loc) · 9.46 KB
/
comment_sort.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
---
title: "Sorting and Analyzing OCNMS Public Scoping Comments"
author: "Amelia Ritger"
date: "2023-03-30"
output: html_document
---
# Load packages
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(here) #for folder structure
```
#################
HOW-TO:
1. Want to create a subcategory? Put the keywords in quotation marks, separated by commas, and within c().
2. Want to create a main category that is a stand-alone with no subcategories? Do step #1.
3a. Want to create a main category that has multiple subcategories? Put the subcategory names separated by commas within vctrs::vec_c().
3b. Does that main category also have keywords? Do #3a and include each keyword within quotation marks, separated by commas, within vec_c().
Need to see it in action? Check out how Amelia did it below under "Generate categories and input keywords".
#############
# Generate categories and input keywords
```{r}
# Keyword vectors used to bin comments into categories.
# Each vector is named after a category (or subcategory) and holds the keyword
# stems that are searched for in the comment text. Main categories that have
# subcategories are built with vctrs::vec_c(), which concatenates the
# subcategory vectors plus any extra keywords that belong to the main category.
# Every character vector defined here becomes a category column later on.

#Partnerships and Coordinated Management
treaty_trust_responsibility <- c("indigenous", "trib", "makah", "hoh", "quinault", "quileute", "native", "indian", "treaty", "sovereign", "subsist", "harvest")
collaborative_coordinated <- c("indigenous", "trib", "local", "state", "federal", "international", "canad", "collaborat", "coordinat", "ipc", "govern", "council", "department", "agenc", "navy", "coast guard", "partner", "manage", "jurisdiction", "fisher", "wildlife refuge", "county")
other_partner <- c("SAC", "advisory council", "MRC", "resource committee", "cooperative", "MOU", "MOA", "Feiro")
partnerships_coordinated_management <- vctrs::vec_c(treaty_trust_responsibility, collaborative_coordinated, other_partner, "relationship")
#Changing Ocean Conditions
changing_ocean <- c("climate", "adapt", "long-term", "hypoxi", "acid", "MHW", "OA", "HAB", "bloom", "harm", "heat", "stress", "warm", "mitigat", "quality", "sequest", "carbon", "storm")
#Resource Monitoring and Assessments
marine_research <- c("intertidal", "tide", "kelp", "seafloor", "sea floor", "acoustic", "sound", "research", "nois", "species", "habitat", "ecol", "scien", "biol", "map", "buoy", "mooring", "atlas")
social_science <- c("health", "wellbeing", "econom", "demograph", "resilienc", "household", "justice", "social science", "behav")
cultural_maritime <- c("maritime", "cultur", "heritage", "archaeolog", "wreck", "ornamental", "sense of place", "regalia", "history", "ss pacific", "salvage")
resource_monitoring_assessment <- vctrs::vec_c(marine_research, social_science, cultural_maritime, "data", "analy", "The cloud", "archive", "monitor", "study", "studies")
#Compatible Resource Use
fishing <- c("fish", "crab", "clam", "harvest")
non_fishing_rec <- c("boat", "motor", "MPWC", "recreat", "touris", "econom", "public access")
offshore_wind <- c("wind", "turbine", "float", "energy", "renewable")
other_emergent <- c("energy", "renewable", "optic", "cable", "carbon capture", "desalination", "land use", "development", "deep-sea", "drone", "mariculture")
compatible_resource_use <- vctrs::vec_c(fishing, non_fishing_rec, offshore_wind, other_emergent, "habitat", "seafloor", "disturb", "oil", "gas", "trampl", "sink", "ground", "compatib", "military", "navy", "coast guard")
#Wildlife Disturbance
#"nois" and "nest" are separate keywords (they were previously fused into the single, never-matching keyword "nois nest")
wildlife_disturbance <- c("disturb", "flush", "strike", "tangle", "motor", "vessel", "military", "navy", "coast guard", "flight", "plane", "drone", "shipping", "acoustic", "sound", "nois", "nest", "fireworks", "wildlife", "haul out", "trampl")
#Water Quality
water_quality <- c("spill", "area to be avoided", "ATBA", "marine debris", "trash", "plastic", "garbage", "HAB", "vessel", "exhaust", "gas", "black water", "gray water", "contamin", "waste", "industrial", "runoff", "treatment", "agricultur", "pollut", "sewage", "discharge", "bloom", "water quality", "chemistry")
#Introduced Species
introduced_sp <- c("invasiv", "introduc", "not native", "green crab", "non-indigenous", "not indigenous", "non-native")
#Education and Outreach
education_program <- c("student", "teach", "school", "K-12", "K12", "train", "TPD", "universit", "college", "BWET", "B-WET")
visitor_interpret <- c("visit", "tourism", "interpret", "display", "kiosk", "sign", "park", "Cape Flattery")
education_outreach <- vctrs::vec_c(education_program, visitor_interpret, "outreach", "volunteer", "media", "citizen science", "speaker", "communit", "extension", "engagement", "public", "steward", "website")
#Regulatory/Rulemaking suggestions
sanctuary_boundary <- c("boundar", "designat")
regulatory_rulemaking <- vctrs::vec_c(sanctuary_boundary, "regulat", "rule", "overflight", "military", "navy", "coast guard", "discharge", "cruise", "growler")
#OCNMS Operations and Administration
operations_admin <- c("staff", "capacity", "infrastructur", "admin", "budget", "facilit", "money", "fund", "office")
#NOAA's implementation of OCNMS regulations and issuance of permits
noaa_implementation <- c("enforc", "violat", "complian", "voluntary", "police", "rule", "permit", "consult")
```
# Download the data and tidy it up
## Merge the various comment files
```{r, message = FALSE}
#Find every .csv file sitting directly in the "data" folder (sub-folders are not searched).
#The dot is escaped so that only a real ".csv" extension matches, not any file name that merely ends in "csv".
file_names <- list.files(here("data"), pattern = "\\.csv$", recursive = FALSE, full.names = TRUE)
#Stop early with a clear message if there is nothing to read
if (length(file_names) == 0) {
  stop("No .csv files found in ", here("data"), call. = FALSE)
}
#Read all files and stack them into one dataframe.
#col_names = FALSE means each file's own header rows are read in as ordinary rows (they are filtered out below).
all_merged <- read_csv(file_names, col_names = FALSE) %>%
  setNames(c("first_name", "last_name", "affiliation", "comment", "additional_notes", "location", "date_received", "notetaker_initials", "document_id", "date_posted", "email", "attachment")) #assign the 12 column names
#Clean up the dataframe you have just created
all_merged_clean <- all_merged %>%
  filter(
    !is.na(comment), #remove any rows without a comment
    is.na(date_received) | !str_detect(date_received, "received"), #remove header rows (date column contains the word 'received')
    is.na(first_name) | !str_detect(first_name, "Leave blank if") #remove header rows (name column contains the 'Leave blank if' instruction text)
  )
#Delete file_names from local environment because you don't need it anymore
#(it is a character vector, so it would otherwise be picked up as a category further down)
rm(file_names)
```
## Create a dataframe with all comments
```{r}
#Build the comment dataframe with a single "name" column.
#Missing first/last names are dropped while uniting (na.rm = TRUE) instead of being pasted in as the text "NA"
#and stripped out afterwards. Matching on the literal text "NA" mislabelled real names that contain it
#(e.g. "DIANA NASH" contains "NA NA") and left "NA Lastname" behind when only the first name was missing.
comm <- all_merged_clean %>%
  unite("name", "first_name", "last_name", sep = " ", remove = TRUE, na.rm = TRUE) %>% #combine first and last name columns, skipping missing parts
  mutate(name = ifelse(name == "", "Anonymous", name)) %>% #both names missing -> "Anonymous"
  relocate(c(additional_notes, comment), .after = attachment) #move the notes and comment columns to the end of the dataframe
```
# Categorize comments
## Run the keyword inputs for each category
**Make sure you RESTART R and CLEAR OUTPUT before running the following code each time you make changes to the code or re-run the code, as it uses data from your environment!**
```{r}
#Create a named list of every character vector currently in the environment (name = category, contents = keywords).
#NOTE: this picks up ANY character object in the environment, not only the keyword vectors,
#which is why R must be restarted before re-running this chunk.
vec_list <- Filter(is.character, mget(ls()))
#Category names, taken from the list itself so names and keyword vectors can never fall out of step
vec_names <- names(vec_list)
#create a dataframe to fill with the for loop
comm_all <- comm
### Run through all vectors
for (i in seq_along(vec_list)) {
  #Build one regular expression per category:
  #  (?i)      makes the match not case sensitive
  #  (^|\\s)   the keyword must begin a word: it sits at the very start of the comment or right after whitespace.
  #            (Previously the pattern required a literal space before every keyword, so a keyword that opened
  #            a comment or followed a line break/tab was never found.)
  #  (a|b|c)   any one of the category's keywords
  #Keywords preceded by punctuation (e.g. an opening parenthesis or a hyphen) are still not matched.
  keyword_pattern <- paste0("(?i)(^|\\s)(", paste(vec_list[[i]], collapse = "|"), ")")
  comm_all <- comm_all %>% #iteratively replace the dataframe for each category
    mutate(!!vec_names[i] := ifelse(str_detect(comment, keyword_pattern), "X", "")) #new column named after the category: "X" if the comment contains any of its keywords, "" otherwise
}
```
## Create a column that lists all categories the comment was binned into, and another column that lists the total number of categories the comment was binned into
```{r}
#Summarise, for every comment, which categories it was binned into and how many.
#Category columns are selected by name via vec_names rather than by a hard-coded first:last column range,
#so categories added later are included no matter where their name falls alphabetically.
comm_sort <- comm_all %>%
  mutate(across(all_of(vec_names), ~ case_when(. != "" ~ cur_column()), .names = "new_{col}")) %>% #for each category column create a helper column "new_<category>" holding the category name where the comment was flagged (NA otherwise)
  unite(categories, starts_with("new_"), na.rm = TRUE, sep = ", ") %>% #merge the helper columns (skipping NAs) into a single comma-separated "categories" column
  mutate(num_categories = ifelse(categories == "", 0, 1 + str_count(categories, pattern = ","))) %>% #number of categories = number of commas + 1 (a comment sorted into one category has no commas); 0 if it matched nothing
  relocate(c(document_id,date_received,date_posted,attachment,name,affiliation,location,email,comment,notetaker_initials,additional_notes,categories,num_categories), .before = all_of(vec_names)) #reorder columns for easy viewing and to reflect CHNMS 2021 NOI Scoping Comment spreadsheet; the descriptive columns go in front of the first category column
#Make sure the output folder exists, then save this first-pass processed comments dataframe to .csv file
dir.create(here("sorted"), showWarnings = FALSE, recursive = TRUE)
write_csv(comm_sort, here("sorted", "comments_sorted.csv"))
```
# Save sorted comments
## Create unique .csv files for each category
```{r, warning = FALSE}
#Write one .csv file per category, containing only the comments flagged for that category.
#Make sure the output folder exists before writing into it
dir.create(here("sorted", "category-specific"), showWarnings = FALSE, recursive = TRUE)
#Loop over the category columns only (columns of comm_sort whose name is a category name)
for (colname in intersect(colnames(comm_sort), vec_names)) {
  comm_category <- comm_sort %>%
    filter(.data[[colname]] == "X") %>% #only keep rows where the cell is checked; the column is referenced by name so filter() receives a plain logical vector (a one-column matrix is deprecated in dplyr >= 1.1.0)
    select(!any_of(vec_names)) #only keep columns without detailed categorization data (i.e. drop every category column)
  write_csv(comm_category, here("sorted", "category-specific", paste(colname, "csv", sep = "."))) #save it as a .csv file named after the category
}
```