-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04 Map Precinct to Congressional District.R
220 lines (188 loc) · 10.4 KB
/
04 Map Precinct to Congressional District.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#2022 Election Project
#Author: Scott Onestak
#4 Map Precinct to Congressional District
library(tidycensus)
library(tigris)
library(sf)
library(ggplot2)
library(tidyverse)
library(stringr)
library(geojsonR)
library(jsonlite)
library(geojsonsf)
#Session settings
#scipen = 100 penalises scientific notation so numbers print/write in fixed form
options(scipen = 100)
#switch off sf's s2 engine for the geometry operations used below
sf_use_s2(FALSE)
#State abbreviation -> zero-padded two-character FIPS code (territories and DC dropped)
state_to_fips <- read.csv(file = "Data/State_to_FIPS.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  transmute(
    State,
    STATEFP = formatC(as.numeric(FIPS), width = 2, format = "d", flag = 0)
  ) %>%
  filter(!(State %in% c('AS','GU','MP','PR','VI','DC')))
#Download the NYT 2020 precinct results (gzipped GeoJSON) once; reused later for KY and WV
nyt <- "https://int.nyt.com/newsgraphics/elections/map-data/2020/national/precincts-with-results.geojson.gz" %>%
  url('rb') %>%
  gzcon() %>%
  geojson_sf()
#first two characters of GEOID are kept as the state FIPS code
nyt$STATEFP <- substr(nyt$GEOID, 1, 2)
#The 50 states to process, in the order they are looped over
theStates <- c(
  "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
  "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
  "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
  "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
  "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"
)
#Election cycles whose congressional district maps are used (every two years)
cdYears <- seq(2008, 2022, by = 2)
#Read in cds and store as objects

#Congress number used in the CD<congress>FP column of the CD_<year> shapefile.
cd_congress_number <- function(cd_year) {
  if (cd_year == 2010) {
    111 #2010 file was corrupted so just using 2008 map since no change
  } else if (cd_year == 2020) {
    116 #the 2020 map is the 2018 map since no changes
  } else {
    111 + (cd_year - 2008) / 2
  }
}

#Read one cycle's congressional district shapefile and return it as an sf object
#in EPSG:4326 with State / district / name columns.
#  cd_year     - election cycle (one of cdYears)
#  fips_lookup - data frame with State and STATEFP columns (state_to_fips)
read_cd_map <- function(cd_year, fips_lookup) {
  if (cd_year > 2020) {
    #the 2022 file only needs its state column renamed
    cd_map <- st_sf(st_read(paste0("Data/2022 Congressional District Shapefiles/CD_", cd_year, ".shp"))) %>%
      st_transform(crs = 4326) %>%
      rename(State = state)
    return(cd_map)
  }

  congress <- cd_congress_number(cd_year)
  cd_map <- st_sf(st_read(paste0("Data/Previous Congressional District Shapefiles/CD_", cd_year, ".shp"))) %>%
    st_transform(crs = 4326)

  #the 2008 and 2010 files name the state FIPS column STATEFP10
  if (cd_year <= 2010) {
    cd_map <- cd_map %>% rename(STATEFP = STATEFP10)
  }

  cd_map %>%
    rename(CD = paste0("CD", congress, "FP")) %>%
    #district code "00" is relabelled "01"
    mutate(CD = ifelse(CD == "00", "01", CD)) %>%
    #keep FIPS <= 56 except 11, and drop the "ZZ" placeholder district
    filter(as.numeric(STATEFP) <= 56 & as.numeric(STATEFP) != 11 & CD != "ZZ") %>%
    left_join(fips_lookup, by = "STATEFP") %>%
    #fallback for FIPS 08 when the lookup join leaves State missing
    mutate(State = ifelse(is.na(State) & STATEFP == "08", "CO", State)) %>%
    mutate(name = paste0(State, "-", CD)) %>%
    rename(district = CD)
}

#theCDs is positional (no names): element k holds the map for cdYears[k]
theCDs <- lapply(cdYears, read_cd_map, fips_lookup = state_to_fips)

#keep the per-cycle CD_<year> objects available as before
for (k in seq_along(cdYears)) {
  assign(paste0("CD_", cdYears[k]), theCDs[[k]])
}
#Loop through states to stack results

#Read the Harvard precinct shapefile for one state and election year.
read_harvard_precincts <- function(state, election_year) {
  st_sf(st_read(paste0("Data/Harvard Precinct Data/", election_year, "/", state, "/",
                       tolower(state), "_", election_year, ".shp")))
}

#Load the 2016 and 2020 vote geographies for one state.
#  state         - two-letter state abbreviation
#  nyt_precincts - NYT 2020 precinct sf object (needs STATEFP, GEOID, votes_dem, votes_rep)
#Returns list(y2016 = sf, y2020 = sf); each has DEM and REP columns and is in EPSG:4326.
load_state_precincts <- function(state, nyt_precincts) {
  if (state == "KY") {
    precincts_2016 <- read_harvard_precincts(state, 2016) %>%
      rename(DEM = G16PREDCLI,
             REP = G16PRERTRU) %>%
      st_transform(crs = 4326)

    #for counties reporting 2020 at the precinct level, use that data... if not, aggregate at county
    #highly populated county splits like Jefferson county have precinct, so it should be pretty accurate even with different CD cuts
    ky_counties <- counties(state = "KY", cb = FALSE, year = 2020) %>%
      mutate(COUNTY = toupper(NAME))
    precincts_2020 <- nyt_precincts %>%
      filter(STATEFP == "21") %>%
      rename(DEM = votes_dem, REP = votes_rep) %>%
      mutate(COUNTYFP = substr(GEOID, 3, 5)) %>%
      left_join(ky_counties %>% st_drop_geometry() %>% select(COUNTYFP, COUNTY), by = "COUNTYFP") %>%
      select(COUNTYFP, COUNTY, DEM, REP, geometry) %>%
      st_transform(crs = 4326)

    #counties with no NYT precinct rows get county-level totals from the
    #Kentucky State Board of Elections file
    ky_sboe <- read.csv("Data/KY_SBOE_County_2020/KY_2020.csv", header = TRUE) %>%
      mutate(COUNTY = toupper(COUNTY))
    missing_counties <- ky_counties %>%
      filter(!COUNTY %in% unique(precincts_2020$COUNTY)) %>%
      left_join(ky_sboe, by = "COUNTY") %>%
      select(COUNTYFP, COUNTY, DEM, REP, geometry) %>%
      st_transform(crs = 4326)

    #stack the precincts and counties
    precincts_2020 <- rbind(precincts_2020, missing_counties)
  } else if (state == "WV") {
    #2016 is handled at county level: county shapes joined to OpenElections
    #presidential votes summed by county
    wv_counties <- counties(state = "WV", cb = FALSE, year = 2016) %>%
      mutate(COUNTY = toupper(NAME))
    wv_votes <- read.csv("Data/OpenElections_WV_2016/20161108__wv__general__precinct__raw.csv", header = TRUE) %>%
      filter(election_type == "general" & office %in% c("U.S. President", "President")) %>%
      mutate(name_raw = toupper(name_raw)) %>%
      filter(name_raw %in% c("DONALD J. TRUMP", "HILLARY CLINTON")) %>%
      mutate(party = ifelse(name_raw == "DONALD J. TRUMP", "REP", "DEM")) %>%
      mutate(COUNTY = toupper(parent_jurisdiction)) %>%
      select(party, COUNTY, votes) %>%
      group_by(COUNTY, party) %>%
      summarise(votes = sum(votes, na.rm = TRUE)) %>%
      ungroup() %>%
      spread(key = "party", value = "votes")
    precincts_2016 <- wv_counties %>%
      left_join(wv_votes, by = "COUNTY") %>%
      st_transform(crs = 4326)

    #2020 comes from the NYT precinct file
    precincts_2020 <- nyt_precincts %>%
      filter(STATEFP == "54") %>%
      rename(DEM = votes_dem, REP = votes_rep) %>%
      st_transform(crs = 4326)
  } else {
    precincts_2016 <- read_harvard_precincts(state, 2016) %>%
      rename_all(toupper) %>%
      rename(DEM = G16PREDCLI,
             REP = G16PRERTRU) %>%
      st_transform(crs = 4326)
    precincts_2020 <- read_harvard_precincts(state, 2020) %>%
      rename_all(toupper) %>%
      rename(DEM = G20PREDBID,
             REP = G20PRERTRU) %>%
      st_transform(crs = 4326)
  }
  list(y2016 = precincts_2016, y2020 = precincts_2020)
}

#Assign a row identifier and the total area to each precinct, then drop Z/M
#dimensions and repair geometries once so the result can be reused for every CD map.
prepare_precincts <- function(precincts) {
  precincts$PRECINCTID <- seq_len(nrow(precincts))
  precincts$AREA <- as.numeric(st_area(precincts))
  st_make_valid(st_zm(precincts))
}

#Apportion precinct votes to congressional districts by share of precinct area.
#  districts           - sf of one state's districts (State, district, name columns)
#  precincts           - sf from prepare_precincts() with DEM, REP, PRECINCTID, AREA
#  election_year       - suffix appended to every output column except name
#  national_dem_margin - national two-party DEM minus REP percentage, added to the
#                        district REP minus DEM margin to give LEAN
#Returns one row per district name with DEM/REP/TOTAL votes, two-party percentages and LEAN.
allocate_precinct_votes <- function(districts, precincts, election_year, national_dem_margin) {
  allocated <- st_intersection(districts, precincts) %>%
    mutate(intersect_area = as.numeric(st_area(.))) %>%
    st_drop_geometry() %>%
    select(State, district, name, DEM, REP, PRECINCTID, intersect_area, AREA) %>%
    #share of the precinct inside the district; pieces that round to 0 are dropped
    mutate(prct = round(intersect_area / AREA, 2)) %>%
    filter(prct > 0) %>%
    mutate(DEM_VOTES = round(DEM * prct, 0),
           REP_VOTES = round(REP * prct, 0)) %>%
    group_by(name) %>%
    summarise(DEM_VOTES = sum(DEM_VOTES, na.rm = TRUE),
              REP_VOTES = sum(REP_VOTES, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(TOTAL_VOTES = DEM_VOTES + REP_VOTES,
           DEM_PRCT = round(DEM_VOTES / TOTAL_VOTES, 4) * 100,
           REP_PRCT = round(REP_VOTES / TOTAL_VOTES, 4) * 100,
           LEAN = REP_PRCT - DEM_PRCT + national_dem_margin)

  value_cols <- names(allocated) != "name"
  names(allocated)[value_cols] <- paste0(names(allocated)[value_cols], "_", election_year)
  allocated
}

#one slot per state x CD map; bound together once at the end instead of growing with rbind
results <- vector("list", length(theStates) * length(cdYears))
slot <- 0L

for (i in seq_along(theStates)) {
  curr_state <- theStates[i]
  cat(i,curr_state,"\n",sep=" ")

  #read in the precincts and prepare them once per state
  state_precincts <- load_state_precincts(curr_state, nyt)
  theprecincts_2016 <- prepare_precincts(state_precincts$y2016)
  theprecincts_2020 <- prepare_precincts(state_precincts$y2020)

  for (j in seq_along(cdYears)) {
    cdYear <- cdYears[j]

    #districts of the current state under this cycle's map
    state_CDs <- theCDs[[j]] %>% filter(State == curr_state)
    state_CDs <- st_make_valid(st_zm(state_CDs))

    #national two-party percentages used for the lean: 51.11 / 48.89 (2016), 52.27 / 47.73 (2020)
    results_2016 <- allocate_precinct_votes(state_CDs, theprecincts_2016, 2016, 51.11 - 48.89)
    results_2020 <- allocate_precinct_votes(state_CDs, theprecincts_2020, 2020, 52.27 - 47.73)

    slot <- slot + 1L
    results[[slot]] <- results_2016 %>%
      left_join(results_2020, by = "name") %>%
      mutate(year = cdYear) %>%
      select(year, everything())
  }
}

theStack <- bind_rows(results)
#Write out the 2-party results by the precinct mappings to 2008 - 2022 CDs
results_path <- 'Data/Precinct_Two_Party_Results/2016_2020_results.csv'
#create the output folder if it is missing so the finished run is not lost at the final write
dir.create(dirname(results_path), recursive = TRUE, showWarnings = FALSE)
write.csv(theStack, results_path, row.names = FALSE)