-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathksl_norm_test.R
261 lines (219 loc) · 7.14 KB
/
ksl_norm_test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# ksl_norm_test.R
# Kolmogorov-Smirnov-Lilliefors test for party and candidate votes.
# CC BY-SA. W.A. Borici, 2021.
# Full license terms at https://creativecommons.org/licenses/by-sa/4.0/.
# Output files
# Normality tests based on the empirical probabilities of:
# 1. Parties (PS, PD, PS/PD ratio, Other Parties)
# 2. Relative turnout
# 3. Relative invalid ballots
# at the administrative unit level (for all 383 admin units).
# Run the Kolmogorov-Smirnov normality test with Lilliefors criteria on
# each of pPS, pPD, pPSPD, pOP, pTurnout and pInvalid.
# NormTestsWithVisuals() and partyVotesP are not defined in this file.
# NOTE(review): the meaning of the third argument (0) and of the six elements
# of the returned object is not visible here - confirm against the helper.
# Every bare `result...[[i]]` expression below relies on top-level
# auto-printing; source() without echo = TRUE / print.eval = TRUE will not
# display them.
# ----
# PS
resultPS <- NormTestsWithVisuals(partyVotesP$pPS, "PS", 0)
# Print results - graph and text - in files
resultPS[[1]]
resultPS[[2]]
resultPS[[3]]
resultPS[[4]]
resultPS[[5]]
# Compute the Lilliefors critical value for reference (approximation for
# n > 50): D_crit = 0.895 / ((0.83 + n) / sqrt(n) - 0.01), where
# n = length(partyVotesP$pPS).
# NOTE(review): 0.895 is presumably the alpha = 0.05 coefficient - confirm.
# It is derived from the PS sample size only and is printed between
# elements 5 and 6 of the PS result.
D_crit <-0.895/((0.83 + length(partyVotesP$pPS)) /
sqrt(length(partyVotesP$pPS)) - 0.01)
D_crit
resultPS[[6]]
# ----
# PD
resultPD <- NormTestsWithVisuals(partyVotesP$pPD, "PD", 0)
# Print results - graph and text - in files
resultPD[[1]]
resultPD[[2]]
resultPD[[3]]
resultPD[[4]]
resultPD[[5]]
resultPD[[6]]
# ----
# Share PS/PD - a simple metric of inter-party competitiveness
resultPSPD <- NormTestsWithVisuals(partyVotesP$pPSPD, "PS/PD ratio", 0)
# Print results - graph and text - in files
resultPSPD[[1]]
resultPSPD[[2]]
resultPSPD[[3]]
resultPSPD[[4]]
resultPSPD[[5]]
resultPSPD[[6]]
# ----
# Other Parties
resultOP <- NormTestsWithVisuals(partyVotesP$pOP, "Other Parties", 0)
# Print results - graph and text - in files
resultOP[[1]]
resultOP[[2]]
resultOP[[3]]
resultOP[[4]]
resultOP[[5]]
resultOP[[6]]
# ----
# Turnout
resultTurnout <- NormTestsWithVisuals(partyVotesP$pTurnout, "Turnout", 0)
# Print results - graph and text - in files
resultTurnout[[1]]
resultTurnout[[2]]
resultTurnout[[3]]
resultTurnout[[4]]
resultTurnout[[5]]
resultTurnout[[6]]
# ----
# Invalid ballots
resultInvalid <- NormTestsWithVisuals(partyVotesP$pInvalid,
"Invalid Ballots", 0)
# Print results - graph and text - in files
resultInvalid[[1]]
resultInvalid[[2]]
resultInvalid[[3]]
resultInvalid[[4]]
resultInvalid[[5]]
resultInvalid[[6]]
# END NORMALITY TEST at ADMIN UNIT
# Normality tests based on the empirical probabilities of:
# 1. Parties (PS, PD, Other Parties)
# 2. Relative turnout
# 3. Relative invalid ballots
# at the municipality level (for all 61 Albanian municipalities).
# Run the Kolmogorov-Smirnov normality test with Lilliefors criteria on
# each of pPS, pPD, pOP, pTurnout and pInvalid.
# votesGroupedByMunicipality is not defined in this file.
# The result* variables assigned here overwrite the admin-unit results of
# the same names. No PS/PD ratio test and no critical value (D_crit) are
# computed at this level.
# ----
# PS
resultPS <- NormTestsWithVisuals(votesGroupedByMunicipality$pPS, "PS", 0)
# Print results - graph and text - in files
resultPS[[1]]
resultPS[[2]]
resultPS[[3]]
resultPS[[4]]
resultPS[[5]]
resultPS[[6]]
# ----
# PD
resultPD <- NormTestsWithVisuals(votesGroupedByMunicipality$pPD, "PD", 0)
# Print results - graph and text - in files
resultPD[[1]]
resultPD[[2]]
resultPD[[3]]
resultPD[[4]]
resultPD[[5]]
resultPD[[6]]
# ----
# Other Parties
resultOP <- NormTestsWithVisuals(votesGroupedByMunicipality$pOP,
"Other Parties", 0)
# Print results - graph and text - in files
resultOP[[1]]
resultOP[[2]]
resultOP[[3]]
resultOP[[4]]
resultOP[[5]]
resultOP[[6]]
# ----
# Turnout
resultTurnout <- NormTestsWithVisuals(votesGroupedByMunicipality$pTurnout,
"Turnout", 0)
# Print results - graph and text - in files
resultTurnout[[1]]
resultTurnout[[2]]
resultTurnout[[3]]
resultTurnout[[4]]
resultTurnout[[5]]
resultTurnout[[6]]
# ----
# Invalid ballots
resultInvalid <- NormTestsWithVisuals(votesGroupedByMunicipality$pInvalid,
"Invalid Ballots", 0)
# Print results - graph and text - in files
resultInvalid[[1]]
resultInvalid[[2]]
resultInvalid[[3]]
resultInvalid[[4]]
resultInvalid[[5]]
resultInvalid[[6]]
# END NORMALITY TEST at MUNICIPALITY LEVEL
# ----
# So far we have assumed a normal distribution, based on the i.i.d.
# assumption for the party vote-share data. Below, we analyze which other
# distributions might best fit each of the data sets:
# PS - fits Weibull
# Store the fit comparison and auto-print it in one step (parenthesised
# assignment).
(resultPS <- FitOtherDistributions(partyVotesP$pPS, "PS", FALSE))
# Overlay the Weibull density on a density-scaled histogram of the sample.
# NOTE(review): shape/scale are hard-coded, presumably copied from the fit
# output above - re-check them whenever the input data change.
x <- partyVotesP$pPS
crv <- function(x) dweibull(x, shape = 5.2020632, scale = 0.5345796)
hist(x,
     breaks = "FD", freq = FALSE,
     main = "Weibull fit for PS vote share",
     xlab = "PS vote share",
     col = "orange")
# curve() evaluates crv on its own grid of x values spanning [0, 1].
curve(crv(x), from = 0, to = 1, add = TRUE, col = "navy", lwd = 2)
# PD - fits normal
# Store the fit comparison and auto-print it in one step (parenthesised
# assignment).
(resultPD <- FitOtherDistributions(partyVotesP$pPD, "PD", FALSE))
# Overlay the normal density on a density-scaled histogram of the sample.
# NOTE(review): mean/sd are hard-coded, presumably copied from the fit
# output above - re-check them whenever the input data change.
x <- partyVotesP$pPD
crv <- function(x) dnorm(x, mean = 0.3894273, sd = 0.1164794)
hist(x,
     breaks = "FD", freq = FALSE,
     main = "Normal fit for PD vote share",
     xlab = "PD vote share",
     col = "orange")
# curve() evaluates crv on its own grid of x values spanning [0, 1].
curve(crv(x), from = 0, to = 1, add = TRUE, col = "navy", lwd = 2)
# Share PS/PD - fits log-normal
resultPSPD <- FitOtherDistributions(partyVotesP$pPSPD, "PS/PD", FALSE)
resultPSPD
# Plot the log-normal density over a density-scaled histogram of the ratio.
# NOTE(review): meanlog/sdlog are hard-coded, presumably copied from the fit
# output above - re-check them whenever the input data change.
x <- partyVotesP$pPSPD
crv <- function(x) dlnorm(x, meanlog = 0.2512484, sdlog = 0.5823473)
# The title now names the distribution that is actually drawn (dlnorm); it
# previously read "Normal fit", contradicting the overlaid curve.
hist(partyVotesP$pPSPD, breaks = "FD", freq = FALSE, col = "orange",
     main = "Log-normal fit for PS-to-PD vote share ratio",
     xlab = "PS/PD vote share ratio")
# The ratio is not bounded by 1, so the curve is drawn over [0, 10].
# dlnorm() is already vectorised, so no Vectorize() wrapper is needed.
curve(crv(x), from = 0, to = 10, add = TRUE, lwd = 2, col = "navy")
# Other Parties - fits beta
# Store the fit comparison and auto-print it in one step (parenthesised
# assignment).
(resultOP <- FitOtherDistributions(partyVotesP$pOP, "Other Parties", FALSE))
# Overlay the beta density on a density-scaled histogram of the sample.
# NOTE(review): shape1/shape2 are hard-coded, presumably copied from the fit
# output above - re-check them whenever the input data change.
x <- partyVotesP$pOP
crv <- function(x) dbeta(x, shape1 = 1.940916, shape2 = 14.430417)
hist(x,
     breaks = "FD", freq = FALSE,
     main = "Beta fit for other parties' vote share",
     xlab = "Other parties' vote share",
     col = "orange")
# curve() evaluates crv on its own grid of x values spanning [0, 1].
curve(crv(x), from = 0, to = 1, add = TRUE, col = "navy", lwd = 2)
# Turnout - fits Weibull
resultTurnout <- FitOtherDistributions(partyVotesP$pTurnout, "Turnout", FALSE)
resultTurnout
# Plot the Weibull density over a density-scaled histogram of turnout.
# NOTE(review): shape/scale are hard-coded, presumably copied from the fit
# output above - re-check them whenever the input data change.
x <- partyVotesP$pTurnout
crv <- function(x) dweibull(x, shape = 5.6563431, scale = 0.5028294)
# `xrange` is never assigned in this script, so an unguarded `xlim = xrange`
# stops with "object 'xrange' not found" unless some other script has
# defined it. Honour an externally defined numeric `xrange` when present;
# otherwise fall back to the [0, 1] span the fitted curve is drawn over.
hist(partyVotesP$pTurnout, breaks = "FD", freq = FALSE, col = "orange",
     main = "Weibull fit for voter turnout",
     xlim = if (exists("xrange", mode = "numeric")) xrange else c(0, 1),
     xlab = "Turnout")
# dweibull() is already vectorised, so no Vectorize() wrapper is needed.
curve(crv(x), from = 0, to = 1, add = TRUE, lwd = 2, col = "navy")
# Invalid Ballots - fits Normal
# Parenthesised assignment: store the fit comparison and auto-print it.
(resultInvalid <- FitOtherDistributions(
  partyVotesP$pInvalid, "Invalid Ballots", FALSE
))
# Repeat the distribution fittings at the municipality level, using
# votesGroupedByMunicipality. Each parenthesised assignment stores the
# result and auto-prints it in one step; the result* variables overwrite
# the admin-unit results of the same names.
# ----
# PS - fits Weibull
(resultPS <- FitOtherDistributions(
  votesGroupedByMunicipality$pPS, "PS", FALSE
))
# PD - fits normal
(resultPD <- FitOtherDistributions(
  votesGroupedByMunicipality$pPD, "PD", FALSE
))
# Other Parties - fits beta
(resultOP <- FitOtherDistributions(
  votesGroupedByMunicipality$pOP, "Other Parties", FALSE
))
# Turnout - fits Weibull
(resultTurnout <- FitOtherDistributions(
  votesGroupedByMunicipality$pTurnout, "Turnout", FALSE
))
# Invalid Ballots - fits Normal
(resultInvalid <- FitOtherDistributions(
  votesGroupedByMunicipality$pInvalid, "Invalid Ballots", FALSE
))