-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfishers_combination.py
456 lines (393 loc) · 16.7 KB
/
fishers_combination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
import numpy as np
import scipy as sp
import scipy.stats
import scipy.optimize
from ballot_comparison import ballot_comparison_pvalue
from hypergeometric import trihypergeometric_optim
from sprt import ballot_polling_sprt
import matplotlib.pyplot as plt
import numpy.testing
from contest import ContestType
from contest import Contest
from minerva_s import Minerva_S
def fisher_combined_pvalue(pvalues):
    """
    Find the p-value for Fisher's combined test statistic.

    Fisher's method combines k independent p-values via
    obs = -2 * sum(log(p_i)), which is chi-squared with 2k degrees
    of freedom under the null.

    Parameters
    ----------
    pvalues : array_like
        Array of p-values to combine

    Returns
    -------
    float
        p-value for Fisher's combined test statistic
    """
    pvalues = np.asarray(pvalues)
    # A zero p-value would make log() diverge; by continuity the
    # combined p-value is 0 in that case.
    if np.any(pvalues == 0):
        return 0
    obs = -2 * np.sum(np.log(pvalues))
    # chi2.sf is the survival function 1 - cdf, computed directly in the
    # tail; `1 - chi2.cdf(obs, ...)` underflows to exactly 0 once cdf
    # rounds to 1, losing all precision for small combined p-values.
    return scipy.stats.chi2.sf(obs, df=2 * len(pvalues))
def create_modulus_old(n1, n2, n_w2, n_l2, N1, V_wl, gamma):
    """
    The modulus of continuity for the Fisher's combined p-value.
    Returns the modulus of continuity as a function of the distance
    (delta) between two lambda values.

    Parameters
    ----------
    n1 : int
        sample size in the ballot comparison stratum
    n2 : int
        sample size in the ballot polling stratum
    n_w2 : int
        votes for the reported winner in the ballot polling sample
    n_l2 : int
        votes for the reported loser in the ballot polling sample
    N1 : int
        total number of votes in the ballot comparison stratum
    V_wl : int
        margin (in votes) between w and l in the whole contest
    gamma : float
        gamma from the ballot comparison audit

    Returns
    -------
    callable
        delta -> modulus of continuity at distance delta
    """
    winner_sample = n_w2
    loser_sample = n_l2
    other_sample = n2 - n_w2 - n_l2
    assert winner_sample >= 0 and loser_sample >= 0 and other_sample >= 0

    def comparison_term(delta):
        # Contribution from the ballot comparison stratum; vanishes when
        # that stratum is empty.
        if N1 == 0:
            return 0
        return 2 * n1 * np.log(1 + V_wl * delta / (2 * N1 * gamma))

    def modulus(delta):
        polling_term = (
            2 * winner_sample * np.log(1 + V_wl * delta)
            + 2 * loser_sample * np.log(1 + 2 * V_wl * delta)
            + 2 * other_sample * np.log(1 + 3 * V_wl * delta)
        )
        return polling_term + comparison_term(delta)

    return modulus
def create_modulus(n1, n2, n_w2, n_l2, N1, V_wl, gamma):
    """
    The modulus of continuity for the Fisher's combined p-value.
    Returns the modulus of continuity as a function of the distance
    (delta) between two lambda values.

    Parameters
    ----------
    n1 : int
        sample size in the ballot comparison stratum
    n2 : int
        sample size in the ballot polling stratum
    n_w2 : int
        votes for the reported winner in the ballot polling sample
    n_l2 : int
        votes for the reported loser in the ballot polling sample
    N1 : int
        total number of votes in the ballot comparison stratum
    V_wl : int
        margin (in votes) between w and l in the whole contest
    gamma : float
        gamma from the ballot comparison audit

    Returns
    -------
    callable
        delta -> modulus of continuity at distance delta
    """
    winner_sample = n_w2
    loser_sample = n_l2
    other_sample = n2 - n_w2 - n_l2
    assert winner_sample >= 0 and loser_sample >= 0 and other_sample >= 0

    def comparison_term(delta):
        # Ballot comparison stratum contribution; zero for an empty stratum.
        if N1 == 0:
            return 0
        return 2 * n1 * np.log(1 + V_wl * delta / (2 * N1 * gamma))

    def modulus(delta):
        # Winner and loser ballots share the same log term here; only the
        # unvoted/other ballots pick up the factor-of-2 argument.
        polling_term = (
            2 * (winner_sample + loser_sample) * np.log(1 + V_wl * delta)
            + 2 * other_sample * np.log(1 + 2 * V_wl * delta)
        )
        return polling_term + comparison_term(delta)

    return modulus
def maximize_fisher_combined_pvalue(N_w1, N_l1, N1, N_w2, N_l2, N2,
    pvalue_funs, stepsize=0.05, modulus=None, alpha=0.05, feasible_lambda_range=None):
    """
    Grid search to find the maximum P-value.

    Find the smallest Fisher's combined statistic for P-values obtained
    by testing two null hypotheses at level alpha using data X=(X1, X2).

    Parameters
    ----------
    N_w1 : int
        votes for the reported winner in the ballot comparison stratum
    N_l1 : int
        votes for the reported loser in the ballot comparison stratum
    N1 : int
        total number of votes in the ballot comparison stratum
    N_w2 : int
        votes for the reported winner in the ballot polling stratum
    N_l2 : int
        votes for the reported loser in the ballot polling stratum
    N2 : int
        total number of votes in the ballot polling stratum
    pvalue_funs : array_like
        functions for computing p-values. The observed statistics/sample and known parameters should be
        plugged in already. The function should take the lambda allocation AS INPUT and output a p-value.
    stepsize : float
        size of the grid for searching over lambda. Default is 0.05
    modulus : function
        the modulus of continuity of the Fisher's combination function.
        This should be created using `create_modulus`.
        Optional (Default is None), but increases the precision of the grid search.
    alpha : float
        Risk limit. Default is 0.05.
    feasible_lambda_range : array-like
        lower and upper limits to search over lambda.
        Optional, but a smaller interval will speed up the search.

    Returns
    -------
    dict with
        max_pvalue : float
            maximum combined p-value
        pvalue1 : float
            stratum-1 p-value at the maximizing lambda
        pvalue2 : float
            stratum-2 p-value at the maximizing lambda
        min_chisq : float
            minimum value of Fisher's combined test statistic
        allocation lambda : float
            the parameter that minimizes the Fisher's combined statistic/maximizes the combined p-value
        refined : bool
            was the grid search refined after the first pass?
        stepsize : float
            the final grid step size used
        tol : float
            if refined is True, this is an upper bound on potential approximation error of min_chisq
    """
    assert len(pvalue_funs)==2

    # find range of possible lambda
    if feasible_lambda_range is None:
        feasible_lambda_range = calculate_lambda_range(N_w1, N_l1, N1, N_w2, N_l2, N2)
    (lambda_lower, lambda_upper) = feasible_lambda_range

    test_lambdas = np.arange(lambda_lower, lambda_upper+stepsize, stepsize)
    # Ensure the grid has at least 5 points so the search is not degenerate.
    if len(test_lambdas) < 5:
        stepsize = (lambda_upper + 1 - lambda_lower)/5
        test_lambdas = np.arange(lambda_lower, lambda_upper+stepsize, stepsize)

    fisher_pvalues = np.empty_like(test_lambdas)
    pvalue1s = np.empty_like(test_lambdas)
    pvalue2s = np.empty_like(test_lambdas)
    for i in range(len(test_lambdas)):
        # Stratum 1 is tested at allocation lambda, stratum 2 at 1-lambda;
        # each p-value is clipped at 1 before combining.
        pvalue1 = np.min([1, pvalue_funs[0](test_lambdas[i])])
        pvalue1s[i] = pvalue1
        pvalue2 = np.min([1, pvalue_funs[1](1-test_lambdas[i])])
        pvalue2s[i] = pvalue2
        fisher_pvalues[i] = fisher_combined_pvalue([pvalue1, pvalue2])

    pvalue = np.max(fisher_pvalues)
    max_index = np.argmax(fisher_pvalues)
    alloc_lambda = test_lambdas[max_index]
    pvalue1 = pvalue1s[max_index]
    pvalue2 = pvalue2s[max_index]

    # If p-value is over the risk limit, then there's no need to refine the
    # maximization. We have a lower bound on the maximum.
    if pvalue > alpha or modulus is None:
        return {'max_pvalue' : pvalue,
                'pvalue1' : pvalue1,
                'pvalue2' : pvalue2,
                'min_chisq' : sp.stats.chi2.ppf(1 - pvalue, df=4),
                'allocation lambda' : alloc_lambda,
                'tol' : None,
                'stepsize' : stepsize,
                'refined' : False
                }

    # Use modulus of continuity for the Fisher combination function to check
    # how close this is to the true max
    fisher_fun_obs = scipy.stats.chi2.ppf(1-pvalue, df=4)
    fisher_fun_alpha = scipy.stats.chi2.ppf(1-alpha, df=4)
    dist = np.abs(fisher_fun_obs - fisher_fun_alpha)
    mod = modulus(stepsize)

    # If the grid is fine enough (modulus bound below the distance to the
    # alpha threshold), the current maximum is certified; otherwise recurse
    # on a window of +/- 2 steps around the maximizer with a 10x finer grid.
    if mod <= dist:
        return {'max_pvalue' : pvalue,
                'pvalue1' : pvalue1,
                'pvalue2' : pvalue2,
                'min_chisq' : fisher_fun_obs,
                'allocation lambda' : alloc_lambda,
                'stepsize' : stepsize,
                'tol' : mod,
                'refined' : False
                }
    else:
        lambda_lower = alloc_lambda - 2*stepsize
        lambda_upper = alloc_lambda + 2*stepsize
        refined = maximize_fisher_combined_pvalue(N_w1, N_l1, N1, N_w2, N_l2, N2,
            pvalue_funs, stepsize=stepsize/10, modulus=modulus, alpha=alpha,
            feasible_lambda_range=(lambda_lower, lambda_upper))
        refined['refined'] = True
        return refined
def plot_fisher_pvalues(N, overall_margin, pvalue_funs, alpha=None, feasible_lambda_range=None):
    """
    Plot the Fisher's combined p-value for varying error allocations
    using data X=(X1, X2)

    Parameters
    ----------
    N : array_like
        Array of the two stratum sizes (N1, N2)
    overall_margin : int
        the difference in votes for reported winner and loser in the population
    pvalue_funs : array_like
        functions for computing p-values. The observed statistics/sample and known parameters should be plugged in already. The function should take the lambda allocation AS INPUT and output a p-value.
    alpha : float
        Optional, desired upper percentage point; drawn as a dashed reference line.
    feasible_lambda_range : array-like
        Optional (lower, upper) limits for lambda. If omitted, a conservative
        range is derived from the stratum sizes and the overall margin.

    Returns
    -------
    None; displays a matplotlib scatter plot.
    """
    assert len(N)==2
    assert len(pvalue_funs)==2

    # Bug fix: the original called calculate_lambda_range with undefined
    # per-stratum vote totals (N_w1, N_l1, ...), raising NameError.
    # Without those totals, bound lambda conservatively using
    # |N_ws - N_ls| <= N_s, which gives lambda*V <= 2*N1 and
    # lambda*V >= V - 2*N2 (and the symmetric bounds).
    if feasible_lambda_range is None:
        V = overall_margin
        lambda_lower = max(-2*N[0], V - 2*N[1])/V
        lambda_upper = min(2*N[0], V + 2*N[1])/V
    else:
        (lambda_lower, lambda_upper) = feasible_lambda_range

    # Evaluate the combined p-value on a coarse grid over lambda.
    lambdas = np.arange(lambda_lower, lambda_upper+1, 0.5)
    fisher_pvalues = []
    for lam in lambdas:
        pvalue1 = np.min([1, pvalue_funs[0](lam)])
        pvalue2 = np.min([1, pvalue_funs[1](1-lam)])
        fisher_pvalues.append(fisher_combined_pvalue([pvalue1, pvalue2]))

    plt.scatter(lambdas, fisher_pvalues, color='black')
    if alpha is not None:
        plt.axhline(y=alpha, linestyle='--', color='gray')
    plt.xlabel("Allocation of Allowable Error")
    plt.ylabel("Fisher Combined P-value")
    plt.show()
def calculate_lambda_range(N_w1, N_ell1, N_1, N_w2, N_ell2, N_2):
    '''
    Find the largest and smallest possible values of lambda.

    Input:
    ------
    N_w1 : int
        reported votes for overall reported winner w in stratum 1
    N_ell1 : int
        reported votes for overall reported loser ell in stratum 1
    N_1 : int
        ballots cast in stratum 1
    N_w2 : int
        reported votes for overall reported winner w in stratum 2
    N_ell2 : int
        reported votes for overall reported loser ell in stratum 2
    N_2 : int
        ballots cast in stratum 2

    Returns:
    --------
    (lb, ub): real ordered pair. lb is a sharp lower bound on lambda;
        ub is a sharp upper bound

    Derivation:
    -----------
    Let V = (N_w1 + N_w2) - (N_ell1 + N_ell2), the overall reported margin
    of w over ell across both strata.

    The overstatement error in stratum s is at most the reported margin in
    that stratum plus N_s (all ballots flip to the loser), and at least the
    reported margin minus N_s (all ballots flip to the winner):
        (N_ws - N_ells) - N_s <= margin_error_s <= (N_ws - N_ells) + N_s.
    Applying these to lambda*V (stratum 1) and (1-lambda)*V (stratum 2):
        lambda >= max( N_w1 - N_ell1 - N_1, V - (N_w2 - N_ell2 + N_2) )/V
        lambda <= min( N_w1 - N_ell1 + N_1, V - (N_w2 - N_ell2 - N_2) )/V.
    '''
    V = (N_w1 + N_w2) - (N_ell1 + N_ell2)
    margin1 = N_w1 - N_ell1
    margin2 = N_w2 - N_ell2
    lower = max(margin1 - N_1, V - (margin2 + N_2)) / V
    upper = min(margin1 + N_1, V - (margin2 - N_2)) / V
    return (lower, upper)
def bound_fisher_fun(N_w1, N_l1, N1, N_w2, N_l2, N2,
    pvalue_funs, feasible_lambda_range=None, stepsize=0.05):
    """
    DEPRECATED: Create piecewise constant upper and lower bounds for the Fisher's
    combination function for varying error allocations

    Parameters
    ----------
    N_w1 : int
        votes for the reported winner in the ballot comparison stratum
    N_l1 : int
        votes for the reported loser in the ballot comparison stratum
    N1 : int
        total number of votes in the ballot comparison stratum
    N_w2 : int
        votes for the reported winner in the ballot polling stratum
    N_l2 : int
        votes for the reported loser in the ballot polling stratum
    N2 : int
        total number of votes in the ballot polling stratum
    pvalue_funs : array_like
        functions for computing p-values. The observed statistics/sample and known parameters
        should be plugged in already.
        The function should take the lambda allocation AS INPUT and output a p-value.
    feasible_lambda_range : array-like
        lower and upper limits to search over lambda. Optional, but will speed up the search
    stepsize : float
        size of the mesh to calculate bounds; default 0.05

    Returns
    -------
    dict with
        array-like
            sample_points : Fisher's combining function evaluated at the grid points
        array-like
            lower_bounds : piecewise constant lower bound on Fisher's combining function between the grid points
        array-like
            upper_bounds : piecewise constant upper bound on Fisher's combining function between the grid points
        array-like
            grid : grid of lambdas
    """
    if feasible_lambda_range is None:
        feasible_lambda_range = calculate_lambda_range(N_w1, N_l1, N1, N_w2, N_l2, N2)
    (lambda_lower, lambda_upper) = feasible_lambda_range

    cvr_pvalue = pvalue_funs[0]
    nocvr_pvalue = pvalue_funs[1]
    cvr_pvalues = []
    nocvr_pvalues = []
    # Evaluate each stratum's p-value on the grid; the cvr stratum tests
    # allocation lambda, the no-cvr stratum tests 1-lambda. Clip at 1.
    for lam in np.arange(lambda_lower, lambda_upper+1, stepsize):
        pvalue1 = np.min([1, cvr_pvalue(lam)])
        pvalue2 = np.min([1, nocvr_pvalue(1-lam)])
        cvr_pvalues.append(pvalue1)
        nocvr_pvalues.append(pvalue2)

    # Between adjacent grid points, pairing one stratum's value at the left
    # endpoint with the other's at the right endpoint brackets the
    # combination function from below/above (piecewise constant bounds).
    lower_bounds = [fisher_combined_pvalue([cvr_pvalues[i+1], nocvr_pvalues[i]]) for i in range(len(cvr_pvalues)-1)]
    upper_bounds = [fisher_combined_pvalue([cvr_pvalues[i], nocvr_pvalues[i+1]]) for i in range(len(cvr_pvalues)-1)]
    sample_points = [fisher_combined_pvalue([cvr_pvalues[i], nocvr_pvalues[i]]) for i in range(len(cvr_pvalues))]

    return {'sample_points' : sample_points,
            'upper_bounds' : upper_bounds,
            'lower_bounds' : lower_bounds,
            'grid' : np.arange(lambda_lower, lambda_upper+1, stepsize)
            }
################################################################################
############################## Unit tests ######################################
################################################################################
def test_modulus1():
    """
    Unit test: check that `create_modulus` really is a modulus of continuity
    for the Fisher combining function at several step sizes, and that it is
    strictly tighter than `create_modulus_old`.
    """
    # Synthetic two-stratum contest: 1000 comparison ballots, 100 polling
    # ballots, overall margin Vwl = 120.
    N1 = 1000
    N2 = 100
    Vw1 = 550
    Vl1 = 450
    Vw2 = 60
    Vl2 = 40
    Vu2 = N2-Vw2-Vl2
    Vwl = (Vw1 + Vw2) - (Vl1 + Vl2)
    Nw1_null = N1/2
    n1 = 100
    # No overstatement/understatement errors observed in the comparison sample.
    o1 = o2 = u1 = u2 = 0
    gamma = 1.03
    # Polling-stratum sample: 6 winner, 4 loser, 0 other ballots.
    nw2 = 6; Wn = nw2
    nl2 = 4; Ln = nl2
    n2 = 10; Un = n2 - nl2 - nw2

    def c(lam):
        # Null-hypothesis margin shift in the polling stratum at allocation lam.
        return Vw2 - Vl2 - lam*Vwl
    def Nw_null(lam):
        # Winner-vote total in the polling stratum consistent with the null.
        return (N2 - Un + c(lam))/2
    def fisher_fun(lam):
        # Fisher combining function: hypergeometric polling terms plus the
        # ballot-comparison term, as a function of the allocation lam.
        Nw_null_val = Nw_null(lam)
        c_val = c(lam)
        fisher_fun = -2*np.sum(np.log(Nw_null_val - np.arange(Wn))) +\
             -2*np.sum(np.log(Nw_null_val - c_val - np.arange(Ln))) +\
             -2*np.sum(np.log(N2 - 2*Nw_null_val + c_val - np.arange(Un))) +\
             2*np.sum(np.log(Vw2 - np.arange(Wn))) +\
             2*np.sum(np.log(Vl2 - np.arange(Ln))) +\
             2*np.sum(np.log(Vu2 - np.arange(Un))) +\
             -2*n1*np.log(1 - (1-lam)*Vwl/(2*N1*gamma)) + \
             2*o1*np.log(1 - 1/(2*gamma)) + \
             2*o2*np.log(1 - 1/(gamma)) + \
             2*u1*np.log(1 + 1/(2*gamma)) + \
             2*u2*np.log(1 + 1/(gamma))
        return fisher_fun

    # The modulus must dominate the actual change in fisher_fun over steps
    # of 0.1, 0.01, and 0.001 at several base points.
    mod = create_modulus(n1, n2, nw2, nl2, N1, Vwl, gamma)
    v1 = np.abs(fisher_fun(0.6 + 0.1) - fisher_fun(0.6))
    v2 = mod(0.1)
    np.testing.assert_array_less(v1, v2)
    v1 = np.abs(fisher_fun(0.2 + 0.01) - fisher_fun(0.2))
    v2 = mod(0.01)
    np.testing.assert_array_less(v1, v2)
    v1 = np.abs(fisher_fun(0.8 + 0.001) - fisher_fun(0.8))
    v2 = mod(0.001)
    np.testing.assert_array_less(v1, v2)

    # The new modulus should be a strictly tighter bound than the old one.
    mod_old = create_modulus_old(n1, n2, nw2, nl2, N1, Vwl, gamma)
    np.testing.assert_array_less(mod(0.1), mod_old(0.1))
    np.testing.assert_array_less(mod(0.01), mod_old(0.01))
    np.testing.assert_array_less(mod(0.001), mod_old(0.001))
if __name__ == "__main__":
    # Run the unit tests when this module is executed as a script.
    test_modulus1()