-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpresidential_round_sizes.py
151 lines (126 loc) · 5.96 KB
/
presidential_round_sizes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
For several states of interest (tight margins), this script computes and
stores, in one JSON-formatted file per state, the polling stratum round sizes
that achieve a 0.9 probability of stopping under the alternative hypothesis
that the election is truly as announced. It produces both Minerva and R2 Bravo
round sizes for various polling stratum sizes (as a percentage of relevant
ballots).
Oliver Broadrick 2020
"""
"""
States of interest:
Rhode Island
Wisconsin
Michigan
Pennsylvania
Ohio
North Carolina
"""
from round_sizes import find_sample_size_for_stopping_prob_efficiently, find_sample_size_for_stopping_prob_efficiently_r2bravo
import math
import matplotlib.pyplot as plt
import json
import pprint
# States of interest (tight margins). The names are matched against the
# top-level keys of the loaded data, hence no spaces in the two-word names.
states_of_interest = [
    "RhodeIsland",
    "Wisconsin",
    "Michigan",
    "Pennsylvania",
    "Ohio",
    "NorthCarolina",
]
# Load the stratified data json file. JSON text is UTF-8 by specification, so
# the encoding is stated explicitly instead of relying on the platform default.
with open('data/2016_one_round_all.json', encoding='utf-8') as json_file:
    data_2016 = json.load(json_file)
# Audit parameters shared by every state and every stratum split. None of
# them depend on the loop variables, so they are set once up front.

# risk limit of 10%
alpha = 0.1

# comparison stratum round size (fixed)
n1 = 750

# desired probability of stopping under the alternative hypothesis that
# the contest truly is as announced
stopping_probability = 0.9

# right bounds for the round size search; None is passed through to the
# search helpers unchanged
minerva_right = None
r2bravo_right = None

# sizes of the polling stratum to try, as fractions of the relevant ballots
polling_percentages = [
    .05, .1, .15, .2, .25, .3, .35, .4, .45, .5,
    .55, .6, .65, .7, .75, .8, .85, .9, .95,
]

# keys read from the mapping returned by each round size search helper
result_fields = (
    'round_size',
    'combined_pvalue',
    'comparison_pvalue',
    'polling_pvalue',
    'alloc_lambda',
)

# loop through the states
for state in data_2016:
    if state not in states_of_interest:
        continue

    # print this state's data
    print("\n", state, ":")
    pprint.pprint(data_2016[state])

    # track all data in a struct for each state, output to a json file
    # (data is the same dict object as data_2016[state]; the 'audits' list is
    # added to it in place)
    data = data_2016[state]
    data['audits'] = []

    # the output file is named after the state
    data_file_name = 'data/round_sizes_'+state+'.txt'

    # overall relevant ballot tallies from state data
    # NOTE(review): max/min are taken as the winner and loser tallies; this
    # presumes 'results' holds exactly the two relevant candidates' counts --
    # confirm against the data file.
    results = data_2016[state]['contests']['presidential']['results']
    N_w = max(results)
    N_l = min(results)
    N_relevant = N_w + N_l
    N_w_fraction = N_w / N_relevant
    fractional_margin = (N_w - N_l) / N_relevant
    print("N_w: "+str(N_w))
    print("N_l: "+str(N_l))
    print("N_relevant: "+str(N_relevant))
    print("fractional_margin(mine): "+str(fractional_margin))

    # loop through the various percent sizes of the polling stratum
    for percent_polling in polling_percentages:
        # divide the ballots into strata
        # all strata will have the same margin as the overall contest
        # (stratum 2 is the polling stratum, stratum 1 is the remainder)
        N_2 = round(percent_polling * N_relevant)
        N_1 = N_relevant - N_2
        N_w1 = round(N_w_fraction * N_1)
        N_l1 = N_1 - N_w1
        N_w2 = N_w - N_w1
        N_l2 = N_2 - N_w2

        # sanity check: the strata must add back up to the overall tallies
        assert (N_l2 + N_l1 == N_l)
        assert (N_w2 + N_w1 == N_w)
        assert (N_1 + N_2 == N_relevant)

        # print for viewing pleasure
        print("percent_polling: "+str(percent_polling))
        print("N_1: "+str(N_1)+" N_w1: "+str(N_w1)+" N_l1: "+str(N_l1))
        print("N_2: "+str(N_2)+" N_w2: "+str(N_w2)+" N_l2: "+str(N_l2))

        # obtain and print the minerva round size along with pvalues and lambda
        minerva_results = find_sample_size_for_stopping_prob_efficiently(
            stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha,
            underlying=None, right=minerva_right)
        print("minerva_round_size: "+str(minerva_results['round_size']))
        print("combined_pvalue: "+str(minerva_results['combined_pvalue']))
        print("comparison pvalue: "+str(minerva_results['comparison_pvalue']))
        print("polling pvalue: "+str(minerva_results['polling_pvalue']))
        print("alloc_lambda: "+str(minerva_results['alloc_lambda']))

        # obtain and print the r2bravo round size along with pvalues and lambda
        r2bravo_results = find_sample_size_for_stopping_prob_efficiently_r2bravo(
            stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha,
            underlying=None, right=r2bravo_right)
        print("r2bravo_round_size: "+str(r2bravo_results['round_size']))
        print("combined_pvalue: "+str(r2bravo_results['combined_pvalue']))
        print("comparison pvalue: "+str(r2bravo_results['comparison_pvalue']))
        print("polling pvalue: "+str(r2bravo_results['polling_pvalue']))
        print("alloc_lambda: "+str(r2bravo_results['alloc_lambda']))

        # add this data to the data structure: the stratum split first, then
        # the minerva fields, then the r2bravo fields (key order is the order
        # written to the output file)
        audit = {
            'percent_polling': percent_polling,
            'N_relevant': N_relevant,
            'N_w': N_w,
            'N_l': N_l,
            'N_2': N_2,
            'N_1': N_1,
            'N_w1': N_w1,
            'N_l1': N_l1,
            'N_w2': N_w2,
            'N_l2': N_l2,
        }
        for prefix, method_results in (('minerva', minerva_results),
                                       ('r2bravo', r2bravo_results)):
            for field in result_fields:
                audit[prefix+'_'+field] = method_results[field]
        data['audits'].append(audit)

        # update the file each loop (for convenience of checking progress)
        with open(data_file_name, 'w') as outfile:
            json.dump(data, outfile, indent=2)