-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBlogVaccineEffectiveness.py
402 lines (299 loc) · 13.3 KB
/
BlogVaccineEffectiveness.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 24 13:14:28 2021
@author: 212367548
"""
import pandas as pd
import datetime
from plotly.offline import plot as pplot
import plotly.express as px
import plotly.graph_objects as go
import os
import numpy as np
import covid
from tableauscraper import TableauScraper as TS
ts = TS()
#%% Date suffix
date_suffix = '_2022-04-14'
month_label = 'Mar 2022'
#%% Load population
pop_age = covid.read_pop_age_wi('vax')
#%% Load in age tables for all outcomes
datasets = ['Cases', 'Hospitalizations', 'Deaths']
urls = {'Cases': 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatusAge/Cases?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link',
'Hospitalizations': 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatusAge/Hospitalizations?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link',
'Deaths': 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatusAge/Deaths?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link',
}
vax_age_all = pd.DataFrame()
for outcome in datasets:
ts.loads(urls[outcome])
dashboard = ts.getWorkbook()
vax_age = dashboard.getWorksheet(outcome + ' by Age').data
vax_age = vax_age.pivot(index='Age group-value', columns='Measure Names-alias', values='Measure Values-alias')
vax_age.columns.name = 'Vax status'
vax_age.index.name = 'Age group'
col_rename = {'Percent of people who completed the vaccine series by the middle of the month': 'Vax fraction',
'Rate of ' + outcome.lower() + ' per 100,000 fully vaccinated people': 'Vax',
'Rate of ' + outcome.lower() + ' per 100,000 not fully vaccinated people': 'Unvax'}
vax_age = vax_age.rename(columns=col_rename)
vax_age['Vax fraction'] = pd.to_numeric(vax_age['Vax fraction'].str.replace('%',''))/100
vax_age['Vax'] = pd.to_numeric(vax_age.Vax.str.replace(',',''))
vax_age['Unvax'] = pd.to_numeric(vax_age.Unvax.str.replace(',',''))
# create 12-17 summed age category
vax_age.loc['12-17'] = (vax_age.loc['12-15']*pop_age['12-15'] + vax_age.loc['16-17']*pop_age['16-17']) / (pop_age['12-17'])
vax_age = vax_age.drop(['12-15', '16-17'])
# order the rows correctly
vax_age = vax_age.loc[['0-4', '5-11', '12-17', '18-24', '25-34', '35-44', '45-54', '55-64', '65+', 'Total'], :]
# get to a flatter format
vax_frac = vax_age['Vax fraction']
vax_age = vax_age.reset_index().melt(id_vars=['Age group', 'Vax fraction'], value_name=outcome)
vax_age['Age group population'] = vax_age['Age group'].apply(lambda p: pop_age[p])
vax_age['Population'] = (vax_age['Age group population'] * vax_age['Vax fraction'] * (vax_age['Vax status'] == 'Vax')
+ vax_age['Age group population'] * (1-vax_age['Vax fraction']) * (vax_age['Vax status'] == 'Unvax') )
# get rid of intermediate calculation columns
vax_age = vax_age.drop(['Age group population', 'Vax fraction'], axis=1)
# merge the new data with the previous data
vax_age_all[vax_age.columns] = vax_age
# put columns in the right order
vax_age_all = vax_age_all[['Age group', 'Vax status', 'Population', 'Cases', 'Hospitalizations', 'Deaths']]
vax_age_all.to_csv('.\\data\\vaccinations\\Breakthroughs' + date_suffix + '.csv')
#%% Compute age-adjusted efficacy
vax = vax_age_all[vax_age_all['Vax status'] == 'Vax']
unvax = vax_age_all[vax_age_all['Vax status'] == 'Unvax']
vax = vax.set_index('Age group')
unvax = unvax.set_index('Age group')
vax['Total population'] = vax['Population'] + unvax['Population']
unvax['Total population'] = vax['Total population']
pop_wi = vax.loc['Total', 'Total population']
vax['Adjusted Deaths'] = vax['Deaths'] * vax['Total population'] / 1e5
unvax['Adjusted Deaths'] = unvax['Deaths'] * unvax['Total population'] / 1e5
print(vax['Adjusted Deaths'].drop('Total').sum() / pop_wi * 1e5)
print(unvax['Adjusted Deaths'].drop('Total').sum() / pop_wi * 1e5)
#%% Settings for all plots
# get age group labels from pop_age
labels = list(pop_age.index)
labels.remove('Total')
color = {'Cases': 'steelblue',
'Hospitalizations': 'darkorange',
'Deaths': 'firebrick'}
pattern = {'Vax': '/', 'Unvax': ''}
#%% simplify - variable width graph - stratified by age
# based loosely on mekko example from plotly documentation https://plotly.com/python/bar-charts/
def plotly_vax_age_bar(vax_age, outcome, priority='Age group', group=None):
# vax_age needs 'Age groups', 'Population', 'Vax status'
# select age groups to include
if group is None:
vax_age = vax_age[vax_age['Age group']!='Total']
else:
vax_age = vax_age[vax_age['Age group']==group]
# select ordering of Vax status / age group
if priority == 'Age group':
vax_age = vax_age.sort_values(['Age group', 'Vax status'], ascending=[True, False])
else:
vax_age = vax_age.sort_values(['Vax status', 'Age group'], ascending=[False, True])
# x values of bars - based on population as width
vax_age['x'] = vax_age.Population.cumsum() - vax_age.Population
fig = go.Figure()
for status in ['Vax', 'Unvax']:
data = vax_age[vax_age['Vax status']==status]
fig.add_trace(go.Bar(
name=status,
y=data[outcome],
x=data['x'],
width=data['Population'],
offset=0,
marker_color=color[outcome],
marker_pattern_shape=pattern[status],
customdata=np.transpose([data['Age group'], data[outcome]*data.Population/1e5]),
# texttemplate="%{y} x %{width} =<br>%{customdata[1]}",
# textposition="inside",
# textangle=0,
# textfont_color="white",
hovertemplate="<br>".join([
"%{customdata[0]}",
"Population: %{width}",
outcome + 'per 100k: %{y}',
"Total " + outcome + ": %{customdata[1]}",
])
))
# add dividers for age groups
grouped_widths = vax_age.Population.rolling(2).sum()[1::2]
if group is None and priority=='Age group':
grouped_edge = grouped_widths.cumsum() - grouped_widths
grouptext = vax_age['Age group'].unique()
shapes = list()
for gg, group in enumerate(grouptext):
fig.add_annotation(
text=group,
x=grouped_edge.iloc[gg], xanchor='left', align='left',
yref='paper', y=1, yanchor='top',
textangle=-90, valign='top',
showarrow=False,
)
if gg != 0:
shapes.append(
dict(
type= 'line', line_color='lightgray',
yref= 'paper', y0= 0, y1= 1,
xref= 'x', x0=grouped_edge.iloc[gg], x1=grouped_edge.iloc[gg],
)
)
fig.update_layout(shapes=shapes)
fig.update_xaxes(
title = 'Share of population'
)
fig.update_yaxes(
title = outcome + ' per 100K'
)
fig.update_xaxes(range=[0,grouped_widths.sum()])
return fig
#%% Parameters for all plots
imwidth = 600
imheight = 450
#%% variable width graph - stratified by age
for outcome in datasets:
fig = plotly_vax_age_bar(vax_age_all, outcome)
fig.update_layout(
title_text = outcome + " by vax status <i>and</i> age group<br>" + month_label,
uniformtext=dict(mode="hide", minsize=10),
)
save_png = '.\\docs\\assets\\VaxBarAge-' + outcome + '-StratAge'+date_suffix+'.png'
fig.write_image(
save_png,
width=700,
height=imheight,
engine='kaleido',
)
os.startfile(save_png)
#%%%%%% Previous plots for explanation blog post, not needed for updates
exit
#%% 65+ straight numbers
fig = go.Figure()
for status in ['Vax', 'Unvax']:
data = vax_age[vax_age['Vax status']==status]
data = data[data['Age group']=='65+']
data['Deaths'] = data['Deaths'] * data['Population'] / 100e3
fig.add_trace(go.Bar(
name=status,
y=data[outcome],
x=[status],
# width=data['Population'],
# offset=0,
marker_color=color[outcome],
marker_pattern_shape=pattern[status],
# customdata=np.transpose([data['Age group'], data[outcome]*data.Population/1e5]),
# texttemplate="%{y} x %{width} =<br>%{customdata[1]}",
# textposition="inside",
# textangle=0,
# textfont_color="white",
# hovertemplate="<br>".join([
# "%{customdata[0]}",
# "Population: %{width}",
# outcome + 'per 100k: %{y}',
# "Total " + outcome + ": %{customdata[1]}",
# ])
))
fig.update_layout(
title_text = "Number of " + outcome.lower() + " by vax status - 65+<br>(NOT GOOD ANALYSIS FOR ILLUSTRATION ONLY)",
uniformtext=dict(mode="hide", minsize=10),
)
fig.update_yaxes(title='Deaths')
save_png = '.\\docs\\assets\\VaxBarAge-DeathRaw-65'+date_suffix+'.png'
fig.write_image(
save_png,
width=imwidth,
height=imheight,
engine='kaleido',
)
os.startfile(save_png)
#%% 65+ only
# select outcome
outcome = 'Deaths'
fig = plotly_vax_age_bar(vax_age_all, outcome, group='65+')
fig.update_layout(
title_text = 'Rate of ' + outcome.lower() + " by vax status - 65+",
uniformtext=dict(mode="hide", minsize=10),
)
save_png = '.\\docs\\assets\\VaxBarAge-Death-65'+date_suffix+'.png'
fig.write_image(
save_png,
width=imwidth,
height=imheight,
engine='kaleido',
)
os.startfile(save_png)
#%% vax/unvax totals
outcome = 'Deaths'
fig = plotly_vax_age_bar(vax_age_all, outcome, group='Total')
fig.update_layout(
title_text = outcome + " by vax status - all ages<br>(AGAIN NOT GREAT ANALYSIS BEAR WITH ME)",
uniformtext=dict(mode="hide", minsize=10),
)
save_png = '.\\docs\\assets\\VaxBarAge-Death-Total'+date_suffix+'.png'
fig.write_image(
save_png,
width=imwidth,
height=imheight,
engine='kaleido',
)
os.startfile(save_png)
#%%%%%%%%%%%% Other plot attempts
exit
#%% vax/unvax with order priority reversed
outcome = 'Deaths'
fig = plotly_vax_age_bar(vax_age_all, outcome, priority='Vax status')
fig.update_layout(
title_text = outcome + " by vax status<br>By age group",
uniformtext=dict(mode="hide", minsize=10),
)
savefile = '.\\docs\\assets\\plotly\\VaxBarAge-Death-StratAgeVaxGroup.html'
fig.write_html(
file=savefile,
include_plotlyjs='cdn',
)
os.startfile(savefile)
#%% process monthly data
case_url = 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatus/CasesbyVaxStatus?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link'
hosp_url = 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatus/HospitalizationsbyVaxStatus?:embed_code_version=3&:embed=y&:loadOrderID=1&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link'
death_url = 'https://bi.wisconsin.gov/t/DHS/views/CasesOutcomesbyVaxStatus/DeathsbyVaxStatus?:embed_code_version=3&:embed=y&:loadOrderID=2&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link'
ts.loads(case_url)
case_dash = ts.getWorkbook()
cases = case_dash.getWorksheet('Cases').data
col_rename = {'Month*-value': 'Month',
'Measure Names-alias': 'Measure',
'Measure Values-alias': 'value'}
cases = cases[col_rename.keys()]
cases = cases.rename(columns=col_rename)
# convert every value to numeric, some of which are percents
cases['value'] = pd.to_numeric(cases['value'].apply(lambda v: v.replace('%', '')))
# pivot to column format
cases = cases.pivot(index='Month', columns='Measure', values='value')
cases = cases.rename(columns=
{'Confirmed and probable COVID-19 case rate per 100,000 not fully vaccinated people': 'Non-vax rate',
'Confirmed and probable COVID-19 case rate per 100,000 fully vaccinated people': 'Vax rate',
'Percent of people who completed the vaccine series by the first of the month': 'Percent vax'
})
# replace index with month number for sorting
cases = cases.reset_index()
cases['Month number'] = cases.Month.apply(lambda m: datetime.datetime.strptime(m, '%B').month)
cases = cases.set_index('Month number', drop=True)
cases = cases.sort_index()
# Create efficacy and relative risk columns
cases['Relative risk'] = cases['Vax rate'] / cases['Non-vax rate']
cases['Efficacy'] = 1 - cases['Relative risk']
#%% adjust for previously infected?
pop_wi = 5.9e6
vax_rate = cases.loc[7, 'Vax rate']
vax_perc = cases.loc[7, 'Percent vax']
vax_pop = vax_perc/100 * pop_wi
vax_num = vax_rate/1e5 * vax_pop
non_rate = cases.loc[7, 'Non-vax rate']
non_perc = 100 - vax_perc
non_pop = non_perc/100 * pop_wi
non_num = non_rate/1e5 * non_pop
pre_frac = 0.3
nonpre_pop = non_perc/100 * pre_frac * pop_wi
nonpre_num = vax_rate/1e5 * nonpre_pop
non2_num = non_num - nonpre_num
non2_pop = non_pop - nonpre_pop
non2_rate = non2_num / non2_pop * 1e5