This repository has been archived by the owner on Apr 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper_vacc.py
100 lines (95 loc) · 4.07 KB
/
scraper_vacc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import csv
import os
from helper import extractIdx
def processVaccData():
    """Run the vaccination pipeline end to end.

    Collects the per-date, per-canton records with ``extractVaccData`` and
    passes the result directly to ``writeVaccCsv``.
    """
    writeVaccCsv(extractVaccData())
def extractVaccData():
    """Build the vaccination data dict from the files in ``./dataset/data``.

    Every directory entry is inspected; only the two known file names are
    processed (each one is printed before it is parsed), everything else is
    ignored.

    Returns:
        The dict produced by feeding the matching files through
        ``parseAdministered`` / ``parseVaccPersons``; an empty dict when no
        file matches.
    """
    source_dir = "./dataset/data"
    collected = {}
    for entry in os.listdir(source_dir):
        is_administered = entry == "COVID19VaccDosesAdministered.csv"
        is_persons = entry == "COVID19VaccPersons_v2.csv"
        if not (is_administered or is_persons):
            continue

        print(entry)
        path = "%s/%s" % (source_dir, entry)
        # The parser is looked up only once a file matches.
        if is_administered:
            collected = parseAdministered(path, collected)
        else:
            collected = parseVaccPersons(path, collected)
    return collected
def parseAdministered(file, vacc_data):
    """Merge the rows of an administered-doses CSV file into ``vacc_data``.

    A row whose first cell is ``"date"`` is treated as the header; the column
    positions are taken from it via ``extractIdx`` (presumably it returns the
    index of each named column, in argument order -- confirm against
    ``helper``). Until a header row is seen every position defaults to 0.

    Blank lines are skipped, as are rows whose region is ``"all"``,
    ``"neighboring_chfl"`` or ``"unknown"``. Every remaining row ensures that
    ``vacc_data[date][canton]`` exists; if the row's type is
    ``"COVID19VaccDosesAdministered"`` its total and per-100 cells are stored
    under ``"administeredTotal"`` and ``"administeredPer100"`` as the raw
    strings read from the file.

    Args:
        file: Path of the CSV file to read.
        vacc_data: Nested dict ``{date: {canton: {...}}}``; updated in place.

    Returns:
        The same ``vacc_data`` object that was passed in.
    """
    idxGeoRegion = 0
    idxDate = 0
    idxSumTotal = 0
    idxPer100PersonsTotal = 0
    idxType = 0
    # The with-block closes the file even if a row fails to parse;
    # newline="" is the mode the csv module expects for its input files.
    with open(file, "r", newline="") as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # csv.reader yields [] for a blank line; nothing to index.
                continue
            if row[0] == "date":  # header line: learn the column layout
                idxGeoRegion, idxDate, idxSumTotal, idxPer100PersonsTotal, idxType = extractIdx(
                    row, 'geoRegion', 'date', 'sumTotal', 'per100PersonsTotal', 'type')
                continue

            date = row[idxDate]
            canton = row[idxGeoRegion]
            if canton in ("all", "neighboring_chfl", "unknown"):
                continue

            # Read every cell before touching vacc_data so that a short row
            # fails without leaving a half-created entry behind.
            total = row[idxSumTotal]
            per100 = row[idxPer100PersonsTotal]
            dtype = row[idxType]

            entry = vacc_data.setdefault(date, {}).setdefault(canton, {})
            if dtype == "COVID19VaccDosesAdministered":
                entry["administeredTotal"] = total
                entry["administeredPer100"] = per100
    return vacc_data
def parseVaccPersons(file, vacc_data):
    """Merge the rows of a vaccinated-persons CSV file into ``vacc_data``.

    A row whose first cell is ``"date"`` is treated as the header; the column
    positions are taken from it via ``extractIdx`` (presumably it returns the
    index of each named column, in argument order -- confirm against
    ``helper``). Until a header row is seen every position, including the
    age-group column, defaults to 0.

    Blank lines are skipped, as are rows whose region is ``"all"``,
    ``"neighboring_chfl"`` or ``"unknown"`` and rows whose age group is not
    ``"total_population"``. Every remaining row ensures that
    ``vacc_data[date][canton]`` exists; if the row's type is
    ``"COVID19FullyVaccPersons"`` its total and per-100 cells are stored
    under ``"fullyVaccTotal"`` and ``"fullyVaccPer100"`` as the raw strings
    read from the file.

    Args:
        file: Path of the CSV file to read.
        vacc_data: Nested dict ``{date: {canton: {...}}}``; updated in place.

    Returns:
        The same ``vacc_data`` object that was passed in.
    """
    idxGeoRegion = 0
    idxDate = 0
    idxSumTotal = 0
    idxPer100PersonsTotal = 0
    idxType = 0
    # Initialised like the other positions so a data row that precedes the
    # header no longer fails with UnboundLocalError.
    idxAgeGroup = 0
    # The with-block closes the file even if a row fails to parse;
    # newline="" is the mode the csv module expects for its input files.
    with open(file, "r", newline="") as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # csv.reader yields [] for a blank line; nothing to index.
                continue
            if row[0] == "date":  # header line: learn the column layout
                (idxGeoRegion, idxDate, idxSumTotal, idxPer100PersonsTotal,
                 idxType, idxAgeGroup) = extractIdx(
                    row, 'geoRegion', 'date', 'sumTotal', 'per100PersonsTotal',
                    'type', 'age_group')
                continue

            date = row[idxDate]
            canton = row[idxGeoRegion]
            if canton in ("all", "neighboring_chfl", "unknown"):
                continue

            # Read every cell before touching vacc_data so that a short row
            # fails without leaving a half-created entry behind.
            total = row[idxSumTotal]
            per100 = row[idxPer100PersonsTotal]
            dtype = row[idxType]
            ageGroup = row[idxAgeGroup]
            if ageGroup != "total_population":
                continue

            entry = vacc_data.setdefault(date, {}).setdefault(canton, {})
            if dtype == "COVID19FullyVaccPersons":
                entry["fullyVaccTotal"] = total
                entry["fullyVaccPer100"] = per100
    return vacc_data
def writeVaccCsv(vacc_data):
    """Write ``vacc_data`` to ``vacc_data.csv`` in the current directory.

    The file starts with a fixed header row, followed by one row per
    (date, canton) pair, ordered by date and then by canton. The delivered
    and received columns are always written as ``"0"``; the administered and
    fully-vaccinated columns fall back to ``"0"`` when the record lacks the
    corresponding key. A progress line is printed for every date.

    Args:
        vacc_data: Nested dict ``{date: {canton: {...}}}``.
    """
    header = [
        "date", "canton",
        "noopdeliveredTotal", "noopdeliveredPer100",
        "administeredTotal", "administeredPer100",
        "fullyVaccinatedTotal", "fullyVaccinatedPer100",
        "noopreceivedTotal", "noopreceivedPer100",
    ]
    # Record keys, in the order of the four middle output columns.
    record_keys = (
        "administeredTotal", "administeredPer100",
        "fullyVaccTotal", "fullyVaccPer100",
    )
    with open('vacc_data.csv', 'w', newline='') as out:
        writer = csv.writer(out, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)
        for day in sorted(vacc_data):
            print("writing vacc data for %s" % day)
            per_canton = vacc_data[day]
            for region in sorted(per_canton):
                record = per_canton[region]
                measured = [record.get(key, "0") for key in record_keys]
                writer.writerow([day, region, "0", "0", *measured, "0", "0"])