This repository has been archived by the owner on Apr 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper_pop.py
47 lines (43 loc) · 1.45 KB
/
scraper_pop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import csv
import os
from helper import extractIdx
def process_pop_data():
    """Run the population pipeline: extract the data, then write it as CSV."""
    # The extracted mapping is passed straight on to the CSV writer.
    write_pop_csv(extract_pop_data())
def extract_pop_data():
    """Scan ./dataset/data and parse the persons vaccination CSV if present.

    Returns:
        The dict produced by ``parsePersons`` for the file named
        ``COVID19VaccPersons_v2.csv``, or an empty dict when the directory
        contains no entry with that name.
    """
    source_dir = "./dataset/data"
    wanted = "COVID19VaccPersons_v2.csv"
    result = {}
    for entry in os.listdir(source_dir):
        # Every directory entry other than the persons file is ignored.
        if entry != wanted:
            continue
        print(entry)
        result = parsePersons(f"{source_dir}/{entry}", result)
    return result
def parsePersons(file, data):
    """Collect total-population figures per region from a vaccination CSV.

    Reads *file* as a comma-separated CSV. The row whose first cell is
    ``"date"`` is treated as the header and is handed to ``extractIdx`` to
    obtain the positions of the ``geoRegion``, ``pop`` and ``age_group``
    columns. Of the remaining rows, only those whose age-group cell equals
    ``'total_population'`` and whose ``pop`` cell passes ``str.isnumeric()``
    are kept. Blank lines are skipped.

    Args:
        file: Path of the CSV file to read.
        data: Dict that is updated in place. Keys are ``geoRegion`` cell
            values, values are the ``pop`` cells exactly as read (strings).
            A later row for the same region overwrites an earlier one.

    Returns:
        The same ``data`` dict that was passed in.
    """
    # Until a header row has been seen, every column index defaults to 0.
    idxGeoRegion = 0
    idxPop = 0
    idxAgeGroup = 0
    # The context manager closes the handle even if parsing raises;
    # newline='' lets the csv module do its own line-ending handling.
    with open(file, "r", newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in csvreader:
            if not row:
                # csv.reader yields [] for a blank line; row[0] would raise.
                continue
            if row[0] == "date":  # header line: locate the columns, then skip it
                # NOTE(review): extractIdx lives in the project's helper
                # module; presumably it maps header names to column
                # indices, in argument order - confirm against helper.py.
                idxGeoRegion, idxPop, idxAgeGroup = extractIdx(
                    row, 'geoRegion', 'pop', 'age_group')
                continue
            if row[idxAgeGroup] != 'total_population':
                continue
            if not row[idxPop].isnumeric():
                continue
            canton = row[idxGeoRegion]
            data[canton] = row[idxPop]
    return data
def write_pop_csv(data):
    """Write the canton/population mapping to ``pop.csv`` in the working directory.

    Args:
        data: Mapping of canton to population value. Rows are emitted in
            sorted key order, after a ``canton,pop`` header row.
    """
    with open('pop.csv', 'w', newline='') as out:
        writer = csv.writer(
            out,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow(["canton", "pop"])
        for region in sorted(data):
            # Progress message is printed once per row, before the row is written.
            print("writing pop data for %s" % region)
            writer.writerow([region, data[region]])