-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompress_pharma.py
30 lines (23 loc) · 985 Bytes
/
compress_pharma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import pandas as pd
"""
Write a JSON file that maps each zip code (key) to the number of pharmacies in that zip code (value).
Adjust the 'source' filter in the code to select either 'gp' (Google Places) or 'fsq' (Foursquare).
"""
# Input CSV of pharmacies and the JSON file the per-zip counts are written to.
PHARMACY_CSV = "../Data/GA_Pharmacy_Data_gp_fsq/pharmacy_ga.csv"
OUTPUT_JSON = "fsq_zipdata.json"

# To select pharmacies from Google Places: 'gp'
# To select pharmacies from Foursquare: 'fsq'
# To use pharmacies from both data sources, set SOURCE to None.
SOURCE = 'fsq'


def count_pharmacies_by_zip(df, source=None):
    """Count pharmacy rows per zip code.

    Args:
        df: DataFrame with a 'zipcode' column and, when ``source`` is given,
            a 'source' column.
        source: Value of the 'source' column to keep (e.g. 'gp' or 'fsq').
            ``None`` keeps rows from every source.

    Returns:
        A dict mapping each zip code (int) to the number of rows with that
        zip code, ordered by ascending zip code. Rows whose zipcode is
        missing are ignored.

    Raises:
        KeyError: If a required column is missing from ``df``.
        ValueError: If a non-missing zipcode cannot be cast to int.
    """
    # Drop rows where zipcode is missing, then cast zipcode to int.
    rows = df.dropna(subset=["zipcode"])
    rows = rows.assign(zipcode=rows["zipcode"].astype(int))

    if source is not None:
        rows = rows[rows["source"] == source]

    # Count rows directly rather than non-null values of some other column:
    # a column-wise count() would silently skip pharmacies whose value in
    # that column happens to be missing.
    counts = rows["zipcode"].value_counts().sort_index()

    # Plain Python ints so the mapping serializes cleanly with json.
    return {int(zipcode): int(n) for zipcode, n in counts.items()}


def main():
    """Read the pharmacy CSV, count pharmacies per zip code, and write JSON."""
    pharmacies = pd.read_csv(PHARMACY_CSV)
    counts = count_pharmacies_by_zip(pharmacies, source=SOURCE)
    with open(OUTPUT_JSON, "w") as outfile:
        outfile.write(json.dumps(counts))


if __name__ == "__main__":
    main()