forked from burnpiro/wod-bike-temporal-network
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_groupby_intervals_to_json.py
51 lines (33 loc) · 1.85 KB
/
convert_groupby_intervals_to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
from os import listdir
from os.path import join, isfile
from tqdm import tqdm
import pandas as pd
import json
# Mapping from CSV column names to one-letter names.
# NOTE(review): nothing in the visible code references this constant
# (convert_csv_to_json does not use it) -- confirm whether it is still needed.
COLUMNS_RENAME_DICT = {"interval_start": "s", "rental_place": "o", "return_place": "d", "number_of_trips": "c"}
def convert_csv_to_json(input_path, output_path):
    """Convert one CSV of per-interval trip counts into a nested JSON file.

    The CSV must contain the columns ``interval_start``, ``rental_place``,
    ``return_place`` and ``number_of_trips``; any other columns are ignored.

    The written JSON has the layout::

        {day_of_month: {minute_in_day: [{"o": ..., "d": ..., "c": ...}, ...]}}

    where ``o``/``d``/``c`` hold the rental place, return place and number of
    trips, and each list is sorted by ``c`` in descending order.  Every day
    from 1 up to the number of days in the month gets an entry; a day without
    any rows maps to an empty object.  ``json.dump`` turns the integer day and
    minute keys into strings.

    Rows are grouped by day-of-month only, so a file spanning several months
    would merge equal day numbers (presumably each file covers one month --
    confirm against the data).

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If one of the required columns is missing from the CSV.
    """
    df = pd.read_csv(input_path, parse_dates=True)
    starts = pd.to_datetime(df["interval_start"])

    trips = pd.DataFrame(
        {
            "day": starts.dt.day,
            "minute_in_day": starts.dt.hour * 60 + starts.dt.minute,
            "o": df["rental_place"],
            "d": df["return_place"],
            "c": df["number_of_trips"],
        }
    )

    month_dict = {}
    days_in_month = starts.dt.daysinmonth.max()
    # max() is NaN when there are no usable timestamps (empty file or all
    # NaT); in that case there is no month to enumerate, so write "{}".
    if not pd.isna(days_in_month):
        # Pre-create every day so days without trips serialize as {} instead
        # of being missing or malformed.
        month_dict = {day: {} for day in range(1, int(days_in_month) + 1)}

        # One pass over the data; groupby yields keys sorted by (day, minute),
        # so the minute keys of each day are inserted in ascending order.
        for (day, minute), group in trips.groupby(["day", "minute_in_day"]):
            records = (
                group[["o", "d", "c"]]
                .sort_values(by=["c"], ascending=False)
                .to_dict(orient="records")
            )
            # int() normalizes numpy/float group keys to plain JSON-able ints.
            month_dict[int(day)][int(minute)] = records

    with open(output_path, 'w') as fp:
        json.dump(month_dict, fp)
if __name__ == '__main__':
    # Convert every CSV in "groupedby_intervals" (next to this script) into a
    # JSON file of the same base name inside "groupedby_intervals_json".
    script_dir = os.path.dirname(os.path.realpath(__file__))
    INPUT_FILES_DIR = join(script_dir, "groupedby_intervals")
    OUTPUT_FILES_DIR = join(script_dir, "groupedby_intervals_json")

    # Only regular files with a .csv suffix are converted, so stray files in
    # the input folder are not handed to the CSV parser.
    csv_files = [
        f for f in listdir(INPUT_FILES_DIR)
        if f.endswith(".csv") and isfile(join(INPUT_FILES_DIR, f))
    ]

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_FILES_DIR, exist_ok=True)

    for f in tqdm(csv_files):
        input_path = join(INPUT_FILES_DIR, f)
        # Swap only the extension; ".csv" elsewhere in the name is preserved.
        output_path = join(OUTPUT_FILES_DIR, os.path.splitext(f)[0] + ".json")
        convert_csv_to_json(input_path, output_path)