-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel_prediction.py
97 lines (72 loc) · 3.38 KB
/
model_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python
# coding: utf-8
# In[5]:
from Data_Preprocessing import Preprocessing
from model import TabularModel
import torch
import torch.nn as nn
import time
import pandas as pd
import numpy as np
# In[27]:
class Prediction():
    """Predict the fare of a single taxi trip entered interactively.

    The trip is read from stdin, converted into the categorical and
    continuous feature tensors, and passed through a ``TabularModel`` whose
    weights are loaded from ``TaxiFareRegrModel.pt``.  The one-row feature
    frame of the most recent trip is kept on ``self.dfx``.
    """

    def __init__(self):
        pass

    @staticmethod
    def _device():
        """Return the CUDA device when one is usable, otherwise the CPU."""
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_model(self):
        """Build the network, load the saved weights and switch to eval mode.

        Returns:
            The ``TabularModel`` instance, placed on the device returned by
            ``_device()``.
        """
        device = self._device()
        # One (categories, embedding width) pair per categorical column, in
        # the order used by test_data(): Hour, AMorPM, Weekday.
        emb_szs = [(24, 12), (2, 1), (7, 4)]
        # NOTE(review): the positional 6 and 1 are presumably the number of
        # continuous features and the output size -- confirm against
        # model.TabularModel.
        model = TabularModel(emb_szs, 6, 1, [200, 100], p=0.4).to(device)
        # map_location lets a checkpoint written on a GPU load on a CPU host.
        state = torch.load('TaxiFareRegrModel.pt', map_location=device)
        model.load_state_dict(state)
        model.eval()  # evaluation mode must be set before predicting
        return model

    def haversine_distance(self, lat1, long1, lat2, long2):
        """Return the haversine distance in km between two points of ``self.dfx``.

        Args:
            lat1: Name of the column holding the first latitude (degrees).
            long1: Name of the column holding the first longitude (degrees).
            lat2: Name of the column holding the second latitude (degrees).
            long2: Name of the column holding the second longitude (degrees).

        Returns:
            The row-wise distance, computed with an Earth radius of 6371 km.
        """
        r = 6371  # Earth radius in kilometres
        frame = self.dfx

        phi1 = np.radians(frame[lat1])
        phi2 = np.radians(frame[lat2])
        delta_phi = np.radians(frame[lat2] - frame[lat1])
        delta_lambda = np.radians(frame[long2] - frame[long1])

        a = (np.sin(delta_phi / 2) ** 2
             + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2)
        # Floating-point rounding can leave `a` marginally outside [0, 1]
        # (e.g. for antipodal points); clamp so the square roots stay real.
        a = np.minimum(np.maximum(a, 0.0), 1.0)
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return r * c

    def _read_trip(self):
        """Prompt on stdin for one trip and return it as a column -> value dict."""
        plat = float(input('What is the pickup latitude? '))
        plong = float(input('What is the pickup longitude? '))
        dlat = float(input('What is the dropoff latitude? '))
        dlong = float(input('What is the dropoff longitude? '))
        psngr = int(input('How many passengers? '))
        dt = input('What is the pickup date and time?\nFormat as YYYY-MM-DD HH:MM:SS ')
        return {
            'pickup_latitude': plat,
            'pickup_longitude': plong,
            'dropoff_latitude': dlat,
            'dropoff_longitude': dlong,
            'passenger_count': psngr,
            'EDTdate': dt,
        }

    def _build_features(self, trip):
        """Store *trip* as the one-row frame ``self.dfx`` and add derived columns."""
        self.dfx = pd.DataFrame(trip, index=[0])
        self.dfx['dist_km'] = self.haversine_distance(
            'pickup_latitude', 'pickup_longitude',
            'dropoff_latitude', 'dropoff_longitude')
        self.dfx['EDTdate'] = pd.to_datetime(self.dfx['EDTdate'])

        self.dfx['Hour'] = self.dfx['EDTdate'].dt.hour
        self.dfx['AMorPM'] = np.where(self.dfx['Hour'] < 12, 0, 1)
        # Weekday codes follow the alphabetical order of the abbreviated day
        # names (Fri=0, Mon=1, Sat=2, Sun=3, Thu=4, Tue=5, Wed=6).  They are
        # derived from dayofweek (Monday=0 ... Sunday=6) rather than from
        # strftime('%a'), so the result does not depend on the locale.
        weekday_codes = {0: 1, 1: 5, 2: 6, 3: 4, 4: 0, 5: 2, 6: 3}
        self.dfx['Weekday'] = (
            self.dfx['EDTdate'].dt.dayofweek.map(weekday_codes).astype('int64'))

    def _to_tensors(self, device):
        """Return ``(xcats, xconts)`` built from ``self.dfx`` on *device*."""
        cat_cols = ['Hour', 'AMorPM', 'Weekday']
        cont_cols = ['pickup_latitude', 'pickup_longitude', 'dropoff_latitude',
                     'dropoff_longitude', 'passenger_count', 'dist_km']

        xcats = np.stack([self.dfx[col].values for col in cat_cols], 1)
        xcats = torch.tensor(xcats, dtype=torch.int64)
        xconts = np.stack([self.dfx[col].values for col in cont_cols], 1)
        xconts = torch.tensor(xconts, dtype=torch.float)
        return xcats.to(device), xconts.to(device)

    def test_data(self):
        """Ask for one trip on stdin and print the predicted fare.

        Side effects:
            Sets ``self.dfx`` to the one-row feature frame, loads the model
            through ``get_model()`` and prints the prediction to stdout.
        """
        # INPUT NEW DATA
        trip = self._read_trip()

        # PREPROCESS THE DATA
        self._build_features(trip)

        # CREATE CAT AND CONT TENSORS ON THE SAME DEVICE AS THE MODEL
        xcats, xconts = self._to_tensors(self._device())

        # PASS NEW DATA THROUGH THE MODEL WITHOUT PERFORMING A BACKPROP
        with torch.no_grad():
            model = self.get_model()
            z = model(xcats, xconts)
        print(f'\nThe predicted fare amount is ${z.item():.2f}')
# In[28]:
if __name__ == '__main__':
    # Script entry point: run one interactive fare prediction.
    Prediction().test_data()
# In[ ]: