-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluate.py
58 lines (39 loc) · 1.18 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.linear_model import LinearRegression
from statsmodels.formula.api import ols
def plot_residuals(x, y, dataframe):
    """Draw a residual plot for the regression of ``y`` on ``x``.

    Args:
        x: Predictor, given either as a column name in ``dataframe`` or as
            a vector of values.
        y: Response, given either as a column name in ``dataframe`` or as
            a vector of values.
        dataframe: Data source forwarded to seaborn so that string
            ``x``/``y`` can be looked up as columns.

    Returns:
        The object returned by ``sns.residplot``.
    """
    sns.set(style="whitegrid")
    # Pass everything by keyword: recent seaborn releases no longer accept
    # x/y positionally, and forwarding ``dataframe`` lets column names work
    # (the original silently ignored that argument).
    return sns.residplot(x=x, y=y, data=dataframe, color="b")
def regression_errors(y, yhat):
    """Compute sum-of-squares error metrics for a set of predictions.

    Both inputs are converted to flat float arrays, so lists, ndarrays and
    pandas Series are all accepted (values are paired by position).

    Args:
        y: Observed target values.
        yhat: Predicted values, one per observation in ``y``.

    Returns:
        dict with the keys:
            'sse'  -- sum of squared errors, sum((yhat - y)**2)
            'mse'  -- mean squared error, sse / n
            'rmse' -- square root of mse
            'ess'  -- explained sum of squares, sum((yhat - mean(y))**2)
            'tss'  -- total sum of squares, reported as ess + sse

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    actual = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(yhat, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError("y and yhat must contain the same number of values")
    n = actual.size
    if n == 0:
        raise ValueError("y and yhat must not be empty")

    sse = float(np.sum((predicted - actual) ** 2))
    # ESS measures how far the predictions sit from the mean of the observed
    # values; the previous code squared the residuals here, which merely
    # duplicated SSE and doubled TSS.
    ess = float(np.sum((predicted - actual.mean()) ** 2))
    mse = sse / n

    return {
        'sse': sse,
        'mse': mse,
        'rmse': sqrt(mse),
        'ess': ess,
        # NOTE: ess + sse equals sum((y - mean(y))**2) only for a
        # least-squares fit with an intercept; kept as a sum to preserve
        # the original relationship between the three reported values.
        'tss': ess + sse,
    }
def baseline_mean_errors(yhat, y=None):
    """Compute SSE, MSE and RMSE for a baseline prediction.

    Two call forms are supported:

    * ``baseline_mean_errors(y)`` -- the single argument holds the observed
      values; the baseline predicts their mean for every observation.
    * ``baseline_mean_errors(yhat, y)`` -- the original two-argument form;
      ``yhat`` holds caller-supplied baseline predictions that are compared
      against the observed values ``y``.

    Args:
        yhat: Baseline predictions, or the observed values when ``y`` is
            omitted.
        y: Observed target values. Optional.

    Returns:
        tuple: ``(SSE, MSE, RMSE)`` as floats.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    if y is None:
        # One-argument form: the only argument is the observed data.
        actual = np.asarray(yhat, dtype=float).ravel()
        if actual.size == 0:
            raise ValueError("y must not be empty")
        baseline = np.full(actual.shape, actual.mean())
    else:
        actual = np.asarray(y, dtype=float).ravel()
        baseline = np.asarray(yhat, dtype=float).ravel()
        if actual.size == 0:
            raise ValueError("y must not be empty")
        if actual.size != baseline.size:
            raise ValueError("yhat and y must contain the same number of values")

    sse_baseline = float(np.sum((baseline - actual) ** 2))
    mse_baseline = sse_baseline / actual.size
    rmse_baseline = sqrt(mse_baseline)
    return sse_baseline, mse_baseline, rmse_baseline
def better_than_baseline(y, yhat):
    """Report whether the model's SSE is lower than the mean baseline's SSE.

    Args:
        y: Observed target values.
        yhat: Model predictions, one per observation in ``y``.

    Returns:
        bool: True when the model's sum of squared errors is strictly
        smaller than that of always predicting the mean of ``y``.
    """
    actual = np.asarray(y, dtype=float)
    predicted = np.asarray(yhat, dtype=float)
    # Baseline model: predict the mean of the observed values everywhere.
    baseline = np.full(actual.shape, actual.mean())
    # baseline_mean_errors needs both the predictions and the observations
    # and returns an (SSE, MSE, RMSE) tuple, so index 0 is the SSE.
    sse_baseline = baseline_mean_errors(baseline, actual)[0]
    # regression_errors returns a dict, so the SSE is read by key; the
    # previous positional [0] lookup raised KeyError.
    sse_model = regression_errors(actual, predicted)['sse']
    return bool(sse_model < sse_baseline)
def model_significance(ols_model):
    """Pull the headline fit statistics off a fitted model object.

    Args:
        ols_model: Any object exposing ``rsquared`` and ``f_pvalue``
            attributes (presumably a fitted statsmodels OLS result --
            confirm against callers).

    Returns:
        tuple: ``(rsquared, f_pvalue)`` exactly as read from ``ols_model``.
    """
    return ols_model.rsquared, ols_model.f_pvalue