-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFeaturePredictionForTurnover_Streamlit.py
209 lines (159 loc) · 7.36 KB
/
FeaturePredictionForTurnover_Streamlit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#import necessary packages
#for the web app
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
sb.set()
color = sb.color_palette()
import matplotlib as mpl
import pickle
from sklearn import preprocessing as pp
from sklearn import linear_model
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from sklearn.decomposition import PCA
from sklearn.linear_model import LassoCV
from scipy.stats import wilcoxon
#title of the app
st.title('Top 3 features predicting employee turnover')

# Loading data
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# these pickles must only ever come from a trusted source (presumably the
# offline training step - confirm).

# Each file holds one tuple that is unpacked into the names on the left.
with open('lasso_output.pickle', 'rb') as f:
    test_r_squared, lasso_alpha, lasso_coef = pickle.load(f)

# Two fitted scaler objects; their mean_ and scale_ attributes are read further
# down to move values between raw and scaled units.
with open('scaler.pickle', 'rb') as f:
    scalerEmployeeResponse, scalerOccupancyTurnover = pickle.load(f)

# Turnover values and feature table before scaling (going by the file name).
with open('original_values.pickle', 'rb') as f:
    TurnOver2, dataX2 = pickle.load(f)

# R^2 values and coefficients of the linear model; the coefficients are
# averaged column-wise later in the script.
with open('linear_output.pickle', 'rb') as f:
    linear_r_squared_df, linear_coef_df = pickle.load(f)
# Load the pickled pandas objects; paths are relative to the directory the app
# is started from.
feature_set = pd.read_pickle("./feature_set.pkl")
turnOverRate = pd.read_pickle("./turnOverRate.pkl")
LocationCodeComprehensive = pd.read_pickle("./LocationCodeComprehensive.pkl")
location_pay = pd.read_pickle("./location_pay.pkl")
location_sd = pd.read_pickle("./location_sdS_df.pkl")
# Lasso results (the "_1000" suffix presumably marks 1000 repeated fits - TODO confirm).
lasso_r_squared_df = pd.read_pickle("./lasso_r_squared_1000.pkl")
lasso_alpha_df = pd.read_pickle("./lasso_alpha_1000.pkl")
lasso_coef_df = pd.read_pickle("./lasso_coef_1000.pkl")
selected_features_interaction = pd.read_pickle("./selected_features_interaction.pkl")

# Give the unnamed location column a readable name (modifies the object in place).
LocationCodeComprehensive.rename(
    columns={'Unnamed: 2': 'location code'},
    inplace=True,
)

# Average each lasso coefficient column, then order by absolute size,
# largest first.
mean_coef = lasso_coef_df.mean()
abs_mean_coef = mean_coef.abs()
top_coef = abs_mean_coef.sort_values(ascending=False)

# Place the three feature tables side by side (column-wise concatenation).
feature_frames = [feature_set, location_pay, location_sd]
all_features = pd.concat(feature_frames, axis=1)
# Sidebar widget: the user picks the location the rest of the page reports on.
location = st.sidebar.selectbox('Select your location', LocationCodeComprehensive)

# Boolean mask that is True where the location code equals the selection.
index = LocationCodeComprehensive == location

# The selected features (with interaction terms, going by the name), once as a
# DataFrame wrapper and once under the name used for the linear prediction.
x_linear = selected_features_interaction
selected_features_interaction2 = pd.DataFrame(selected_features_interaction)

# Mean of every coefficient column, then coefficient * feature summed per row.
mean_linear_coef = linear_coef_df.mean()
PredictionMatrix = x_linear.mul(mean_linear_coef)
PredictionValue = PredictionMatrix.sum(axis=1)

# Renumber rows from 0 and relabel the dataX2 columns 0..n-1 so the integer
# lookups further down (e.g. columns 55 and 21) work.
TurnOver2 = TurnOver2.reset_index(drop=True)
dataX2 = dataX2.reset_index(drop=True)
dataX2.columns = range(len(dataX2.columns))

# Turnover currently recorded for the selected location.
base_turnover = TurnOver2[index]
def _round_to_sig_figs(value, digits=3):
    """Round *value* to *digits* significant figures.

    Zero, NaN and infinities are returned unchanged: they have no defined
    order of magnitude, and log10/int would raise on them.
    """
    if value == 0 or not np.isfinite(value):
        return value
    magnitude = int(np.floor(np.log10(abs(value))))
    return round(value, digits - 1 - magnitude)


# Mean linear coefficients shortened to 3 significant figures for display.
mean_coef_reduced = mean_linear_coef.apply(_round_to_sig_figs)

# Show the first three coefficients under their feature headings.
st.subheader("Feature 55: Provide excellent service")
st.write(mean_coef_reduced[0])
st.subheader("Feature 62: Chef's tenure")
st.write(mean_coef_reduced[1])
st.subheader("Feature 21: Managers avoid playing favorites")
st.write(mean_coef_reduced[2])
st.markdown('***')
st.subheader("Current and target turnover rate for your location (%)")
#slider for choosing the goal turnover rate (0-400 %, starting at 100 %)
new_turnover = st.slider('Goal turn over rate (%)', 0, 400, 100)
#keep the goal as a fraction; it is multiplied by 100 again only for plotting
new_turnover = new_turnover/100
#Plot the current and target turnover rate side by side on a shared y axis
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
sb.barplot(TurnOver2[index]*100, orient = 'v', ax =ax1)
ax1.set_ylabel('turnover rate (%)')
ax1.set_xlabel('current')
ax1.set_ylim((0,400))
sb.barplot(new_turnover*100, orient = 'v', color = 'red', ax = ax2)
ax2.set_xlabel('target')
#Pass the figure explicitly: st.pyplot() without one falls back to Matplotlib's
#global figure, which Streamlit deprecates because it is not thread-safe.
st.pyplot(fig)
#Figures from plt.subplots stay registered until closed; close this one so
#script reruns do not accumulate open figures.
plt.close(fig)
# Express the goal turnover in the model's units: log(x + 0.1), then
# standardise with entry 1 of the occupancy/turnover scaler.
log_new_turnover = np.log(new_turnover + 0.1)
turnover_mean = scalerOccupancyTurnover.mean_[1]
turnover_scale = scalerOccupancyTurnover.scale_[1]
scaled_turnover = (log_new_turnover - turnover_mean) / turnover_scale

# Gap between the scaled goal and the selected location's current value.
current_turnover = turnOverRate[index]
turnover_change = scaled_turnover - current_turnover

# currently assumes we use the top component and have selected 3 features.
# The slope of the prediction with respect to column 0 is its own coefficient
# plus the coefficient-3 and coefficient-4 terms weighted by columns 1 and 2.
at_location = LocationCodeComprehensive == location
second_feature = selected_features_interaction2.loc[at_location, 1]
third_feature = selected_features_interaction2.loc[at_location, 2]
q55_slope = (
    mean_linear_coef[0]
    + mean_linear_coef[3] * second_feature
    + mean_linear_coef[4] * third_feature
)
feature_change = turnover_change / q55_slope

# Bring the result back to the raw data scale (the raw value of the feature
# has to be between 0 and 1): undo the scaler for column 55, then apply
# 1.1 - exp(0.1 - x).
new_feature = selected_features_interaction2.loc[at_location, 0] + feature_change
q55_scale = scalerEmployeeResponse.scale_[55]
q55_mean = scalerEmployeeResponse.mean_[55]
new_feature_before_scale = (new_feature * q55_scale) + q55_mean
new_feature_before_transform = 1.1 - np.exp(0.1 - new_feature_before_scale)

# Score the location currently has for column 55.
current_feature = dataX2.loc[at_location, 55]
st.markdown('***')
st.subheader("Current and target scores for the (Q55: Provide excellent service)")
#Plot the current and target score side by side on a shared y axis
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#current score
sb.barplot(current_feature.iloc[0], orient = 'v', ax =ax1)
ax1.set_ylabel('score (Q55) (%)')
ax1.set_xlabel('current')
ax1.set_ylim((0,1))
#TargetScore
#The score must lie between 0 and 1: clamp the bar to that range and remember
#which limit (if any) was hit so the user can be told below the chart.
TargetScore = new_feature_before_transform.iloc[0]
if TargetScore < 0:
    target_bar, limit_message = 0, 'Target score = 0 reached score limit'
elif TargetScore > 1:
    target_bar, limit_message = 1, 'Target score = 1 reached score limit'
else:
    target_bar, limit_message = TargetScore, None
#A NaN target fails both comparisons above; skip its bar but still render the
#figure so the current score stays visible.
if not np.isnan(target_bar):
    sb.barplot(target_bar, orient = 'v', color = 'red', ax = ax2)
ax2.set_xlabel('target')
#Pass the figure explicitly: st.pyplot() without one falls back to Matplotlib's
#global figure, which Streamlit deprecates because it is not thread-safe.
st.pyplot(fig)
#Close the figure so script reruns do not accumulate open figures.
plt.close(fig)
if limit_message is not None:
    st.write(limit_message)
st.markdown('***')
#have option for the 2nd question.
#Slope of the prediction with respect to column 2: its own coefficient plus the
#coefficient-4 and coefficient-5 terms weighted by columns 0 and 1.
q21_rows = LocationCodeComprehensive==location
q21_slope = (mean_linear_coef[2]
             + mean_linear_coef[4]*selected_features_interaction2.loc[q21_rows,0]
             + mean_linear_coef[5]*selected_features_interaction2.loc[q21_rows,1])
feature_change2 = turnover_change/q21_slope
#bring it back to what it means in the raw data scale. The raw value of the feature has to be between 0 and 1.
new_feature2 = selected_features_interaction2.loc[q21_rows,2]+feature_change2
new_feature_before_scale2 = (new_feature2*scalerEmployeeResponse.scale_[21])+scalerEmployeeResponse.mean_[21]
new_feature_before_transform2 = 1.1-np.exp(0.1-new_feature_before_scale2)
current_feature2 = dataX2.loc[q21_rows,21]
st.subheader("Current and target scores for the (Q21: Managers avoid playing favorites)")
#Plot the current and target score side by side on a shared y axis
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#current score
sb.barplot(current_feature2.iloc[0], orient = 'v', ax =ax1)
ax1.set_ylabel('score (Q21) (%)')
ax1.set_xlabel('current')
ax1.set_ylim((0,1))
#TargetScore
#The score must lie between 0 and 1: clamp the bar to that range and remember
#which limit (if any) was hit so the user can be told below the chart.
TargetScore2 = new_feature_before_transform2.iloc[0]
if TargetScore2 < 0:
    target_bar2, limit_message2 = 0, 'Target score = 0 reached score limit'
elif TargetScore2 > 1:
    target_bar2, limit_message2 = 1, 'Target score = 1 reached score limit'
else:
    target_bar2, limit_message2 = TargetScore2, None
#A NaN target fails both comparisons above; skip its bar but still render the
#figure so the current score stays visible.
if not np.isnan(target_bar2):
    sb.barplot(target_bar2, orient = 'v', color = 'red', ax = ax2)
ax2.set_xlabel('target')
#Pass the figure explicitly: st.pyplot() without one falls back to Matplotlib's
#global figure, which Streamlit deprecates because it is not thread-safe.
st.pyplot(fig)
#Close the figure so script reruns do not accumulate open figures.
plt.close(fig)
if limit_message2 is not None:
    st.write(limit_message2)