-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
183 lines (168 loc) · 7.81 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import pandas as pd
import os
import streamlit as st
from ydata_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report
from pycaret.classification import (
setup as classification_setup,
compare_models as classification_compare_models,
pull as classification_pull,
save_model as classification_save_model,
load_model as classification_load_model,
predict_model as classification_predict_model,
tune_model,
)
from pycaret.regression import (
setup as regression_setup,
compare_models as regression_compare_models,
pull as regression_pull,
save_model as regression_save_model,
load_model as regression_load_model,
predict_model as regression_predict_model,
)
from pycaret.clustering import (
setup as clustering_setup,
pull as clustering_pull,
create_model as clustering_create_model,
save_model as clustering_save_model,
load_model as clustering_load_model,
assign_model as clustering_assign_model,
predict_model as clustering_predict_model,
)
# --- Sidebar: logo, app title, page navigation and a short description ------
st.sidebar.image(
    "https://i.etsystatic.com/41369585/r/il/9099ae/4698309797/il_1080xN.4698309797_m5ov.jpg"
)
st.sidebar.title("AutoLearn")
# `choice` holds the page selected here; the page-routing code below reads it.
choice = st.sidebar.radio(
    "Navigation", ["Upload", "Profiling", "ML", "Download", "Model Inference"]
)
st.sidebar.info(
    "This application allows you to build an automated machine learning pipeline using Streamlit, Pandas Profiling, and Pycaret. And it is damnright magic!"
)

# Reload the dataset written by an earlier upload, if one is on disk.
# `df` is left unbound when the file does not exist.
if os.path.exists("sourcedata.csv"):
    df = pd.read_csv("sourcedata.csv", index_col=False)

# Initialize session state for analysis_type
if "analysis_type" not in st.session_state:
    st.session_state["analysis_type"] = None
# ---------------------------------------------------------------------------
# Page routing: render whichever page is selected in the sidebar (`choice`).
# ---------------------------------------------------------------------------

# Files shared between pages. The model is saved and loaded under the bare
# name "best_model", while the file on disk is "best_model.pkl".
_DATA_PATH = "sourcedata.csv"
_MODEL_NAME = "best_model"
_MODEL_PATH = _MODEL_NAME + ".pkl"

# Clustering Models: ['kmeans', 'hclust', 'ap', 'meanshift', 'sc', 'dbscan', 'optics', 'birch']
# Only the models below have predict_model functionality, which the
# "Model Inference" page relies on; 'hclust', 'meanshift', 'sc', 'dbscan'
# and 'optics' do not, so they are not candidates.
_CLUSTERING_MODELS = ["kmeans", "ap", "birch"]

# analysis type -> (load_model, predict_model) pair used on the inference page.
_INFERENCE_BACKENDS = {
    "Regression": (regression_load_model, regression_predict_model),
    "Classification": (classification_load_model, classification_predict_model),
    "Clustering": (clustering_load_model, clustering_predict_model),
}


def _require_dataset():
    """Halt this script run with a hint when no dataset has been uploaded.

    ``df`` is only bound at start-up when the CSV file exists, so without
    this guard the Profiling and ML pages would fail with a NameError.
    """
    if not os.path.exists(_DATA_PATH):
        st.warning(
            "No dataset found. Please upload a CSV file on the 'Upload' page first."
        )
        st.stop()


def _train_best_clustering_model(model_ids):
    """Train every model in *model_ids* and pick the best by Silhouette score.

    Must be called after ``clustering_setup``.

    Returns:
        A ``(best_model, metrics)`` tuple. ``best_model`` is the fitted model
        with the highest Silhouette score, or ``None`` if no model produced a
        comparable score. ``metrics`` is a DataFrame of the pulled metrics,
        one row per model, indexed by ``model_name``.
    """
    best_model = None
    # -inf rather than -1: -1 is itself a valid Silhouette value.
    best_silhouette = float("-inf")
    metric_frames = []
    for model_id in model_ids:
        # Train the clustering model using PyCaret and fetch its metrics.
        model = clustering_create_model(model_id)
        metrics = clustering_pull()
        metrics["model_name"] = model_id
        metric_frames.append(metrics)

        silhouette = metrics["Silhouette"].iloc[0]
        if silhouette > best_silhouette:
            best_silhouette = silhouette
            # Keep the fitted model so the winner need not be retrained.
            best_model = model

    all_metrics = pd.concat(metric_frames, ignore_index=True)
    all_metrics.set_index("model_name", inplace=True)
    return best_model, all_metrics


if choice == "Upload":
    st.title("Upload Your Data for Modeling")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file:
        df = pd.read_csv(uploaded_file, index_col=False)
        # Persist the upload so the other pages (and later reruns) can use it.
        df.to_csv(_DATA_PATH, index=False)

elif choice == "Profiling":
    _require_dataset()
    st.title("Automated Exploratory Data Analysis")
    profile_report = ProfileReport(df)
    st_profile_report(profile_report)

elif choice == "ML":
    _require_dataset()
    st.write("ML")
    target = st.selectbox(
        "Select Target Variable (Only for Regression and Classification)", df.columns
    )
    # Stored in session state so the "Model Inference" page knows which
    # PyCaret module to use for the saved model.
    st.session_state.analysis_type = st.radio(
        "Select Analysis Type", ["Regression", "Classification", "Clustering"]
    )
    if st.button("Run Model"):
        analysis_type = st.session_state.analysis_type
        if analysis_type == "Regression":
            regression_setup(df, target=target)
            st.info("This is the ML Experiment Settings")
            st.dataframe(regression_pull())
            best_model = regression_compare_models()
            st.info("This is the Model Comparison")
            st.dataframe(regression_pull())
            regression_save_model(best_model, _MODEL_NAME)
        elif analysis_type == "Classification":
            classification_setup(df, target=target)
            st.info("This is the ML Experiment Settings")
            st.dataframe(classification_pull())
            best_model = classification_compare_models(sort="AUC")
            st.info("This is the Model Comparison")
            st.dataframe(classification_pull())
            tuned_model = tune_model(best_model)
            st.info("Fine tuned the best model...")
            st.write(tuned_model)
            classification_save_model(tuned_model, _MODEL_NAME)
        elif analysis_type == "Clustering":
            clustering_setup(data=df, normalize=True, remove_multicollinearity=True)
            st.info("This is the Clustering Experiment Settings")
            st.dataframe(clustering_pull())
            best_model, all_metrics_df = _train_best_clustering_model(
                _CLUSTERING_MODELS
            )
            # Display the DataFrame
            st.info("This is the Model Comparison")
            st.dataframe(all_metrics_df)
            if best_model is None:
                st.error(
                    "None of the clustering models produced a valid Silhouette score."
                )
            else:
                clustering_save_model(best_model, _MODEL_NAME)

elif choice == "Download":
    # The model file only exists after a successful run on the "ML" page.
    if os.path.exists(_MODEL_PATH):
        with open(_MODEL_PATH, "rb") as f:
            st.download_button("Download the Model", f, "trained_model.pkl")
    else:
        st.warning("No trained model found. Please run a model on the 'ML' page first.")

elif choice == "Model Inference":
    st.title("Upload Your Data for Predictions")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file:
        analysis_type = st.session_state.analysis_type
        if analysis_type not in _INFERENCE_BACKENDS:
            # analysis_type is only set once the "ML" page has been visited
            # in this session; say so instead of silently doing nothing.
            st.warning(
                "Select an analysis type and train a model on the 'ML' page before running predictions."
            )
        elif not os.path.exists(_MODEL_PATH):
            st.warning(
                "No trained model found. Please run a model on the 'ML' page first."
            )
        else:
            load_model, predict_model = _INFERENCE_BACKENDS[analysis_type]
            df_inference = pd.read_csv(uploaded_file, index_col=False)
            # Display a message about the successful upload
            st.success("CSV file uploaded successfully for predictions!")
            # Load the best model saved in a .pkl file
            model = load_model(_MODEL_NAME)
            st.success(f"{analysis_type} Best Model loaded successfully for predictions!")
            predictions = predict_model(model, data=df_inference)
            st.subheader("Predictions:")
            st.write(predictions)
            predictions.to_csv("predictions.csv", index=False)
            st.success("Predictions saved to predictions.csv")