-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathap.py
194 lines (157 loc) · 8.44 KB
/
ap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import os
import base64
from io import BytesIO
import re
import streamlit as st
import pandas as pd
import ydata_profiling
import plotly.express as px
from pycaret.regression import setup, pull
from streamlit_pandas_profiling import st_profile_report
from pdfminer.high_level import extract_text
from transformers import pipeline
from help import qadocument
from happytransformer import HappyTextToText, TTSettings
def pdf_to_text(pdf_file):
text = extract_text(pdf_file)
return text
def save_text_to_file(text, output_file_path):
with open(output_file_path, 'w', encoding='utf-8') as output_file:
output_file.write(text)
def read_file(filename):
with open(filename, 'r', encoding='utf-8') as file:
text_content = file.read()
return text_content
st.markdown("""
<style>
.title {
text-align: center;
}
</style>
""", unsafe_allow_html=True)
if os.path.exists('dataset.csv'):
df = pd.read_csv('dataset.csv', index_col=None)
else:
df = None
with st.sidebar:
st.image("https://students.ma/wp-content/uploads/2019/02/Logo_inpt.png")
st.markdown("<h1 class='title'>~~~~ Auto INPT ML ~~~~</h1>", unsafe_allow_html=True)
st.sidebar.header("Navigation : ")
choice = st.sidebar.radio("", ["Upload Dataset", "Data Analysis", "Data Visualization", "Machine Learning Models","Download the Model","Natural Language Processing"])
st.sidebar.info("This project application helps you to automate all the process of Machine Learning and NLP tasks aiming to reduce time and increase productivity. This project is made by Younes GUENDOUL. ")
if choice == "Upload Dataset":
st.markdown("<h1 class='title'>Upload Your Dataset</h1>", unsafe_allow_html=True)
file = st.file_uploader("Upload Your Dataset (CSV file)")
if file:
df = pd.read_csv(file, index_col=None)
df.to_csv('dataset.csv', index=None)
st.success("File uploaded successfully!")
st.dataframe(df)
if choice == "Data Analysis":
st.markdown("<h1 class='title'>Exploratory Data Analysis & Data Wrangling</h1>", unsafe_allow_html=True)
profile_df = df.profile_report()
st_profile_report(profile_df)
if choice == "Data Visualization":
st.markdown("<h1 class='title'>Interactive DashBoards</h1>", unsafe_allow_html=True)
st.subheader("Choose the Axis: ")
x = st.selectbox('x', df.columns)
y = st.selectbox('y', df.columns)
color = st.selectbox('color', df.columns)
if df[x].dtype != 'object' and df[y].dtype != 'object':
st.subheader('Data Visualization')
fig = px.scatter(df, x=x, y=y, color=color)
st.plotly_chart(fig)
else:
st.warning("Please select numeric columns for x, y")
if choice == "Machine Learning Models":
st.markdown("<h1 class='title'>Model Configuration</h1>", unsafe_allow_html=True)
st.subheader("Problem Type")
chosen_target = st.selectbox('Choose the Target Column', df.columns)
problem_type = st.selectbox('Choose the Problem Type', ['Regression', 'Classification', 'Clustering'])
if st.button('Run Modelling'):
if problem_type == 'Regression':
from pycaret.regression import setup, compare_models,pull, save_model
setup(df, target=chosen_target, verbose=False)
st.success('Setup Complete')
st.subheader('Model Comparison')
setup_df = pull()
st.dataframe(setup_df)
best_model = compare_models()
compare_df = pull()
st.dataframe(compare_df)
elif problem_type == 'Classification':
from pycaret.classification import setup, compare_models,pull, save_model
setup(df, target=chosen_target, verbose=False)
st.success('Setup Complete')
st.subheader('Model Comparison')
setup_df = pull()
st.dataframe(setup_df)
best_model = compare_models()
compare_df = pull()
st.dataframe(compare_df)
else:
from pycaret.clustering import setup,pull, save_model,create_model
setup(df, verbose=False)
st.success('Setup Complete')
st.subheader('Model Comparison')
setup_df = pull()
st.dataframe(setup_df)
best_model = create_model( 'dbscan')
compare_df = pull()
st.dataframe(compare_df)
save_model(best_model, 'best_model')
#************************************************************************************************************
if choice == "Download the Model":
st.markdown("<h1 class='title'>Download the best Model</h1>", unsafe_allow_html=True)
if os.path.exists('best_model.pkl'):
st.download_button('Download Model', 'best_model.pkl', file_name="best_model.pkl")
else:
st.warning("The model file 'best_model.pkl' does not exist.")
#*************************************************************************************************************
if choice == "Natural Language Processing":
st.markdown("<h1 class='title'>Natural Language Processing</h1>", unsafe_allow_html=True)
problem_type = st.selectbox('Choose the Problem Type', ['Document Q&A and Summarazation', 'Sentiment Analysis',"Text Classification", 'Spell Check'])
if problem_type == 'Document Q&A and Summarazation':
file = st.file_uploader("Select a PDF file to Upload", type=["pdf"])
if file is not None:
pdf_contents = file.read()
base64_pdf = base64.b64encode(pdf_contents).decode('utf-8')
pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="600" type="application/pdf">'
st.markdown(pdf_display, unsafe_allow_html=True)
with open("uploaded_file.pdf", "wb") as f:
f.write(file.getvalue())
st.success("File saved successfully.")
user_input = st.text_area("Enter Your Question About The Document Here:", value="", height=100)
if st.button('answer'):
st.write(f"Chatbot: {qadocument('uploaded_file.pdf',user_input)}")
if st.button('summarize'):
st.write(f"Chatbot: {qadocument('uploaded_file.pdf','can you provide a summary of the document ?')}")
elif problem_type == 'Sentiment Analysis':
user_input = st.text_area("Enter Your Sentence Here to Predict the Emotion:", value="", height=100)
if st.button('Predict Sentiment'):
emotion = pipeline('sentiment-analysis', model='arpanghoshal/EmoRoBERTa')
sentiment_label = emotion(user_input)[0]['label']
sentiment_score = emotion(user_input)[0]['score']
st.write(f"The Sentiment is {sentiment_label} with a Score of {sentiment_score:.2f}")
elif problem_type == 'Text Classification':
user_input = st.text_input("Enter Your Sentence To Classify Here:")
words_for_class = st.text_input("Enter words (separated by commas) for Classification :")
words = words_for_class.split(',')
candidate_labels = [word.strip() for word in words]
if st.button('Classify Sentence'):
classifier = pipeline("zero-shot-classification",model="facebook/bart-large-mnli")
x = classifier(user_input, candidate_labels)
st.write(f"The sentence is classified as {x['labels'][0]} with a Score of {x['scores'][0]:.2f}")
elif problem_type == 'Spell Check':
#st.markdown("<h1 class='title'> NLP for Spell Check </h1>", unsafe_allow_html=True)
user_input = st.text_area("Enter Your Sentence To Correct Here:", value="", height=100)
if st.button('Correct'):
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)
result = happy_tt.generate_text(user_input, args=args)
st.write(f"Correction : {result.text}")
#************************************************************************************************************
#streamlit run ap.py
#https://www.onepointltd.com/wp-content/uploads/2020/03/inno2.png
#https://img.freepik.com/vetores-gratis/coroa-de-ouro-realista-3d-ilustracao-vetorial_97886-286.jpg?size=338&ext=jpg
#https://static.tildacdn.com/tild3263-6531-4764-b532-343666323531/brain1.png