CREDITSCORE.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings("ignore",category=FutureWarning)
warnings.filterwarnings("ignore", category=DataConversionWarning)
df = pd.read_csv('bank.csv')
#Categorical Features
categorical_features = ['Gender','Education','Marital Status','Home Ownership']
encoder = OneHotEncoder(sparse_output=False)  #'sparse' was renamed to 'sparse_output' in scikit-learn 1.2
encoded = encoder.fit_transform(df[categorical_features])
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_features))
#Combining the original numeric features with the encoded categorical features
df = df.join(encoded_df)
df = df.drop(categorical_features,axis=1)
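#Note (design choice): pd.get_dummies(df, columns=categorical_features) would do the same
#encode/join/drop in one step; the explicit OneHotEncoder is useful when the fitted encoder
#needs to be reapplied to new, unseen rows later.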
#Exploratory Data Analysis
#Pie chart of the label distribution (uses the actual class labels instead of assuming
#that the counts come back in High/Average/Low order)
value_counts = df['Credit Score'].value_counts()
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.axis('equal')
ax.pie(value_counts.values, labels=value_counts.index, autopct='%1.2f%%')
#Histograms of all the features
df.hist()
#Variability in the features
df.plot(kind ='density',subplots = True, layout =(14,3),sharex = False)
#Pairwise relationships between features, coloured by Credit Score
sns.pairplot(df,hue='Credit Score')
#Splitting data into X and y
y = np.array(df['Credit Score'])  #1-D target avoids scikit-learn's DataConversionWarning
X = np.array(df.drop(['Credit Score'],axis=1))
#Relation between two of the features
plt.figure()
plt.xlabel('Income')
plt.ylabel('Number of Children')
plt.scatter(df['Income'], df['Number of Children'], color='b')
plt.show()
#Splitting Data into training and test sets
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)
#Scaling Data (fit the scaler on the training set only so test-set statistics do not leak into training)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
#Initializing the model, training it and making predictions
model = KNeighborsClassifier(n_neighbors=2)
model.fit(x_train,y_train)
predictions = model.predict(x_test)
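#Optional: a minimal sketch for choosing n_neighbors by cross-validation instead of the
#fixed value of 2 above (assumes 5-fold CV over an odd range of k; adjust to the data size)
from sklearn.model_selection import GridSearchCV
param_grid = {'n_neighbors': list(range(1, 22, 2))}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train, y_train)
print('Best n_neighbors:', grid_search.best_params_['n_neighbors'])
print('Best cross-validated accuracy:', grid_search.best_score_)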
#Metrics
accuracy = accuracy_score(y_test,predictions)
report = classification_report(y_test,predictions)
cm = confusion_matrix(y_test,predictions)
print(f'-----------------------------------------------Classification Report--------------------------------------------------------\n{report}')
print('Accuracy Score',accuracy)
print('Confusion matrix\n',cm)
#Confusion matrix heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, xticklabels=model.classes_, yticklabels=model.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
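#Optional: the same preprocessing and model expressed as a single scikit-learn Pipeline
#(a sketch only; it re-reads the raw CSV so the ColumnTransformer sees the original
#columns, and it assumes every non-categorical column is numeric)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
raw = pd.read_csv('bank.csv')
X_raw = raw.drop('Credit Score', axis=1)
y_raw = raw['Credit Score']
preprocess = ColumnTransformer(
    [('onehot', OneHotEncoder(sparse_output=False), categorical_features)],
    remainder='passthrough')
pipeline = Pipeline([('prep', preprocess),
                     ('scale', StandardScaler()),
                     ('knn', KNeighborsClassifier(n_neighbors=2))])
Xtr, Xte, ytr, yte = train_test_split(X_raw, y_raw, test_size=0.25, random_state=42)
pipeline.fit(Xtr, ytr)
print('Pipeline test accuracy:', pipeline.score(Xte, yte))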