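# NOTE: the lines below are web-page residue from the code host, kept as
# comments so that the file remains valid Python.
# Notifications: You must be signed in to change notification settings - Fork 0
# Copy path: IterativeImputer.py
# 35 lines (26 loc) · 962 Bytes
# IterativeImputer.py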
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from numpy import isnan
from pandas import read_csv, DataFrame
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
# Fill in the missing values of the horse-colic dataset with scikit-learn's
# IterativeImputer and report the missing-value count before and after.

# Source CSV: it is read without a header row and with '?' treated as missing.
DATA_URL = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/horse-colic.csv'
# Positional index of the column split off as the target variable y.
TARGET_COLUMN = 27

# Load the data
df = read_csv(DATA_URL, header=None, na_values='?')
# Show the first 5 rows of the data.
# A bare `df.head()` expression is only echoed by a notebook/REPL; print()
# makes the preview visible when this file is run as a script.
print(df.head())

# Define X (predictor variables) and y (target variable)
dt = df.values
ix = [i for i in range(dt.shape[1]) if i != TARGET_COLUMN]
X, y = dt[:, ix], dt[:, TARGET_COLUMN]

# Show count of missing values of X (before imputation).
# ndarray.sum() counts the True entries of the NaN mask in one vectorized
# call instead of iterating the flattened array with the builtin sum().
print('Missing values before imputation:', isnan(X).sum())

# Define imputer (library defaults)
imp = IterativeImputer()
# Fit the imputer on X and replace its missing entries
Xtrans = imp.fit_transform(X)

# Show count of missing values of Xtrans (after imputation)
print('Missing values after imputation:', isnan(Xtrans).sum())

# Convert the NumPy array to a pandas DataFrame
Xtrans = DataFrame(data=Xtrans)
# Show the first 5 rows of the data with imputed values
print(Xtrans.head())