-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample.py
104 lines (75 loc) · 2.54 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression, make_classification
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from rfe_cv import rfe_cv
def main():
    """Run the ``rfe_cv`` demonstrations on synthetic data.

    Four scenarios are exercised in order, all drawing randomness from a
    single ``RandomState`` seeded with 0:

    1. a regression problem scored with ``'r2'``;
    2. a binary classification problem scored with ``'accuracy'``;
    3. two classifiers ('RF1' and 'RF2') where the figures returned by the
       first call are handed to the second call through ``figs``;
    4. a run limited with ``max_features=3``.
    """
    sample_count = 200
    feature_count = 6
    rng = np.random.RandomState(0)

    def _tabulate(features, target):
        # Build a fresh column-name list for every dataset so that each
        # frame (and each group of rfe_cv calls) owns its own list object.
        names = [f'var {idx}' for idx in range(feature_count)]
        frame = pd.DataFrame(data=features, columns=names)
        frame['y'] = target
        return frame, names

    # --- Regression -------------------------------------------------------
    reg_features, reg_target = make_regression(
        n_samples=sample_count,
        n_features=feature_count,
        random_state=rng,
    )
    regressor = RandomForestRegressor(
        max_depth=2,
        n_estimators=40,
        random_state=rng,
    )
    reg_frame, reg_names = _tabulate(reg_features, reg_target)

    # `scoring` is the metric used to score the X best features; per the
    # original author's note, the elimination phase itself relies on
    # sklearn's RFE.
    rfe_cv(
        reg_frame, reg_names, 'y', regressor,
        cv=5,
        scoring='r2',
        figsize=(7, 4),
    )

    # --- Classification ---------------------------------------------------
    cls_features, cls_target = make_classification(
        n_samples=sample_count,
        n_features=feature_count,
        n_classes=2,
        random_state=rng,
    )
    cls_frame, cls_names = _tabulate(cls_features, cls_target)

    # Settings shared by the shallow forests and by every classification run.
    shallow_forest = dict(max_depth=2, n_estimators=40, random_state=rng)
    accuracy_cv = dict(cv=5, scoring='accuracy')

    rfe_cv(
        cls_frame, cls_names, 'y', RandomForestClassifier(**shallow_forest),
        figsize=(7, 4),
        **accuracy_cv,
    )

    # --- Two models in the same figure ------------------------------------
    # The first call returns its two figures; the second call receives them
    # via `figs` so that both models end up on the same plots.
    first_model = RandomForestClassifier(**shallow_forest)
    fig_a, fig_b = rfe_cv(
        cls_frame, cls_names, 'y', first_model,
        figsize=(7, 4),
        return_fig=True,
        model_label='RF1',
        **accuracy_cv,
    )

    second_model = RandomForestClassifier(
        max_depth=None,
        n_estimators=10,
        random_state=rng,
    )
    rfe_cv(
        cls_frame, cls_names, 'y', second_model,
        return_fig=False,
        figs=[fig_a, fig_b],
        model_label='RF2',
        **accuracy_cv,
    )

    # --- Restricting the number of features -------------------------------
    # Same estimator instance as 'RF2', but capped with max_features=3.
    rfe_cv(
        cls_frame, cls_names, 'y', second_model,
        max_features=3,
        model_label='RF2a',
        return_fig=False,
        **accuracy_cv,
    )
# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()