-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfifa20.Rmd
154 lines (109 loc) · 3.94 KB
/
fifa20.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
---
title: "Predictive model for FIFA 20"
output:
  html_document:
    toc: true
---
# Install packages
```{r eval=FALSE}
# Packages used by this document. parallelMap is needed by the modelStudio
# chunk below, which is run with `parallel = TRUE`.
install.packages(c(
  "DALEX", "modelStudio", "ingredients", "iBreakDown", "gbm", "parallelMap"
))
```
# Read data
Use the `players_20.csv` data from Kaggle: <https://www.kaggle.com/stefanoleone992/fifa-20-complete-player-dataset>.
It is available as the `fifa` dataset in the `DALEX` package.
```{r warning=FALSE, message=FALSE}
# Attach DALEX and take a working copy of the `fifa` data set it provides.
library("DALEX")
data <- DALEX::fifa
```
# Feature selection
This dataset has 4 potential target variables and one categorical feature.
Let's select only the `value_eur` target and remove the `nationality` feature.
```{r warning=FALSE, message=FALSE}
# Drop the three unused targets and the `nationality` column in one step;
# `value_eur` is kept as the only target.
data[c("wage_eur", "overall", "potential", "nationality")] <- NULL
```
# Feature engineering
The target value is skewed, so it is much easier to model `log10(value_eur)`.
```{r warning=FALSE, message=FALSE}
# Replace the target with its base-10 logarithm; the explainer chunk later
# undoes this with `10^`.
data[["value_eur"]] <- log10(data[["value_eur"]])
```
# Create a gbm model
Let's use the `gbm` library to create a `gbm` model with 300 trees and an interaction depth of 4.
```{r warning=FALSE, message=FALSE}
# Seed the RNG before fitting.
set.seed(1313)
library(gbm)
# Fit the log10-transformed target against every other column of `data`.
model <- gbm(
  formula = value_eur ~ .,
  data = data,
  n.trees = 300,
  interaction.depth = 4
)
```
# Create a DALEX explainer
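Let's wrap the `gbm` model in a DALEX explainer. Both `y` and the predictions are back-transformed with `10^`, so all explanations are on the original EUR scale.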
```{r warning=FALSE, message=FALSE}
# Wrap the fitted gbm model in a DALEX explainer that works on the original
# EUR scale (the model itself was trained on log10(value_eur)).
explainer <- DALEX::explain(
  model,
  # Drop the target by name rather than by position, so the explainer data
  # stays correct even if the column order of `data` changes.
  data = data[, setdiff(names(data), "value_eur"), drop = FALSE],
  # Undo the log10 transform so that `y` is expressed in EUR.
  y = 10^data$value_eur,
  # Back-transform predictions too; n.trees matches the value used in the fit.
  predict_function = function(m, x) {
    10^predict(m, x, n.trees = 300)
  },
  label = "gbm"
)
```
# Feature Importance explainer
Calculate Feature Importance explainer.
```{r warning=FALSE, message=FALSE}
library(ingredients)
# Feature importance for the explainer, computed with B = 15.
fi <- ingredients::feature_importance(
  explainer,
  B = 15
)
# Limit the plot to at most nine variables.
plot(fi, max_vars = 9)
```
# Partial Dependency explainer
Calculate Partial Dependency explainer.
```{r warning=FALSE, message=FALSE}
# Partial dependency profile of the explainer for the `age` variable.
pdp <- ingredients::partial_dependency(
  explainer,
  variables = "age"
)
plot(pdp)
```
# Ceteris Paribus explainer
Calculate Ceteris Paribus explainer.
```{r warning=FALSE, message=FALSE}
# Select the observation to explain by its row name.
new_observation <- data["R. Lewandowski", ]
# Ceteris Paribus profile for `age`, evaluated on a grid from 18 to 45
# in steps of 0.1.
cp <- ingredients::ceteris_paribus(
  explainer,
  new_observation = new_observation,
  variables = "age",
  variable_splits = list(age = seq(from = 18, to = 45, by = 0.1))
)
plot(cp, subtitle = "for Robert Lewandowski (GBM model)")
```
# Break Down explainer
Calculate Break Down explainer.
```{r warning=FALSE, message=FALSE}
library(iBreakDown)
library(ggplot2)
library(scales)

# Break Down explanation for the selected observation, with the default plot.
bd <- iBreakDown::break_down(explainer, new_observation = new_observation)
plot(bd)

# The same explanation with a customised y axis: euro-suffixed labels and
# fixed limits/breaks given in millions.
plot(
  bd,
  digits = 0,
  max_features = 10,
  subtitle = "for Robert Lewandowski"
) +
  scale_y_continuous(
    name = "Estimated value",
    labels = dollar_format(suffix = "€", prefix = ""),
    limits = 1e6 * c(1, 75),
    breaks = 1e6 * c(1, 25, 50, 75)
  )

# SHAP explanation for the same observation, computed with B = 15.
sh <- iBreakDown::shap(
  explainer,
  new_observation = new_observation,
  B = 15
)
plot(sh, max_features = 9)
```
# modelStudio app
Create a `modelStudio` dashboard.
More resources: https://github.com/ModelOriented/modelStudio
```{r eval = FALSE, warning=FALSE, message=FALSE}
library(modelStudio)

# parallelMap defaults for the parallel computation requested below:
# socket mode, 4 CPUs, no progress info.
options(
  parallelMap.default.mode = "socket",
  parallelMap.default.cpus = 4,
  parallelMap.default.show.info = FALSE
)

# Observations shown in the dashboard: the first 40 rows of the data.
fifa_selected <- data[seq_len(40), ]

# Build the studio for the explainer.
fifa20_ms <- modelStudio(
  explainer,
  new_observation = fifa_selected,
  parallel = TRUE,
  rounding_function = signif,
  digits = 5,
  options = ms_options(
    # show_boxplot = FALSE,
    margin_left = 175,
    margin_ytitle = 110,
    ms_title = "Interactive Model Studio for ⚽⚽⚽ FIFA 20 ⚽⚽⚽ (GBM model)"
  )
)

# Display the dashboard.
fifa20_ms

# Save the dashboard as HTML.
r2d3::save_d3_html(fifa20_ms, file = "fifa20_ms.html")
```