-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDC_dinamic_parameters_estimation.R
120 lines (99 loc) · 5.72 KB
/
DC_dinamic_parameters_estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#_______________________________________________________________________________
# About: ~ Parameter estimation for the Dixon-Coles model (dynamic version)
#
# ~ For the i-th matchday (second half of the league --> i = 20, ..., 38)
# the list of optimal parameters will be saved as "par_list_i.RData"
# Notation "i" means that we want to estimate the model to predict the
# i-th matchday, so our training set will be from matchday 1 to (i-1)
#
# ~ Since we'll implement a profile log-likelihood approach, we estimate
# our parameters using (n) different values of xi (for each matchday)
#
# ~ par_list_i will have the following structure:
# (opt_par_xi0, opt_par_xi1, ... , opt_par_xin)
# where the generic opt_par_xij contains the optimal parameters of the
# model associated to the j-th value of xi. Its structure will be:
# (opt_par_xij$att, opt_par_xij$def, opt_par_xij$home, opt_par_xij$rho)
#_______________________________________________________________________________
#-------------------------------------------------------------------------------
# Load the season data, the project helper scripts and the required package
serieA_2122 <- read.csv(file = "data/serieA_21-22.csv")

# Team labels: the distinct values of the HomeTeam column, in table() order
teams <- names(table(serieA_2122[, "HomeTeam"]))

# Helper scripts, sourced one after the other in this exact order
invisible(lapply(
  c(
    "functions/DC_tau.R",
    "functions/DC_relist_params.R",
    "functions/add_date_difference.R",
    "functions/loglike/DC_dinamic_loglike.R"
  ),
  source
))

library(dplyr)
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
#........................OPTIMIZATION GLOBAL SETTINGS...........................
#-------------------------------------------------------------------------------
# The static Dixon-Coles parameters were already estimated on the entire
# season. They are reused here as the starting point of every optimization,
# so that the algorithm should reach the solution faster.
# No hessian is requested (hessian = FALSE): only point estimates are needed,
# not standard errors or confidence intervals.

# Starting values (the .RData file provides `DC_static_parameters`)
load(file = "parameters/DC_static_parameters.RData")
parameters_guess <- list(
  att  = DC_static_parameters$att[2:20],
  def  = DC_static_parameters$def[2:20],
  home = as.numeric(DC_static_parameters$home),
  rho  = as.numeric(DC_static_parameters$rho)
)
# Only elements 2:20 of `att` and `def` are kept: because of the sum-to-zero
# constraints the optimizer works on 2n parameters instead of 2n + 2.
# DC_relist_params() is the helper expected to deal with this.

# Settings forwarded to optim() in the estimation loop
user_dots <- list(
  maxit    = 15,
  method   = "BFGS",
  interval = "profile",
  hessian  = FALSE
)

# Grid of xi values explored for the profile log-likelihood.
# xi = 0 corresponds to the static version of the Dixon-Coles model.
xi_values <- seq(0, 0.01, by = 0.001)
#-------------------------------------------------------------------------------
#.........PARAMETERS ESTIMATION OVER THE SECOND HALF OF THE LEAGUE..............
#-------------------------------------------------------------------------------
# Estimate the dynamic Dixon-Coles parameters for each matchday i = 20, ..., 38.
# For every matchday the model is fitted once per value in `xi_values`, using
# matchdays 1..(i-1) as training data, and the results are saved to
# "parameters/DC_dinamic_parameters/par_list_<i>.RData".
#
# Reads the globals: serieA_2122, parameters_guess, user_dots, xi_values,
# and the sourced helpers add_date_difference(), DC_dinamic_loglike(),
# DC_relist_params().

# The row slicing below assumes 10 consecutive rows per matchday
matches_per_day <- 10
output_dir <- "parameters/DC_dinamic_parameters"

# Fail fast: indexing past the last row would silently produce all-NA rows
if (nrow(serieA_2122) < matches_per_day * 38) {
  stop(
    "serieA_2122 has ", nrow(serieA_2122), " rows but ",
    matches_per_day * 38, " are needed to cover matchdays 1 to 38",
    call. = FALSE
  )
}

# Make sure the destination exists before the (long) optimizations start,
# otherwise save() would fail only after the first matchday has been fitted
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

for (i in 20:38) {
  cat("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
  cat("Parameters estimation to predict matchday n.", i, "...\n")
  cat("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")

  #---------------------------------------------------------------------------
  # (1) Training and test set for the current matchday
  training <- serieA_2122[seq_len(matches_per_day * (i - 1)), ]
  test <- serieA_2122[(matches_per_day * (i - 1) + 1):(matches_per_day * i), ]
  # Add date differences to the training set
  training <- add_date_difference(training, test)
  #---------------------------------------------------------------------------

  # Optimal parameters of this matchday, filled in one xi value at a time
  par_list <- list()

  #---------------------------------------------------------------------------
  # (2) Iteration over all xi values
  cat("----------------------------------------------------\n")
  for (xi in xi_values) {
    cat("Model associated to xi = ", xi, "...\n")

    # Fit the model for the current xi, starting from the static estimates
    fit <- optim(
      par = unlist(parameters_guess),
      fn = DC_dinamic_loglike,
      data = training,
      xi = xi,
      method = user_dots$method,
      hessian = user_dots$hessian,
      control = list(maxit = user_dots$maxit)
    )

    # optim() returns a non-zero code when it stops without converging
    # (e.g. the iteration limit was reached); report it instead of hiding it
    if (fit$convergence != 0) {
      message(
        "optim() did not report convergence for matchday ", i,
        ", xi = ", xi, " (convergence code ", fit$convergence, ")"
      )
    }

    # Re-list the flat parameter vector returned by optim()
    DC_dinamic_parameters_xi <- DC_relist_params(fit$par)

    # NOTE(review): append() concatenates lists, so if DC_relist_params()
    # returns a list its elements are spliced into par_list instead of being
    # stored as one nested entry per xi. Kept unchanged to preserve the layout
    # of the saved files -- confirm against the code that reads them.
    par_list <- append(par_list, DC_dinamic_parameters_xi)
    cat("Estimation using xi=", xi, "DONE!\n")
  }
  cat("----------------------------------------------------\n")
  #---------------------------------------------------------------------------

  #---------------------------------------------------------------------------
  # Save optimal parameters of the current matchday
  cat("....................................................\n")
  cat("Saving parameters of matchday n.", i, "...\n")
  cat("....................................................\n")
  filepath <- file.path(output_dir, paste0("par_list_", i, ".RData"))
  save(par_list, file = filepath)
  #---------------------------------------------------------------------------
}