-
Notifications
You must be signed in to change notification settings - Fork 0
/
oscars-bradley-terry-teams-multiyear.R
115 lines (85 loc) · 3.69 KB
/
oscars-bradley-terry-teams-multiyear.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
library(rstan)
library(ggplot2)
library(data.table)
# map (-infinity, infinty) -> (0, 1)
inv_logit <- function(u) 1 / (1 + exp(-u))
# map vector to vector with sum of zero
center <- function(u) u - sum(u) / length(u)
oscars <- read.table("data/oscars.csv", header = TRUE, sep = ",")
years <- seq(2006, 1987)
pvars <- c('DGA', 'DN', 'DP', 'DPrN', 'Gd') # "player" variables
pvars.c <- paste0(pvars, '.c')
Y <- length(years) # years
J <- length(pvars) # players per team
p <- 5 # teams per year
T <- Y * p # teams
K <- T * J # players
m <- p * (p - 1) # matches per year - team vs team within each year but no self-play
N <- Y * m # matches
# DD Best Director
min.year <- min(years) - 1
oscars.my <- oscars[oscars$Year > min.year &
oscars$DD == 1 &
oscars$Ch != 0, c("Year", "Name", "Movie", pvars, "Ch")]
oscars.my <- data.table(oscars.my)
oscars.my.orig <- oscars.my
oscars.my[, y:=ifelse(oscars.my$Ch == 2, 0, 1)]
lapply(seq_along(pvars), function(x) oscars.my[, pvars.c[x]:=center(get(pvars[x]))])
sigma <- 1 # scale of player ability variation
alpha <- c(t(oscars.my[, pvars.c, with=FALSE])) # player abilities for comparison plot
########################################################################################################
# Y = 10 years, m = 20 matches per year, J = 4 players per team, p = 5 teams per year, K = 200 players
players <- t(matrix(1:K, J, T)) # player IDs matrix for team0 & team1
team0.my <- array(NA, c(Y, m, J)) # players on team 0
team1.my <- array(NA, c(Y, m, J)) # players on team 1
alpha_team0.my <- array(NA, c(Y, m, J)) # ability of players on team 0
alpha_team1.my <- array(NA, c(Y, m, J)) # ability of players on team 1
for ( y in 1:Y ) {
team0 <- matrix(NA, m, J) # players on team 0
team1 <- matrix(NA, m, J) # players on team 1
alpha_team0 <- matrix(NA, m, J) # ability of players on team 0
alpha_team1 <- matrix(NA, m, J) # ability of players on team 1
n <- 1
for ( i in 1:p ) {
for ( j in 1:p ) {
if ( i != j ) {
k = i + p * (y - 1)
l = j + p * (y - 1)
team0[n, 1:J] <- as.matrix(players[k,])
team1[n, 1:J] <- as.matrix(players[l,])
alpha_team0[n, 1:J] <- as.matrix(oscars.my[Year %in% years[y]][i, pvars.c, with=FALSE])
alpha_team1[n, 1:J] <- as.matrix(oscars.my[Year %in% years[y]][j, pvars.c, with=FALSE])
n <- n + 1
}
}
}
team0.my[y,,] <- team0
team1.my[y,,] <- team1
alpha_team0.my[y,,] <- alpha_team0
alpha_team1.my[y,,] <- alpha_team1
}
y <- matrix(NA, Y, m)
for ( year in 1:Y ) {
for (n in 1:m) {
sum_alpha_team0 = sum(alpha_team0.my[year, n, 1:J])
sum_alpha_team1 = sum(alpha_team1.my[year, n, 1:J])
y[year, n] <- rbinom(1, 1, inv_logit(sum_alpha_team1 - sum_alpha_team0))
}
}
########################################################################################################
team_data <- list(K = K, J = J, N = m, Y = Y, team0 = team0.my, team1 = team1.my, y = y)
# approx 4 min to sample
team_model <- stan_model("oscars-bradley-terry-teams-multiyear.stan")
system.time( team_posterior <- sampling(team_model, data = team_data, init=0.5) )
print(team_posterior, c("sigma", "alpha"), probs=c(0.05, 0.5, 0.95))
alpha_hat <- rep(NA, K)
for (k in 1:K)
alpha_hat[k] <- mean(extract(team_posterior)$alpha[ , k])
team_bayes_fit_plot <-
ggplot(data.frame(alpha = alpha, alpha_hat = alpha_hat),
aes(x = alpha, y = alpha_hat)) +
geom_abline(slope = 1, intercept = 0, color="green", size = 2) +
geom_point(size = 2)
team_bayes_fit_plot
# R^2 correlation does not look great
# but it's clearly a y = x line of best fit