-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcustom_sim.R
160 lines (107 loc) · 4.93 KB
/
custom_sim.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# custom sim with rordbeta
library(ordbetareg)
library(glmmTMB)
library(tidyverse)
# we'll use glmmTMB because it's much faster and we're only going to use it
# for point estimation
# parameters to loop over and get power curves for
treat_effects <- seq(0,1, by=0.2)
num_subjects <- seq(10,20,by=2)
num_measures_per_day <- seq(2,5,by=1)
num_days <- 10
# measure of how much variance there is within-subject over time
# equivalent to sd of random intercepts
sd_within_subject <- .2
# need to simulate dispersion (phi)
# higher values = more concentration (sort of like lower variance)
phi <- 1
# p-value threshold to use
p_threshold <- 0.05
# make a big grid
all_params <- expand.grid(treat_effects,num_subjects,
num_measures_per_day,
num_days,
sd_within_subject,
phi)
names(all_params) <- c("treat_effects","num_subjects",
"num_measures_per_day","num_days","sd_within_subject",
"phi")
# number of sims per parameter combination
# 100 would be a bit more conservative
sims <- 10
# parallel processing to make it run faster
# set to just vanilla lapply if on Windows
over_params <- parallel::mclapply(1:nrow(all_params), function(i) {
this_param <- slice(all_params, i)
# now loop over iterations
over_sims <- lapply(1:sims, function (s) {
# first draw random intercept for each respondent
var_int <- rnorm(n=this_param$num_subjects,
mean=0,sd=this_param$sd_within_subject)
# make vector with one intercept per participant
# multiple measures = multiple obs per respondent
var_int_all <- rep(var_int,each=this_param$num_measures_per_day*this_param$num_days)
# simulate outcome with function rordbeta given parameters
# first simulate predictor as random uniform
covar <- runif(n=this_param$num_subjects * this_param$num_measures_per_day * this_param$num_days,
min = 0,
max=1)
# note: no overall intercept, we assume it is zero but that could be changed
# logit function for linear model
linear_model <- plogis(this_param$treat_effects * covar +
var_int_all)
out_ordbeta <- rordbeta(n=this_param$num_subjects * this_param$num_measures_per_day * this_param$num_days,
mu = linear_model,
phi=this_param$phi)
# fit a glmmTMB model for speed
to_model <- tibble(out_ordbeta=out_ordbeta,
covar=covar,
subject=rep(1:this_param$num_subjects,
each=this_param$num_measures_per_day * this_param$num_days),
days=rep(1:this_param$num_days, times=this_param$num_measures_per_day*this_param$num_subjects))
# fit varying intercepts model if > 1 obs per respondent
if(this_param$num_measures_per_day>1 || this_param$num_days>1) {
glmtmb_fit <- glmmTMB(out_ordbeta ~ covar + (1|subject),data = to_model,
family=ordbeta)
} else {
glmtmb_fit <- glmmTMB(out_ordbeta ~ covar,data = to_model,
family=ordbeta)
}
# now get estimates and see if coef is significant and what the bias is
glm_sum <- summary(glmtmb_fit)
# simulation results
sim_res <- tibble(param_vals=i,
iteration=s,
treat_est=glm_sum$coefficients$cond['covar','Estimate'],
treat_pvalue=glm_sum$coefficients$cond['covar','Pr(>|z|)'],
treat_err=glm_sum$coefficients$cond['covar','Std. Error'],
true_treat=this_param$treat_effects,
true_phi=this_param$phi,
num_subjects=this_param$num_subjects,
num_measures_per_day=this_param$num_measures_per_day,
sd_within_subject=this_param$sd_within_subject,
p_threshold=p_threshold)
# power = p < threshold
sim_res <- mutate(sim_res,
treat_sig=as.numeric(treat_pvalue < p_threshold))
return(sim_res)
}) %>% bind_rows
return(over_sims)
},mc.cores=parallel::detectCores()) %>% bind_rows
# estimate power by param combination
power_est <- group_by(over_params, param_vals,true_treat,
true_phi,num_subjects, num_measures_per_day,
sd_within_subject,p_threshold) %>%
summarize(power_est=mean(treat_sig))
# plot power curve for varying N
# with different lines for different true treatment effects
# facet wrap by num_measures_per_day
library(ggplot2)
power_est %>%
mutate(num_measures_per_day=factor(num_measures_per_day),
true_treat=paste0("Effect size: ",true_treat)) %>%
ggplot(aes(y=power_est,
x=num_subjects)) +
geom_line(aes(linetype=num_measures_per_day,
group=num_measures_per_day)) +
facet_wrap(~true_treat)