Skip to content

Latest commit

 

History

History
125 lines (98 loc) · 4.14 KB

regression_coeff_recovery.md

File metadata and controls

125 lines (98 loc) · 4.14 KB

#coeff recovery in regression analysis using brms

Here we show an example for a simulation where we generate data based on known 'true' coeff and recover thoese using brms. This simulation allows us to explore how sample-size and effect-size influence our uncertinty estimates (aka CI). Its also a first step before going to a power analysis. For each study: sample individual parameters based on known coeff and their sds. simulate data based on the individual parameters estimate the parameters based on the "observed" data

We can repeat this for a few studies, and then see what size of CI/pd we observe, and whether our point-estimate is close to the real values used to sample individual parameters.

#Aim: get the results of N studies trying to estimate a true effect (intercept and slope).
#the fixed and scale parameters are used to generate individual parameters for a specific study
#then this study estimates the fixed effect while also including CI and pd estimates.


rm(list=ls())
library(brms)
library(cmdstanr)
library(bayestestR)
library(dplyr)
library(lme4)
library(lmerTest)

####some preparations with individual parameters and data simulation----------------
#simulation configuration
Nsubjects    = 50
Ntrials      = 100
Nstudies     = 10


#function for data generation
generate_df = function(Nsubjects, Ntrials, eta) {
  df=data.frame()
  
  for (i in 1:Nsubjects){
    subject        = rep(i, each = Ntrials)
    trial          = seq(1, Ntrials)
    x0             = rep(1, each = Ntrials) #for the intercept
    x1             = sample.int(2,Ntrials,replace=T)-1 #condition coded 0 or 1
    beta_0         = rep(eta[i,1],Ntrials)
    beta_1         = rep(eta[i,2],Ntrials)
    y              = cbind(x0,x1)%*%eta[i,]+rnorm(Ntrials,0,1)
    
    df = rbind(df,data.frame(subject,trial,beta_0,beta_1,x1,y))
  }
  return(df)
}


#that's the model we'll be using
fit_prior = c(
  set_prior(
    prior = "normal(0,0.2)",
    class = "b",
    coef = "Intercept"
  ),
  set_prior(
    prior = "normal(0,0.2)",
    class = "b",
    coef = "x1")
)

model_empty <- brm(y ~ 0+Intercept+x1 +(1+x1 |subject),
                   data   = data.frame(subject=1,
                                       x1=1,
                                       y=1),
                   prior  = fit_prior,
                   backend= "cmdstanr",
                   chains = 1,
                   silent = 0)

#initialize data frames for simulation output
df_bayes=df_freq=list()
df_bayes[['x0']]=df_bayes[['x1']]=data.frame()
df_freq [['x0']]=df_freq [['x1']]=data.frame()



####simulate studies-----------------------

for (study in 1: Nstudies){
  #a current study begins here
  
  #we sample some participant from the population. the means are the 'fixed effects' and the sds are the 'random effect'.
  #note that this also has a covariance parameter for the association between the intercept and slope in the population
  mysigma = matrix(c(1.0,0.2,
                     0.2,1.0),
                   2,2) #varcov matrix where diag is the variance for each param, and off-diag is the covar
  
  eta    = MASS::mvrnorm(n     = Nsubjects,
                         mu    = c(0,.20),  #these are the fixed effects for the intercept and slope,
                         Sigma = mysigma       #variance-covariance matrix
                         )
  
  
  #then we 'run' the study and observe the data
  df=generate_df(Nsubjects,Ntrials,eta)
  
  #Bayes estimation
  model <- update(model_empty,
                  newdata   = df,
                     prior  = fit_prior,
                     backend= "cmdstanr",
                     iter   =1100,
                     warmup =1000,
                     chains = 10,
                     silent = 0,
                     refresh = 0,
                     cores=10)
  
  df_bayes[['x0']]=rbind(df_bayes[['x0']],describe_posterior(model,parameters='b_Intercept'))
  df_bayes[['x1']]=rbind(df_bayes[['x1']],describe_posterior(model,parameters= 'b_x1'))

  
  #freq estimation
  model = lmer(y~1+x1+(1+x1|subject),
               data=df) 
  df_freq[['x0']]=rbind(df_freq[['x0']],summary(model)$coefficients[1,])
  df_freq[['x1']]   =rbind(df_freq[['x1']],summary(model)$coefficients[2,])
}