04-normal-models.Rmd

# Normal Distribution Models

This chapter consists mainly of analytic derivations, but one section includes a worked computational example, which I reproduce here in both Stan and JAGS. 

## Stan Model for mean and variance unknown

The model for normal sampling with both the mean and the variance unknown.

```{r chp4-dag-1, echo=FALSE,fig.align='center',fig.cap='DAG for mean and variance unknown: Variance parameterization'}
# Embed the DAG image found under the directory held in `w.d`
# (`w.d` is defined outside this chunk).
knitr::include_graphics(
  path = paste0(w.d, "/dag/chp4-normal.png"),
  auto_pdf = TRUE
)
```

Or, alternatively,

```{r chp4-spec-1, echo=FALSE,fig.align='center',fig.cap='Model specification diagram for normal model'}
# Embed the model-specification diagram found under the directory held in
# `w.d` (`w.d` is defined outside this chunk).
knitr::include_graphics(
  path = paste0(w.d, "/model-spec/chp4-normal.png"),
  auto_pdf = TRUE
)
```


```{r chp4-normal-fit, warning=TRUE, message=TRUE, error=TRUE, cache=TRUE}

# Stan program (held as a string) for normal data with unknown mean and scale:
#   x[i]  ~ normal(mu, sigma)
#   mu    ~ normal(mu0, sigma0)
#   sigma ~ inv_gamma(alpha0, beta0)
# All hyperparameters (mu0, sigma0, alpha0, beta0) are supplied as data.
# The data block declares lower bounds so that a negative N or a negative
# scale/shape hyperparameter is rejected when the data are read, instead of
# failing later during sampling.
# NOTE(review): Stan's normal() takes a standard deviation as its second
#   argument, so sigma0 acts as the prior SD of mu, and the inverse-gamma prior
#   is placed on sigma itself rather than on sigma^2 -- confirm this is the
#   intended "variance parameterization".
model_normal <- '
data {
  int<lower=0>  N;
  real x[N];
  real mu0;
  real<lower=0> sigma0;
  real<lower=0> alpha0;
  real<lower=0> beta0;
}

parameters {
  real mu;
  real<lower=0> sigma;
}

model {
  x ~ normal(mu, sigma);
  mu ~ normal(mu0, sigma0);
  sigma ~ inv_gamma(alpha0, beta0);
}


'
# Data for the sampler, supplied as a named list: first the observations
# (sample size and values), then the prior hyperparameters.
mydata <- c(
  list(
    N = 10,
    x = c(91, 85, 72, 87, 71, 77, 88, 94, 84, 92)
  ),
  list(
    mu0 = 75,
    sigma0 = 50,
    alpha0 = 5,
    beta0 = 150
  )
)

# Initial values for the sampler. Takes no arguments and always returns the
# same point (mu = 50, sigma = 5).
start_values <- function() {
  inits <- list()
  inits$mu <- 50
  inits$sigma <- 5
  inits
}

# Fit the model. Some common sampler settings are spelled out explicitly:
#   chains  - number of Markov chains
#   iter    - total number of iterations per chain
#   warmup  - number of warm-up iterations per chain
#   cores   - number of cores (could use one per chain)
#   refresh - 0 means no progress is shown
fit <- stan(
  model_code = model_normal,
  data = mydata,
  init = start_values,
  chains = 4,
  iter = 5000,
  warmup = 1000,
  cores = 2,
  refresh = 0
)

# Summary table of the posterior draws
print(fit)

# Default plot of the fit; per the original notes this shows a 95% probability
# interval together with an 80% interval to contrast the dispersion
plot(fit)

# Trace plots for mu and sigma, with the warm-up draws included
rstan::traceplot(
  fit,
  pars = c("mu", "sigma"),
  inc_warmup = TRUE
)

# Gelman-Rubin-Brooks convergence criterion
ggs_grb(ggs(fit)) +
  theme_bw() +
  theme(panel.grid = element_blank())

# Autocorrelation of the draws
ggs_autocorrelation(ggs(fit)) +
  theme_bw() +
  theme(panel.grid = element_blank())

# Posterior draws as a matrix (one column per quantity in the fit)
posterior <- as.matrix(fit)

# Title/subtitle reused for the density plot
plot_title <- ggtitle(
  label = "Posterior distributions",
  subtitle = "with medians and 80% intervals"
)

# Posterior density areas for mu and sigma
mcmc_areas(x = posterior, pars = c("mu", "sigma"), prob = 0.8) +
  plot_title

# Joint scatter of the draws: sigma against mu.
# ggplot needs a data frame, so the matrix is converted first.
posterior <- as.data.frame(posterior)
p <- ggplot(data = posterior, mapping = aes(x = mu, y = sigma)) +
  geom_point() +
  theme_bw() +
  theme(panel.grid = element_blank())
p

# I prefer a posterior plot that includes prior and MLE

# Maximum-likelihood estimates of (mu, sigma). The ML estimate of sigma divides
# the sum of squares by n; sd() divides by n - 1 and so is not the MLE.
x_bar <- mean(mydata$x)
MLE <- c(x_bar, sqrt(mean((mydata$x - x_bar)^2)))

# Prior densities evaluated on a grid. The hyperparameters are read from
# `mydata` so the curves use the same values that are passed to the Stan model.
prior_mu <- function(x){dnorm(x, mean = mydata$mu0, sd = mydata$sigma0)}
x.mu <- seq(60.01, 120, 0.01)
prior.mu <- data.frame(mu=x.mu, dens.mu = prior_mu(x.mu))
prior_sig <- function(x){extraDistr::dinvgamma(x, mydata$alpha0, mydata$beta0)}
x.sig <- seq(0.01, 60, 0.01)
prior.sig <- data.frame(sigma=x.sig, dens.sig = prior_sig(x.sig))

# Legend colours (other candidates: "#56B4E9", "#CC79A7")
cols <- c("Posterior"="#0072B2", "Prior"="#E69F00", "MLE"= "black")

# mu: posterior density, prior curve, and a vertical line at the MLE.
# The prior layer maps the `mu` column of prior.mu rather than the free-standing
# vector x.mu, matching how the sigma panel is built.
p1 <- ggplot()+
  geom_density(data=posterior,
               aes(x=mu, color="Posterior"))+
  geom_line(data=prior.mu,
            aes(x=mu, y=dens.mu, color="Prior"))+
  geom_vline(aes(xintercept=MLE[1], color="MLE"))+
  scale_color_manual(values=cols, name=NULL)+
  theme_bw()+
  theme(panel.grid = element_blank())

# sigma: same three layers
p2 <- ggplot()+
  geom_density(data=posterior,
               aes(x=sigma, color="Posterior"))+
  geom_line(data=prior.sig,
            aes(x=sigma, y=dens.sig, color="Prior"))+
  geom_vline(aes(xintercept=MLE[2], color="MLE"))+
  scale_color_manual(values=cols, name=NULL)+
  theme_bw()+
  theme(panel.grid = element_blank())

# Side-by-side panels sharing one legend
p1 + p2 + plot_layout(guides="collect")

```


## JAGS Model for mean and variance unknown (precision parameterization)

The model for normal sampling with both the mean and the variance unknown.

```{r chp4-dag-2,echo=FALSE, fig.align='center', fig.cap='DAG for mean and variance unknown: Precision parameterization'}
# Embed the precision-parameterization DAG image found under the directory
# held in `w.d` (`w.d` is defined outside this chunk).
knitr::include_graphics(
  path = paste0(w.d, "/dag/chp4-normal-precision.png"),
  auto_pdf = TRUE
)
```

Or, alternatively,

```{r chp4-spec-2, echo=FALSE,fig.align='center',fig.cap='Model specification diagram for normal model with precision parameterization'}
# Embed the precision-parameterization model-specification diagram found under
# the directory held in `w.d` (`w.d` is defined outside this chunk).
knitr::include_graphics(
  path = paste0(w.d, "/model-spec/chp4-normal-precision.png"),
  auto_pdf = TRUE
)
```


Now for the computation using JAGS.

```{r chp4-normal-jags, warning=TRUE, message=TRUE, error=TRUE, cache=TRUE}

# Model written in the BUGS language inside an R function; it is handed to
# jags() through the `model.file` argument further down in this chunk.
# dnorm() here takes a mean and a precision (tau), not a standard deviation.
jags.model <- function(){

  # ---- Likelihood: each observation is normal with mean mu, precision tau ----
  for(i in 1:n){
    x[i] ~ dnorm(mu, tau)
  }

  # ---- Prior for the mean (mu) ----
  mu.mu <- 75                       # prior mean of mu
  sigma.squared.mu <- 50            # prior variance of mu
  tau.mu <- 1/sigma.squared.mu      # prior precision of mu
  mu ~ dnorm(mu.mu, tau.mu)

  # ---- Prior for the precision of the data (tau) ----
  nu.0 <- 10                        # hyperparameter of the prior for tau
  sigma.squared.0 <- 30             # hyperparameter of the prior for tau
  alpha <- nu.0/2                   # first argument of the gamma prior
  beta <- nu.0*sigma.squared.0/2    # second argument of the gamma prior
  tau ~ dgamma(alpha, beta)

  # ---- Derived quantities ----
  sigma.squared <- 1/tau            # variance of the data
  sigma <- pow(sigma.squared, 0.5)  # standard deviation (square root)

}
# Data passed to JAGS: the sample size `n` and the observations `x`
mydata <- list(
  n = 10,
  x = c(
    91, 85, 72, 87, 71,
    77, 88, 94, 84, 92
  )
)


# Initial values for the sampler. Takes no arguments and always returns the
# same point (mu = 75, tau = 0.1).
start_values <- function() {
  inits <- list()
  inits$mu <- 75
  inits$tau <- 0.1
  inits
}

# Names of the model quantities whose draws should be kept
param_save <- c("mu", "tau", "sigma")

# Run the sampler: 4 chains, 4000 iterations each with the first 1000 treated
# as burn-in, no thinning, and no progress bar
fit <- jags(
  data = mydata,
  inits = start_values,
  parameters.to.save = param_save,
  model.file = jags.model,
  n.chains = 4,
  n.iter = 4000,
  n.burnin = 1000,
  n.thin = 1,
  progress.bar = "none"
)

# Summary of the fitted model
print(fit)

# Convert the fit to an mcmc object holding the draws from all chains
jags.mcmc <- as.mcmc(fit)

# Trace plots
R2jags::traceplot(jags.mcmc)

# Gelman-Rubin-Brooks plot
gelman.plot(jags.mcmc)

# Stack the draws from all chains into a single data frame for plotting.
# `a` holds the monitored-quantity names taken from the first chain; the stacked
# matrix is requested with chain and iteration indicators, and its columns are
# then relabelled as "chain", "iter", followed by those names.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
a <- colnames(as.data.frame(jags.mcmc[[1]]))
plot.data <- data.frame(as.matrix(jags.mcmc, chains = TRUE, iters = TRUE))
colnames(plot.data) <- c("chain", "iter", a)


# Title/subtitle shared by the three density plots below
plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")

# Posterior density of mu
mcmc_areas(
  plot.data,
  pars = c("mu"),
  prob = 0.8) + 
  plot_title

# Posterior density of tau
mcmc_areas(
  plot.data,
  pars = c("tau"),
  prob = 0.8) + 
  plot_title

# Posterior density of sigma
mcmc_areas(
  plot.data,
  pars = c("sigma"),
  prob = 0.8) + 
  plot_title


# bivariate plot: tau against mu
p <- ggplot(plot.data, aes(x=mu, y=tau))+
  geom_point()+
  theme_bw()+
  theme(panel.grid = element_blank())
p


# I prefer a posterior plot that includes prior and MLE

# Maximum-likelihood estimates of (mu, tau). The ML estimate of the variance
# divides the sum of squares by n (var() divides by n - 1), and the ML estimate
# of the precision is its reciprocal.
x_bar <- mean(mydata$x)
MLE <- c(x_bar, 1/mean((mydata$x - x_bar)^2))

# Prior for mu: the JAGS model gives mu a prior mean of 75 and a prior
# VARIANCE of 50 (tau.mu = 1/50). dnorm() expects a standard deviation, so the
# matching density uses sd = sqrt(50), not 50.
prior_mu <- function(x){dnorm(x, mean = 75, sd = sqrt(50))}
x.mu <- seq(70.01, 100, 0.01)
prior.mu <- data.frame(mu=x.mu, dens.mu = prior_mu(x.mu))

# Prior for tau: gamma with alpha = nu.0/2 = 5 and beta = nu.0*sigma.squared.0/2
# = 150, the values set inside the JAGS model.
prior_tau <- function(x){dgamma(x, shape = 5, rate = 150)}
x.tau <- seq(0.0001, 0.06, 0.0001)
prior.tau <- data.frame(tau=x.tau, dens.tau = prior_tau(x.tau))

# Legend colours (other candidates: "#56B4E9", "#CC79A7")
cols <- c("Posterior"="#0072B2", "Prior"="#E69F00", "MLE"= "black")

# mu: posterior density, prior curve, and a vertical line at the MLE.
# The prior layer maps the `mu` column of prior.mu rather than the free-standing
# vector x.mu, matching how the tau panel is built.
p1 <- ggplot()+
  geom_density(data=plot.data,
               aes(x=mu, color="Posterior"))+
  geom_line(data=prior.mu,
            aes(x=mu, y=dens.mu, color="Prior"))+
  geom_vline(aes(xintercept=MLE[1], color="MLE"))+
  scale_color_manual(values=cols, name=NULL)+
  theme_bw()+
  theme(panel.grid = element_blank())

# tau: same three layers
p2 <- ggplot()+
  geom_density(data=plot.data,
               aes(x=tau, color="Posterior"))+
  geom_line(data=prior.tau,
            aes(x=tau, y=dens.tau, color="Prior"))+
  geom_vline(aes(xintercept=MLE[2], color="MLE"))+
  scale_color_manual(values=cols, name=NULL)+
  theme_bw()+
  theme(panel.grid = element_blank())

# Side-by-side panels sharing one legend
p1 + p2 + plot_layout(guides="collect")

```