#######################################################################################
#######################################################################################
######################                                           ######################
######################       EXERCISE WAMBS CHECKLIST            ######################
######################  by Rens van de Schoot and Sonja Winter   ######################
######################                                           ######################
#######################################################################################
#######################################################################################

# Install the rjags package only if it is not already available, then attach it.
# library() fails loudly when the package is missing, whereas require() merely
# returns FALSE and would let the rest of the script fail later in confusing ways.
if (!requireNamespace("rjags", quietly = TRUE)) {
  install.packages("rjags")
}
library(rjags)

###################### CHOOSING PRIORS ################################################

# First, we need to think about the prior distributions and hyperparameters for our
# model. For the current model, we need to think about 4 priors: the intercept, the
# two regression parameters, and the residual variance. BUGS/JAGS uses tau
# (precision) instead of sigma^2 (variance). Below, we will specify the SD of a
# normal distribution, which is the sqrt of sigma^2. To translate this SD to tau you
# first square it (to get sigma^2), then you take the inverse: 1/sigma^2. For
# example, if you want a prior variance of 100 you specify a prior SD of sqrt(100)
# in the distribution plots below. To translate this to your JAGS model, you do
# 1/variance = 1/100 = 0.01. Et voila!
#
# For this model, we want to specify Normal priors on the intercept and regression
# parameters, and a Gamma prior on tau (precision), which governs the residual
# variance of the outcome variable.
#
# We created two small functions so that it becomes easier to try out what
# different priors look like in practice.
# Plot a prior density with reference lines at its mean (red) and at
# mean +/- 2 SD (blue).
#
# Input:
#   type     = either "normal" or "gamma"
#   mean     = mean of the normal distribution
#   variance = variance of the normal distribution
#   shape    = for gamma: the shape of the distribution (must be > 0;
#              the old default of 0 was not a valid gamma parameter)
#   scale    = for gamma: the scale of the distribution
#   sec.min / sec.max / step = x-axis interval shown and the step size
#   label    = label of the x-axis (previously defaulted to itself, which
#              errored whenever the argument was omitted)
#
# First run the function:
prior.plot <- function(type = "normal", mean = 0, variance = 100,
                       shape = 1, scale = 1,
                       sec.min = -50, sec.max = 50, step = .1,
                       label = "x") {
  x <- seq(sec.min, sec.max, by = step)
  if (type == "normal") {
    prior.d <- dnorm(x, mean = mean, sd = sqrt(variance))
    prior.mean <- mean
    prior.sd <- sqrt(variance)
  }
  if (type == "gamma") {
    prior.d <- dgamma(x, shape = shape, scale = scale)
    # For a gamma(shape, scale) distribution: mean = shape * scale and
    # sd = sqrt(shape) * scale. The original code reused the normal's
    # mean/variance arguments here, drawing the reference lines in the
    # wrong place for gamma priors.
    prior.mean <- shape * scale
    prior.sd <- sqrt(shape) * scale
  }
  # NOTE: the original call contained a stray empty argument
  # (plot(x, prior.d, type="l", , xlab = label)), removed here.
  plot(x, prior.d, type = "l", xlab = label)
  abline(v = prior.mean, col = "red")
  abline(v = prior.mean + 2 * prior.sd, col = "blue")
  abline(v = prior.mean - 2 * prior.sd, col = "blue")
}

# Then, to obtain a plot, use for example:
prior.plot(type = "normal", mean = 10, variance = 100, label = "intercept")

# Plots of all four priors, with a red line for the mean and blue lines
# for +/- 2 SD.
par(mfrow = c(2, 2))
prior.plot(type = "normal", mean = 10, variance = 5, label = "intercept")
prior.plot(type = "normal", mean = .8, variance = 5, label = "beta 1")
prior.plot(type = "normal", mean = 0, variance = 10, label = "beta 2")
prior.plot(type = "gamma", shape = .5, scale = .5,
           sec.min = 0, sec.max = 0.5, step = .001, label = "res.var")
par(mfrow = c(1, 1))

###################### RUNNING THE MODEL ##############################################

# Load data (make sure you set the working directory to the folder where your
# data is). To set your working directory to the right folder use:
# setwd("C:/your/folder/here")
setwd("D:/exercise-day-4")

# Load the data.
data <- read.table("data_example.txt")

# Split the data into vectors, one per variable.
# For this model, we will not use y1.
# Variables in the model:
#   y2 = lag between planned PhD duration and actual PhD duration
#   x1 = age of participant
#   x2 = age squared
# (y1 is extracted for completeness but is not used in this model.)
y1 <- data[, 1]
y2 <- data[, 2]
x1 <- data[, 3]
x2 <- data[, 4]

# Number of MCMC iterations, and the number of burn-in iterations that will
# not be included when summarizing the posterior.
iterations <- 200
burnin <- floor(iterations / 2)
chains <- 2

# Run the JAGS model. The txt file holds the BUGS model definition (excluding
# the data and inits parts). `data` is a list of the vectors specified above;
# n.chains is the number of chains specified above.
# Note: if the following call does not work, you may need to change your
# working directory again to the location of the model_example.txt file.
model.fit <- jags.model(file = "model_example.txt",
                        data = list(y = y2, x1 = x1, x2 = x2),
                        n.chains = chains)

# Run the model for a number of MCMC iterations and extract random samples
# from the posterior distribution of the parameters of interest: first the
# model object, then c() with the parameters to monitor; n.iter is the number
# of iterations specified above.
model.samples <- coda.samples(model.fit,
                              c("intercept", "beta1", "beta2", "sigma2"),
                              n.iter = iterations)

# Summary estimates of the parameters, after discarding the burn-in.
summary(window(model.samples, start = burnin))

# Diagnostic plots to check convergence for all parameters:
# trace plots only (no densities yet).
plot(model.samples, trace = TRUE, density = FALSE)

###################### CONVERGENCE DIAGNOSTICS ########################################

# Gelman-Rubin diagnostic: look at the Upper CI / Upper limit — it should be
# close to 1. When autoburnin = FALSE, the burn-in phase of the iterations is
# included in the calculation, (normally) resulting in a higher PSRF value.
# Pay close attention to the y-axis: even if the PSRF seems to go up a lot,
# the y-axis may show that the range is actually very small.
gelman.diag(model.samples)
gelman.diag(model.samples, autoburnin = FALSE)
gelman.plot(model.samples, autoburnin = FALSE)

# Geweke diagnostic: outputs a z-score. A z-score larger than 1.96 in absolute
# value is associated with a p-value < .05 (two-tailed), so these values should
# all be smaller than 1.96 in absolute value.
geweke.diag(model.samples)

# If geweke.diag indicates that the first and last part of a sample from a
# Markov chain are not drawn from the same distribution, it may be useful to
# discard the first few iterations to see whether the rest of the chain has
# "converged". This plot shows what happens to Geweke's z-score when
# successively larger numbers of iterations are discarded from the beginning
# of the chain.
geweke.plot(model.samples)

###################### RUNNING THE MODEL WITH MORE ITERATIONS #########################

# Change the number of iterations and the burn-in variable.
iterations <- 500
burnin <- floor(iterations / 2)

# Run the JAGS model again. The txt file holds the BUGS model definition
# (excluding the data and inits parts). `data` is a list of the vectors
# specified earlier; n.chains is the number of chains specified earlier.
model.fit.2 <- jags.model(file = "model_example.txt",
                          data = list(y = y2, x1 = x1, x2 = x2),
                          n.chains = chains)

# Next we draw MCMC samples and extract posterior draws for the parameters of
# interest, just as for the first model.
model.samples.2 <- coda.samples(model.fit.2,
                                c("intercept", "beta1", "beta2", "sigma2"),
                                n.iter = iterations)

# To compute relative bias, save the summaries of the two models:
# model 1 = 200 iterations; model 2 = 500 iterations.
sum.model.1 <- summary(window(model.samples, start = 100))
sum.model.2 <- summary(window(model.samples.2, start = 250))

# Quickly compute the relative bias (in %) of the posterior means between two
# model summaries, alongside the estimates themselves.
#   model1, model2 = objects returned by summary() on the MCMC samples
#                    (posterior means are in column 1 of the first component)
# Prints a matrix with one row per parameter and (invisibly) returns it.
bias.model <- function(model1, model2) {
  est1 <- model1[[1]][, 1]
  est2 <- model2[[1]][, 1]
  # Vectorized over all parameters at once; the original grew the vector one
  # element at a time inside a 1:nrow(...) loop.
  bias <- cbind((est2 - est1) / est1 * 100, est1, est2)
  rownames(bias) <- rownames(model1[[1]])
  colnames(bias) <- c("% bias", "est model 1", "est model 2")
  print(bias)
}

# Compare the model with 500 iterations (sum.model.2, used as the reference)
# to the model with 200 iterations (sum.model.1).
bias.model(sum.model.2, sum.model.1)

# Diagnostics for the longer run.
gelman.diag(model.samples.2, autoburnin = FALSE)
gelman.plot(model.samples.2, autoburnin = FALSE)
geweke.diag(model.samples.2)
geweke.plot(model.samples.2)

###################### CHECKING THE HISTOGRAM #########################################

# Histograms of the posterior distributions. Columns are selected by NAME:
# coda orders monitored variables alphabetically (beta1, beta2, intercept,
# sigma2), so the original positional indexing ([,1]..[,4]) put the wrong
# titles on the wrong parameters.
par(mfrow = c(2, 2))
hist(model.samples.2[[1]][, "intercept"], breaks = 50, main = "Intercept")
hist(model.samples.2[[1]][, "beta1"], breaks = 50, main = "Age")
hist(model.samples.2[[1]][, "beta2"], breaks = 50, main = "Age squared")
hist(model.samples.2[[1]][, "sigma2"], breaks = 50, main = "Residual variance")
par(mfrow = c(1, 1))

###################### CHECKING AUTOCORRELATIONS ######################################
autocorr.plot(model.samples.2)

###################### CHECKING KERNEL DENSITY ########################################
plot(model.samples.2, trace = FALSE, density = TRUE)

###################### AN ALTERNATIVE PRIOR FOR THE RESIDUALS #########################

# Change the number of iterations and the burn-in variable if needed.
iterations <- 500
burnin <- floor(iterations / 2)

# Run the JAGS model (fitted in the next section).
# The txt file holds the BUGS model definition (excluding the data and inits
# parts); this version places a Gamma prior on the residuals.
model.fit.3 <- jags.model(file = "model_example_gamma.txt",
                          data = list(y = y2, x1 = x1, x2 = x2),
                          n.chains = chains)

# Run the model:
model.samples.3 <- coda.samples(model.fit.3,
                                c("intercept", "beta1", "beta2", "sigma2"),
                                n.iter = iterations)

# Save the summary of the post-burn-in samples.
sum.model.3 <- summary(window(model.samples.3, start = 250))

# Request the posterior results.
summary(window(model.samples.3, start = burnin))

# And compare the relative bias against the 500-iteration reference model.
bias.model(sum.model.2, sum.model.3)

###################### RUNNING THE MODEL WITH NONINFORMATIVE PRIORS ###################

# Change the number of iterations and the burn-in variable if needed.
iterations <- 500
burnin <- floor(iterations / 2)

# Run the JAGS model. The txt file holds the BUGS model definition (excluding
# the data and inits parts); this version uses noninformative (flat) priors.
model.fit.4 <- jags.model(file = "model_example_noninf.txt",
                          data = list(y = y2, x1 = x1, x2 = x2),
                          n.chains = chains)

# Run the model:
model.samples.4 <- coda.samples(model.fit.4,
                                c("intercept", "beta1", "beta2", "sigma2"),
                                n.iter = iterations)

# Diagnostic plots to check convergence for all parameters:
# trace plots and posterior densities.
plot(model.samples.4)

# Histograms of the posterior distributions. Columns are selected by NAME:
# coda orders monitored variables alphabetically (beta1, beta2, intercept,
# sigma2), so the original positional indexing ([,1]..[,4]) put the wrong
# titles on the wrong parameters.
par(mfrow = c(2, 2))
hist(model.samples.4[[1]][, "intercept"], breaks = 50, main = "Intercept")
hist(model.samples.4[[1]][, "beta1"], breaks = 50, main = "Age")
hist(model.samples.4[[1]][, "beta2"], breaks = 50, main = "Age squared")
hist(model.samples.4[[1]][, "sigma2"], breaks = 50, main = "Residual variance")
par(mfrow = c(1, 1))

# Autocorrelation plots.
autocorr.plot(model.samples.4)

# Summary estimates of the parameters.
summary(window(model.samples.4, start = burnin))

# Gelman diagnostics.
gelman.diag(model.samples.4)
gelman.plot(model.samples.4)

# Geweke diagnostics.
geweke.diag(model.samples.4)
geweke.plot(model.samples.4)

# Save the summary of the model output.
sum.model.4 <- summary(window(model.samples.4, start = burnin))

# Ask for the posterior results.
summary(window(model.samples.4, start = burnin))

# Compare the model with informative priors (model 2) to the model with flat
# priors (model 4). (The original comment referred to "model.3", but the call
# compares against sum.model.4.)
bias.model(sum.model.2, sum.model.4)