#########################################
# Exercises Bayes-2
#
# N Schalken
#
# 25 March 2017
#
#########################################
#------------------------------------------------------------------------------
# Install (first run only) and load the blavaan package.
# NOTE(fix): the original ran install.packages() unconditionally on every
# execution and loaded packages with require(), which returns FALSE instead
# of erroring when a package is missing. Guard the install and use library()
# so a missing package fails loudly.
if (!requireNamespace("blavaan", quietly = TRUE)) {
  install.packages("blavaan")
}
library("blavaan")
library("coda")
#------------------------------------------------------------------------------
# STEP 1: Do you understand the priors?
#
# First, we need to think about the prior distributions and hyperparameters
# for our model. For the current model, we need to think about 4 priors: the
# intercept, the two regression parameters and the residual variance. In
# BUGS/JAGS they use tau (precision) instead of sigma^2 (variance). Below, we
# will specify the sd of a normal distribution, which is the sqrt of sigma^2.
# To translate this sd to tau you first square it (to get to sigma^2), then
# you take the inverse by taking the quotient: 1/sigma^2. For example, if you
# want a prior variance of 100 you specify a prior sd of sqrt(100) in the
# distribution plots below. To translate this to your JAGS model, you do
# 1/variance = 1/100 = 0.01. Et voila!
#
# For this model, we want to specify Normal priors on the intercept and
# regression parameters, and a Gamma prior on tau (precision), which serves
# as the residual variance of the outcome variable.
#
# We created a small helper function, so that it becomes easier to try out
# what different priors look like in practice.
# Input:
#   type     = either "normal" or "gamma"
#   mean     = the mean of the (normal) distribution
#   variance = the variance of the (normal) distribution
#   shape    = for gamma: the shape of the distribution
#   scale    = for gamma: the scale of the distribution
#   sec.min, sec.max, step = make the interval shown on the x-axis smaller or
#                            wider and take smaller or larger steps
#   label    = label of the x-axis
#
# Plots the prior density over [sec.min, sec.max], with the mean marked in
# red and mean +/- 2 sd marked in blue (the blue lines are only meaningful
# for the normal prior).
#
# BUG FIX: the original default `label = label` was self-referential, which
# errors ("promise already under evaluation") whenever label is omitted.
# Unknown `type` values now fail with a clear message instead of the opaque
# "object 'prior.d' not found".
prior.plot <- function(type = "normal", mean = 0, variance = 100, shape = 0,
                       scale = 1, sec.min = -50, sec.max = 50, step = .1,
                       label = "") {
  x <- seq(sec.min, sec.max, by = step)
  if (type == "normal") {
    prior.d <- dnorm(x, mean = mean, sd = sqrt(variance))
  } else if (type == "gamma") {
    prior.d <- dgamma(x, shape = shape, scale = scale)
  } else {
    stop('type must be "normal" or "gamma"', call. = FALSE)
  }
  plot(x, prior.d, type = "l", xlab = label)
  abline(v = mean, col = "red")
  abline(v = (mean + 2 * sqrt(variance)), col = "blue")
  abline(v = (mean - 2 * sqrt(variance)), col = "blue")
}

# Then, to obtain a plot, use for example:
par(mfrow = c(1, 1))
prior.plot(type = "normal", mean = 10, variance = 100, label = "intercept")

# Obtain the prior plots
par(mfrow = c(2, 2))
prior.plot(type = "normal", mean = 10, variance = 5, label = "intercept")
prior.plot(type = "normal", mean = .8, variance = 5, label = "beta 1")
prior.plot(type = "normal", mean = 0, variance = 10, label = "beta 2")
prior.plot(type = "gamma", shape = .5, scale = .5, sec.min = 0, sec.max = 0.5,
           step = .001, label = "res.var")
par(mfrow = c(1, 1))
#------------------------------------------------------------------------------
# STEP 2: Run the model and check for convergence
#
# Load data (make sure you set the working directory to the folder where your
# data is). To set your working directory to the right folder use:
#   setwd("C:/your/folder/here")
setwd("D:/exercise-day-4")

# Load data
data <- read.table("data_example.dat")

# Split data in vectors for each variable.
# For this model, we will not use y1.
# Variables in the model:
#   y2 = lag between planned PhD duration and actual PhD duration
#   x1 = age of participant
#   x2 = age squared
colnames(data) <- c("y1", "y2", "x1", "x2")

# Specify the regression model (lavaan syntax, with blavaan prior() labels
# attached to each parameter)
model.regression <- '#regression
y2 ~ prior("dnorm(0.8, 0.2)")*x1 + prior("dnorm(0, 0.1)")*x2
#variance
y2 ~~ prior("dgamma(.5, .5)")*y2
#intercept
y2 ~ prior("dnorm(10, 0.2)")* 1'

# Fit the regression model
fit.bayes <- blavaan(model.regression, data = data, n.chains = 2,
                     burnin = 100, sample = 100, adapt = 100)
summary(fit.bayes, fit.measures = TRUE, ci = TRUE)

# Trace plots for the four free parameters
plot(fit.bayes, pars = 1:4, plot.type = "trace")

# Informative parameter labels:
#   beta[1,2,1]  = regression coefficient 'x1'
#   beta[1,3,1]  = regression coefficient 'x2'
#   psi[1,1,1]   = variance
#   alpha[1,1,1] = intercept
#------------------------------------------------------------------------------
# STEP 3
# Create a mcmc.list object, so the Gelman diagnostics and plots can be
# obtained.
mcmc.list <- blavInspect(fit.bayes, what = "mcmc")
gelman.diag(mcmc.list)
par(mfrow = c(2, 2))
gelman.plot(mcmc.list)

# Obtain the Geweke diagnostic
geweke.diag(mcmc.list)

# Show all Geweke plots --> click zoom to see more details
par(mfrow = c(4, 2))
geweke.plot(mcmc.list, auto.layout = FALSE)
par(mfrow = c(1, 1))

# Run the analysis with 500 iterations (250 burn-in + 250 sampled per chain)
fit.bayes2 <- blavaan(model.regression, data = data, n.chains = 2,
                      burnin = 250, sample = 250, adapt = 250)
summary(fit.bayes2, fit.measures = TRUE, ci = TRUE)

# Compute the relative bias (in %) between the estimates of two models.
#
# model1, model2: one-column matrices of posterior means in the order
#   beta1, beta2, variance, intercept (as produced by blavInspect below).
#
# Prints the bias table and also returns it invisibly, so the result can be
# stored with `x <- bias.blavaan(...)` while the top-level call keeps
# printing exactly as before.
#
# IMPROVED: the original grew `bias` element-by-element inside a
# `for (i in 1:nrow(model1))` loop; the computation is fully vectorized here.
bias.blavaan <- function(model1, model2) {
  bias <- (model2[, 1] - model1[, 1]) / model1[, 1] * 100
  bias <- cbind(bias, as.vector(model1[, 1]), as.vector(model2[, 1]))
  rownames(bias) <- c("beta1", "beta2", "variance", "intercept")
  colnames(bias) <- c("% bias", "est model 1", "est model 2")
  print(bias)
  invisible(bias)
}

# Obtain the parameter estimates of the total summary, put them in an object
# and create matrices of them
summary1 <- blavInspect(fit.bayes, what = "postmean")
summary2 <- blavInspect(fit.bayes2, what = "postmean")
model1 <- as.matrix(summary1)
model2 <- as.matrix(summary2)

# Compute the bias between the two Bayesian regression models
bias.blavaan(model1, model2)
#------------------------------------------------------------------------------
# STEP 4
# Obtain histograms
par(mfrow = c(2, 2))
plot(fit.bayes2, pars = 1:4, plot.type = "histogram")

# OR
mcmc.list.2 <- blavInspect(fit.bayes2, what = "mcmc")
model.mcmc.2 <- as.matrix(mcmc.list.2)
par(mfrow = c(2, 2))
hist(model.mcmc.2[, 1], breaks = 50, main = "Intercept")
hist(model.mcmc.2[, 2], breaks = 50, main = "Age")
hist(model.mcmc.2[, 3], breaks = 50, main = "Age squared")
hist(model.mcmc.2[, 4], breaks = 50, main = "Residual variance")
#------------------------------------------------------------------------------
# STEP 5
# Obtain autocorrelation plots.
# BUG FIX: steps 5 and 6 originally called par(mfrow(2,2)); mfrow is a par()
# *setting*, not a function, so every one of those calls errored with
# 'could not find function "mfrow"'. The correct form is par(mfrow = c(2, 2)).
par(mfrow = c(2, 2))
plot(fit.bayes2, pars = 1:4, plot.type = "autocorr", col = 'blue')

# OR
par(mfrow = c(2, 2))
autocorr.plot(mcmc.list.2)
#------------------------------------------------------------------------------
# STEP 6
# Obtain density plots
par(mfrow = c(2, 2))
plot(fit.bayes2, pars = 1:4, plot.type = "density")

# OR
par(mfrow = c(2, 2))
plot(mcmc.list.2, trace = FALSE, density = TRUE)
#------------------------------------------------------------------------------
# STEP 7
# Different priors: a much less informative Gamma prior on the residual
# variance.
model.regression.2 <- '#regression
y2 ~ prior("dnorm(0.8, 0.2)")*x1 + prior("dnorm(0, 0.1)")*x2
#variances
y2 ~~ prior("dgamma(.001, .001)")*y2
#intercepts
y2 ~ prior("dnorm(10, 0.2)")* 1'

# Fit the regression model
fit.bayes.3 <- blavaan(model.regression.2, data = data, n.chains = 2,
                       burnin = 250, sample = 250, adapt = 250)
summary(fit.bayes.3, fit.measures = TRUE, ci = TRUE)

# Save the regression parameters of model 3 in the right format:
summary3 <- blavInspect(fit.bayes.3, what = "postmean")
model3 <- as.matrix(summary3)

# Compare estimates of model 2 and model 3:
bias.blavaan(model2, model3)
#------------------------------------------------------------------------------
# STEP 8
# Different priors: diffuse (near-flat) Normal priors on both regression
# parameters and the intercept.
model.regression.3 <- '#regression
y2 ~ prior("dnorm(0, 0.0000001)")*x1 + prior("dnorm(0, 0.0000001)")*x2
#variances
y2 ~~ prior("dgamma(.001, .001)")*y2
#intercepts
y2 ~ prior("dnorm(0, 0.0000001)")* 1'

# Fit the regression model
fit.bayes.4 <- blavaan(model.regression.3, data = data, n.chains = 2,
                       burnin = 250, sample = 250, adapt = 250)
summary(fit.bayes.4, fit.measures = TRUE, ci = TRUE)

# Save the regression parameters of model 4 in the right format:
summary4 <- blavInspect(fit.bayes.4, what = "postmean")
model4 <- as.matrix(summary4)

# Compare estimates of model 2 and model 4 (the original comment said
# "model 3" by mistake):
bias.blavaan(model2, model4)
################################################################################