#########################################
#          Exercises Bayes-1            #
#             N Schalken                #
#          10 February 2017             #
#########################################

#-----------------------------------------------------------------------------

###############
# Exercise 1b #
###############

# STEP 1: Load the packages.
# library() stops with an error when a package is missing; require() only
# returns FALSE, which can silently hide a broken setup.
library(blavaan)
library(lavaan)

# STEP 2: Load the data (make sure you set the working directory to the folder
# where your data is). To set your working directory to the right folder use:
# setwd("C:/your/folder/here")
# OR go to the Menu --> Session --> Set Working Directory --> Choose Directory
data <- read.table("regression.txt")

# STEP 3: Give names to the variables in the dataset.
colnames(data) <- c("y", "x1", "x2")

# STEP 4: Specify the regression model. The model string needs real newlines:
# everything after a '#' is a comment up to the end of the line.
model.regression <- '# regression
                     y ~ x1 + x2
                     # variances
                     y ~~ y
                     # intercepts
                     y ~ 1'

# STEP 5: Fit the model with maximum likelihood and inspect the results.
fit <- lavaan(model.regression, data = data)
summary(fit, fit.measures = TRUE, ci = TRUE, rsquare = TRUE)

###############
# Exercise 1c #
###############

# STEP 1: Run the Bayesian regression (same model, MCMC estimation).
fit.bayes <- blavaan(model.regression, data = data)
summary(fit.bayes, fit.measures = TRUE, ci = TRUE, rsquare = TRUE)

# STEP 2: Obtain diagnostic plots for the first four parameters.
plot(fit.bayes, pars = 1:4, plot.type = "density")
plot(fit.bayes, pars = 1:4, plot.type = "autocorr")
plot(fit.bayes, pars = 1:4, plot.type = "histogram")
plot(fit.bayes, pars = 1:4, plot.type = "trace")

#-----------------------------------------------------------------------------

##############
# Exercise 2 #
##############

# STEP 1: Load the new dataset and give a name to the variable.
data.IQ <- read.table("data_IQ.txt")
colnames(data.IQ) <- c("IQ")

# STEP 2: Specify an intercept-only model for IQ.
model.IQ1 <- '# variances
              IQ ~~ IQ
              # intercepts
              IQ ~ 1'

# STEP 3: Run the model using maximum likelihood estimation (ML).
fit.IQ1 <- lavaan(model.IQ1, data = data.IQ)
summary(fit.IQ1, ci = TRUE)

# STEP 4: Rerun the model using Bayesian estimation (default priors).
fit.IQ2 <- blavaan(model.IQ1, data = data.IQ)
summary(fit.IQ2, ci = TRUE)

# STEP 5: Specify an alternative prior for the mean IQ score.
# JAGS parameterization: dnorm(mean, precision), precision = 1 / variance.
# Note the closing parenthesis belongs INSIDE the prior string.
fit.IQ3 <- blavaan(model.IQ1, data = data.IQ,
                   dp = dpriors(nu = "dnorm(100, 0.01)"))
summary(fit.IQ3, ci = TRUE)

# STEP 6: Specify a new alternative prior for the mean IQ score.
# (Fill in your own mean and precision between the parentheses.)
fit.IQ4 <- blavaan(model.IQ1, data = data.IQ, dp = dpriors(nu = "dnorm(, )"))
summary(fit.IQ4, ci = TRUE)

# STEP 7: Specify a new alternative prior for the mean IQ score.
fit.IQ5 <- blavaan(model.IQ1, data = data.IQ, dp = dpriors(nu = "dnorm(, )"))
summary(fit.IQ5, ci = TRUE)

# STEP 8: Specify a new alternative prior for the mean IQ score.
fit.IQ6 <- blavaan(model.IQ1, data = data.IQ, dp = dpriors(nu = "dnorm(, )"))
summary(fit.IQ6, ci = TRUE)

# STEP 9: Specify a new alternative prior for the mean IQ score.
fit.IQ7 <- blavaan(model.IQ1, data = data.IQ, dp = dpriors(nu = "dnorm(, )"))
summary(fit.IQ7, ci = TRUE)

#-----------------------------------------------------------------------------

###############
# Exercise 3  #
###############

## PART 1:

# Import the dataset.
data3 <- read.table("studentpor.txt", sep = ",", header = TRUE)

# Recode the variable sex into "0"/"1".
# NOTE(review): revalue() produces the labels "0" and "1" (factor/character),
# not numeric values -- confirm this is what the analysis expects.
library('plyr')
data3$sex <- revalue(data3$sex, c("M" = "0", "F" = "1"))

# Load the packages for creating the plot.
library(tidyr)
library(ggplot2)

# Create a regression model with 10 predictors.
regression3 <- '# regression
                G3 ~ G1 + G2 + absences + health + Walc + Dalc + goout + freetime + sex + studytime
                # variance
                G3 ~~ G3
                # intercept
                G3 ~ 1'

# Run the specified regression model (2 chains, long adaptation and burn-in).
fit.regression3 <- blavaan(regression3, data = data3, n.chains = 2,
                           burnin = 10000, sample = 10000, adapt = 10000)

# Obtain the results from the analysis.
summary(fit.regression3)

## PART 2 - VIOLIN PLOT:
# To create a violin plot with all predictors in it, follow the steps below.
# STEP 1: Obtain the 'mcmc.list' (all posterior draws for every parameter)
# and turn it into a matrix: rows = draws (both chains stacked), columns =
# parameters, in the order they were specified in the model.
mcmc.list <- blavInspect(fit.regression3, what = "mcmc")
iter <- as.matrix(mcmc.list)

# STEP 2: The plot should contain all 10 predictors, which occupy the first
# 10 columns. as.vector() stacks the columns one after another, giving one
# long vector: all draws for beta1 first, then all draws for beta2, etc.
# (Replaces the 10 hand-written b1..b10 vectors and the c() concatenation.)
b <- as.vector(iter[, 1:10])

# STEP 3: Labels beta1..beta10, built in one vectorized call instead of a
# grow-in-a-loop paste().
beta <- paste0("beta", 1:10)

# STEP 4: Combine draws and labels in a data frame. Each label is repeated
# once per draw; nrow(iter) = chains x samples (here 2 x 10000 = 20000), so
# the count stays correct if the sampling settings change.
# An explicit factor with levels in numeric order fixes the alphabetical-sort
# quirk where "beta10" would be placed right after "beta1", and guarantees
# levels() works even on R versions that no longer auto-factor strings.
iterations <- data.frame(
  posterior = b,
  parameter = factor(rep(beta, each = nrow(iter)), levels = beta)
)

# STEP 5: Create the violin plot: one violin per coefficient, a dashed
# reference line at zero, orange mean +/- SD pointranges, and a blue diamond
# marking the posterior median.
ggplot(data = iterations, aes(y = posterior, x = parameter)) +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_discrete(limits = rev(levels(iterations$parameter))) +
  geom_violin(adjust = 1, color = "orange", alpha = 0, size = 1.1) +
  stat_summary(fun.data = mean_sdl, geom = "pointrange", color = "orange") +
  # 'fun' replaces the deprecated 'fun.y' argument (ggplot2 >= 3.3).
  stat_summary(fun = median, geom = "point", shape = 5, size = 4,
               color = "blue") +
  theme_minimal() +
  labs(x = "Parameter", y = "Posterior Value")

# Note: the regression coefficients are in the same order as specified in the
# model, so beta1 = G1, beta2 = G2, beta3 = absences, and so on.
# Translate a variance into a precision ("tau"), the parameterization that
# JAGS priors use: tau = 1 / variance.
tau <- function(variance) {
  1 / variance
}

# Precision corresponding to a variance of 10,000.
tau(10000)

#########################################################################################################################################################################