#####################################################################
#											LOGISTIC REGRESSIONS												#
#####################################################################

# Start from an empty global environment: every object currently visible in
# the workspace is removed before the script defines its own.
# NOTE(review): this also wipes objects of whoever sources this file from an
# interactive session -- confirm that is intended.
rm(list = ls())
library(psych)
library(graphics)
library(pROC)
library(randomForest)


###############################################################
#												PREPARATION													#
###############################################################

# Raw panel: comma-separated file with a header row and "." as decimal mark.
Daten <- read.table(
  file = "/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

library(reshape)
##Variable Labels
# Display labels used in the output tables. Labels are looked up by POSITION:
# the code below indexes var.labels with a mask built from names(Daten), so
# entry k is taken as the label of the k-th column of Daten.
# NOTE(review): the order therefore has to mirror the column order of the
# CSV file -- verify against the data whenever columns are added or moved.
# Every label is a single-line string (no embedded line breaks), so it can be
# written into a LaTeX table cell as is.
var.labels <- c(
  "ST Interest Rate Diff. (n)", "LT Interest Rate Diff. (n)",
  "ST Interest Rate Diff.", "LT Interest Rate Diff.", "I/GDP (gr)(glo)",
  "LT Interest Rate (n)", "ST Interest Rate (n)", "LT Interest Rate",
  "ST Interest Rate", "C/GDP", "I/GDP", "CA/GDP", "Bank Assets/GDP",
  "Loans/GDP", "Public Debt/GDP", "Exchange Rate (gr)", "GDP (gr)", "C (gr)",
  "I (gr)", "CA (gr)", "Inflation", "Narrow Money (gr)", "Broad Money (gr)",
  "Stock Prices (gr)", "Bank Assets (gr)", "Loans (gr)", "Public Debt (gr)",
  "C/GDP (gr)", "CA/GDP (gr)", "Bank Assets/GDP (gr)", "Loans/GDP (gr)",
  "Exchange Rate (gap)", "GDP (gap)", "C (gap)", "I (gap)", "CA (gap)",
  "ST Interest Rate (n)(gap)", "LT Interest Rate (n)(gap)",
  "ST Interest Rate (gap)", "Narrow Money (gap)", "Broad Money (gap)",
  "Stock Prices (gap)", "Bank Assets (gap)", "Loans (gap)",
  "Public Debt (gap)", "Public Debt/GDP (gap)", "C/GDP (gap)", "I/GDP (gap)",
  "CA/GDP (gap)", "Bank Assets/GDP (gap)", "Loans/GDP (gap)",
  "Narrow Money/GDP (gap)", "Broad Money/GDP (gap)",
  "ST Interest Rate (n)(glo)", "LT Interest Rate (n)(glo)",
  "ST Interest Rate (glo)", "LT Interest Rate (glo)", "Public Debt/GDP (glo)",
  "C/GDP (glo)", "I/GDP (glo)", "Real Exchange Rate (gr)", "CA/GDP (glo)",
  "Bank Assets/GDP (glo)", "Loans/GDP (glo)", "GDP (gr)(glo)", "C (gr)(glo)",
  "I (gr)(glo)", "CA (gr)(glo)", "Inflation (glo)", "Broad Money (gr)(glo)",
  "Stock Prices (gr)(glo)", "Bank Assets (gr)(glo)", "Loans (gr)(glo)",
  "Public Debt (gr)(glo)", "C/GDP (gr)(glo)", "CA/GDP (gr)(glo)",
  "Bank Assets/GDP (gr)(glo)", "Loans/GDP (gr)(glo)", "GDP (gap)(glo)",
  "C (gap)(glo)", "I (gap)(glo)", "CA (gap)(glo)",
  "ST Interest Rate (n)(gap)(glo)", "ST Interest Rate (gap)(glo)",
  "LT Interest Rate (gap)(glo)", "Narrow Money (gap)(glo)",
  "Broad Money (gap)(glo)", "Stock Prices (gap)(glo)",
  "Bank Assets (gap)(glo)", "Loans (gap)(glo)", "Public Debt/GDP (gap)(glo)",
  "C/GDP (gap)(glo)", "I/GDP (gap)(glo)", "Real Exchange Rate (gap)",
  "CA/GDP (gap)(glo)", "Bank Assets/GDP (gap)(glo)", "Loans/GDP (gap)(glo)",
  "Exchange Rate (n)", "Real Exchange Rate", "LT Interest Rate (gap)",
  "Narrow Money (gr)(glo)", "LT Interest Rate (n)(gap)(glo)",
  "Broad Money/GDP (gr)(glo)", "Broad Money/GDP (gap)(glo)",
  "Public Debt (gap)(glo)", "Broad Money/GDP", "Broad Money/GDP (gr)",
  "Broad Money/GDP (gap)", "Broad Money/GDP (glo)",
  "b2", "b3", "b1", "rec1", "rec2", "rec3", "year", "country"
)

## interaction-terms
# Step 1: compute the six interaction terms as free-standing vectors from the
# columns of Daten.
ia_pub  <- Daten$pdebt_gap * Daten$ltrate
ia_prb  <- Daten$loans1_y_gap * Daten$ltrate
ia_jb   <- Daten$loans1_y_gap * Daten$ltrate * Daten$pdebt_gap
ia_lygr <- Daten$loans1_y * Daten$gr_rgdp
ia_pygr <- Daten$pdebt * Daten$gr_rgdp
ia_lyer <- Daten$loans1_y_gap * Daten$er_gap

# Step 2: attach them to Daten as new columns, in this order.
Daten$ia_pub  <- ia_pub
Daten$ia_prb  <- ia_prb
Daten$ia_jb   <- ia_jb
Daten$ia_lygr <- ia_lygr
Daten$ia_pygr <- ia_pygr
Daten$ia_lyer <- ia_lyer

# Step 3: append one label per new column, in the same order as the columns
# were added, so that position k of var.labels still refers to column k.
var.labels <- c(
  var.labels,
  "Public Burden",
  "Private Burden",
  "Joint Burden",
  "Loans/GDP x GDP (gr)",
  "Public Debt/GDP x GDP (gr)",
  "Loans/GDP (gap) x Exchange Rate (gap)"
)

## country factor
# Categorical version of the country code; no label is appended for it.
Daten$country.factor <- as.factor(Daten$ccode)

#throw out vars not used
# `drops` is TRUE for every column named "year"; `full` keeps all the others.
drops <- names(Daten) %in% "year"
full <- Daten[!drops]



###############################################################
#														ANALYSIS   												#
###############################################################


### LOGIT ANALYSIS
##############################################################################################################
# Regressors of the baseline model.
var.list <- c(
  "loans1_y_gap", "pdebt_gap", "narrowm_y_gap", "rltrate",
  "gr_rgdp", "gr_cpi", "er_gap"
)

# Interaction terms that are added on top of the baseline regressors.
ia <- c("ia_pub", "ia_prb", "ia_jb", "ia_lygr", "ia_pygr", "ia_lyer")

# Indicators evaluated one at a time: all baseline regressors followed by the
# first three interaction terms (stored as a list of single names).
single.list <- as.list(c(var.list, ia[1:3]))

# Every regressor that can appear in a multivariate model.
total.list <- c(var.list, ia)

# Header row of the results table.
out.list <- c("", "AUC", "95\\%-CI", "N")

# Confidence levels, in the order 99%, 95%, 90%.
n.ci <- 3
ci <- c(0.99, 0.95, 0.9)

regs <- 3   # number of regressions besides single indicators
num_fe <- 1 # number of FEs

# Empty table matrices:
#   out  - one row per evaluated model, 8 result columns
#   spec - header row + one row per regressor + one row per fixed effect;
#          first column for labels, then one column per multivariate model
out <- matrix(nrow = length(single.list) + regs, ncol = 8)
spec <- matrix(nrow = length(var.list) + length(ia) + num_fe + 1, ncol = regs + 1)

# LaTeX symbol that marks "regressor included" in the specification table.
sym <- "\\checkmark"

# number of cross-validations
runs <- 100
##############################################################################################################



## OUT-OF-SAMPLE EVALUATION: SINGLE INDICATORS, BASELINE, INTERACTIONS, COUNTRY-FE & INTERACTIONS
library(pROC)

# Per-run storage: one row per evaluated model (the single indicators first,
# then the `regs` multivariate models), one column per cross-validation run.
n.models <- length(single.list) + regs
aucs    <- matrix(nrow = n.models, ncol = runs)
ci90    <- matrix(nrow = n.models, ncol = runs)
ci95_lo <- matrix(nrow = n.models, ncol = runs)
ci95_up <- matrix(nrow = n.models, ncol = runs)
ci99    <- matrix(nrow = n.models, ncol = runs)
N       <- matrix(nrow = n.models, ncol = runs)

# Build the logit formula "b2 ~ v1 + v2 + ..." from a vector of column names.
make_fmla <- function(vars) {
  as.formula(paste("b2~", paste(vars, collapse = "+")), env = globalenv())
}

# Names of the columns of `full` that belong to `wanted`, in column order.
pick_vars <- function(wanted) {
  names(full)[names(full) %in% wanted]
}

# Display label of a single column, looked up by position in names(Daten).
label_of <- function(v) {
  lab <- var.labels[names(Daten) %in% v]
  if (length(lab) != 1L) {
    stop("expected exactly one label for '", v, "', found ", length(lab),
         call. = FALSE)
  }
  lab
}

# Monte-Carlo cross-validation of one model.
#
# For run j the seed is set to j, 63.2% of the rows of `data` are drawn
# without replacement as training sample and the remaining rows form the test
# sample. The logit is fitted on the training sample and scored on the test
# sample.
#
# Args:
#   fmla:        model formula with b2 as response.
#   data:        data frame holding b2 and all regressors.
#   runs:        number of random splits.
#   conf_levels: confidence levels in the order 99%, 95%, 90%.
#
# Returns a 6 x runs numeric matrix with rows
#   N        number of observations used in the fit,
#   auc      out-of-sample AUC,
#   ci90     lower bound of the 90% CI of the AUC,
#   ci95_lo  lower bound of the 95% CI,
#   ci95_up  upper bound of the 95% CI,
#   ci99     lower bound of the 99% CI.
run_mccv <- function(fmla, data, runs, conf_levels) {
  res <- matrix(
    NA_real_, nrow = 6, ncol = runs,
    dimnames = list(c("N", "auc", "ci90", "ci95_lo", "ci95_up", "ci99"), NULL)
  )

  for (j in seq_len(runs)) {
    # Same seed per run for every model, so all models see the same splits.
    set.seed(j)
    indexes <- sample(seq_len(nrow(data)), size = 0.632 * nrow(data),
                      replace = FALSE)
    train <- data[indexes, ]
    test <- data[-indexes, ]

    logit <- glm(fmla, data = train, family = "binomial")
    pred <- predict(logit, newdata = test, type = "response")

    # `pred` is the fitted probability of the second outcome level, so cases
    # must score higher than controls. The direction is fixed explicitly:
    # with pROC's automatic choice the orientation is picked per split, and
    # the pROC documentation warns that this biases resampled AUCs upward.
    r <- roc(test[, "b2"], pred, direction = "<")

    # The 95% interval supplies both of its bounds, so compute it only once.
    ci_95 <- as.numeric(ci.auc(r, conf.level = conf_levels[2]))

    res[, j] <- c(
      nobs(logit), # observations in the fit (df.null would be one less)
      as.numeric(r$auc),
      as.numeric(ci.auc(r, conf.level = conf_levels[3]))[1],
      ci_95[1],
      ci_95[3],
      as.numeric(ci.auc(r, conf.level = conf_levels[1]))[1]
    )
  }

  res
}

# Regressor sets: every single indicator on its own, followed by the three
# multivariate models (baseline, + interactions, + country fixed effects).
model.vars <- c(
  single.list,
  list(
    pick_vars(var.list),
    pick_vars(c(var.list, ia)),
    pick_vars(c(var.list, ia, "country.factor"))
  )
)
model.labels <- c(
  vapply(single.list, label_of, character(1)),
  "Baseline", "IA", "FE & IA"
)
stopifnot(length(model.vars) == n.models, length(model.labels) == n.models)

# Row labels of the results table (column 6 of `out`).
out[, 6] <- model.labels

# Specification table: model names in the header row, a check mark for every
# regressor (and, for the last model, the fixed effect) that is included.
spec[1, 2:4] <- c("Baseline", "IA", "FE & IA")
spec[1 + seq_along(var.list), 2] <- sym
spec[1 + seq_along(total.list), 3] <- sym
spec[1 + seq_len(length(total.list) + 1), 4] <- sym

for (k in seq_len(n.models)) {
  res <- run_mccv(make_fmla(model.vars[[k]]), full, runs, ci)

  N[k, ]       <- res["N", ]
  aucs[k, ]    <- res["auc", ]
  ci90[k, ]    <- res["ci90", ]
  ci95_lo[k, ] <- res["ci95_lo", ]
  ci95_up[k, ] <- res["ci95_up", ]
  ci99[k, ]    <- res["ci99", ]

  # Averages over all runs. Columns of `out`: 1 = 95% lower, 2 = AUC,
  # 3 = 95% upper, 5 = N, 7 = 90% lower, 8 = 99% lower (4 stays empty).
  avg <- rowMeans(res)
  out[k, c(1, 2, 3, 5, 7, 8)] <-
    avg[c("ci95_lo", "auc", "ci95_up", "N", "ci90", "ci99")]
}

aucs
ci99
ci95_lo
ci95_up
ci90

out
spec

# Checkpoint: write every object of the workspace to disk once the
# cross-validation has finished.
save.image(file = "/Users/felixward/Dropbox/CrisisPrediction/DoFiles/Logit_MCCV")

###############################################################
#															TABLES													#
###############################################################
# Read the checkpoint back in, so the table code below can also be started
# from here without repeating the analysis above.
load(file = "/Users/felixward/Dropbox/CrisisPrediction/DoFiles/Logit_MCCV")

library(xtable)

#SPECIFICATION TABLE (always use double the amount of backslashes needed in latex)
# First column of `spec`: below the header row, one display label per
# regressor, in the order of total.list. The label is found by position: the
# entry of var.labels at the place where the regressor sits in names(Daten).
for (row in seq_along(total.list)) {
  is.this.var <- names(Daten) == total.list[row]
  spec[row + 1, 1] <- var.labels[is.this.var]
}

# The last row of `spec` belongs to the country fixed effect.
spec[length(var.list) + length(ia) + num_fe + 1, 1] <- "Country-FE"

# The table is printed without sanitizing (so that \\checkmark survives),
# hence the ampersand in this header cell has to be escaped by hand.
spec[1, 4] <- "FE \\& IA"



#OUTPUT TABLE (always use double the amount of backslashes needed in latex)
# Rearranged columns of `out`:
#   1 label | 2 AUC | 3 (never filled) | 4 95% lower | 5 95% upper | 6 N |
#   7 90% lower | 8 99% lower
out2 <- out[, c(6, 2, 4, 1, 3, 5, 7, 8)]

# `out` is a character matrix (it also holds the labels), so the numbers are
# converted back before rounding. `vals` keeps them numeric for the
# significance tests below; its columns 1..7 are columns 2..8 of out2.
vals <- matrix(round(as.numeric(out2[, 2:8]), digits = 2), nrow = nrow(out2))
vals[, 5] <- round(vals[, 5], digits = 0) # n.obs
out2[, 2:8] <- vals

lo95 <- vals[, 3]
lo90 <- vals[, 6]
lo99 <- vals[, 7]

# Append `symbol` (separated by a blank) to the entries of `txt` whose flag
# is TRUE; entries with a FALSE or missing flag are left untouched.
add_mark <- function(txt, flag, symbol) {
  hit <- which(flag)
  if (length(hit) > 0) {
    txt[hit] <- paste(txt[hit], symbol)
  }
  txt
}

# Significance marks on the AUC, based on whether the (rounded) lower CI
# bound exceeds 0.5. The comparisons are done on numbers, not on the
# character cells of out2:
#   dagger  - 90% bound above 0.5 but 95% bound not
#   *       - 95% bound above 0.5
#   second *- 99% bound above 0.5
out2[, 2] <- add_mark(out2[, 2], lo90 > 0.5 & lo95 <= 0.5, "$^{\\dagger} $")
out2[, 2] <- add_mark(out2[, 2], lo95 > 0.5, "*")
out2[, 2] <- add_mark(out2[, 2], lo99 > 0.5, "*")

# 95% interval as one cell "[lower,upper]".
cis <- paste0("[", out2[, 4], ",", out2[, 5], "]")

# Final layout: label | AUC (with marks) | 95% CI | N, below the header row.
out5 <- cbind(out2[, 1], out2[, 2], cis, out2[, 6])
outF <- rbind(out.list, out5)

# get rid of row and columnnames
dimnames(outF) <- list(rep("", nrow(outF)), rep("", ncol(outF)))

#LATEX EXPORT: combined table, results table, specification table

# Row counts that determine where the section headings are inserted.
n.single <- length(single.list) # bivariate rows of the results table
n.out <- nrow(outF)             # header row + all result rows
n.var <- length(var.list)       # regressor rows of the specification table
n.ia <- length(ia)              # interaction-term rows

# LaTeX snippets inserted between table rows (double backslashes throughout).
cmd.results <- c(
  " \\\\ \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Results}} \\\\",
  "  \\cmidrule(l r){2-4} \\\\",
  "  \\\\ \\cdashline{2-4} \\multicolumn{1}{l}{\\textbf{Bivariate}} \\\\",
  " \\\\ \\cdashline{2-4} \\multicolumn{4}{l}{\\textbf{Multivariate}} \\\\ "
)
cmd.spacer <- c(
  "  \\multicolumn{4}{c}{} \\\\",
  "  \\multicolumn{4}{c}{} \\\\"
)
cmd.spec <- c(
  " \\\\ \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Specification}} \\\\",
  "  \\cmidrule(l r){2-4} \\\\",
  "  \\\\ \\cdashline{2-4} \\multicolumn{1}{l}{\\textbf{Variables}} \\\\  ",
  "  \\\\ \\cdashline{2-4} \\multicolumn{1}{l}{\\textbf{Interaction Terms}} \\\\  ",
  "  \\\\ \\cdashline{2-4} \\multicolumn{1}{l}{\\textbf{Fixed Effects}} \\\\   "
)

# Insert positions (row after which a snippet is placed; 0 = before row 1).
# Results part: two heading lines on top, "Bivariate" after the header row,
# "Multivariate" after the last single indicator.
pos.results <- list(0, 0, 1, 1 + n.single)
# Specification part: two heading lines on top, "Variables" after the header
# row, "Interaction Terms" after the regressors, "Fixed Effects" after the
# interaction terms.
pos.spec <- list(0, 0, 1, 1 + n.var, 1 + n.var + n.ia)

# Write one xtable as a bare booktabs tabular (no float, no row or column
# names). Cell text is passed through unsanitized so LaTeX commands in the
# cells survive; `pos` and `cmd` are the add.to.row positions and snippets.
write_tex <- function(tab, path, hlines, pos, cmd) {
  stopifnot(length(pos) == length(cmd))
  print(tab, type = "latex", caption.placement = "top", hline.after = hlines,
        sanitize.text.function = function(x) {x}, file = path, replace = TRUE,
        floating = FALSE, booktabs = TRUE, include.colnames = FALSE,
        include.rownames = FALSE,
        add.to.row = list(pos = pos, command = cmd))
}

#COMBINED
comb <- rbind(outF, spec)
# Last results row is the "FE & IA" model: escape its ampersand by hand,
# because the cells are not sanitized (needed for the \\checkmark symbol).
comb[n.out, 1] <- "FE \\& IA"

# `align` needs one entry more than the table has columns: the leading "l"
# is for the row-name column, which is suppressed when printing.
mat3 <- xtable(comb, align = "lp{3.5cm}ccc", caption = "Logit EWS", label = "tab:logit_out")

# In the combined table the specification part starts after the n.out result
# rows, so its positions are shifted by n.out and preceded by two spacers.
write_tex(
  mat3,
  "/Users/felixward/Dropbox/CrisisPrediction/Written/logit.txt",
  hlines = c(-1, nrow(mat3)),
  pos = c(
    pos.results,
    list(n.out, n.out),
    lapply(pos.spec, function(p) p + n.out)
  ),
  cmd = c(cmd.results, cmd.spacer, cmd.spec)
)

#RESULTS ONLY
outF[n.out, 1] <- "FE \\& IA" # same manual escape as in the combined table
mat_out <- xtable(outF, align = "lp{5cm}ccc", caption = "Logit EWS", label = "tab:logit_out")

write_tex(
  mat_out,
  "/Users/felixward/Dropbox/CrisisPrediction/Written/logit_out.txt",
  hlines = NULL,
  pos = pos.results,
  cmd = cmd.results
)

#SPECIFICATION ONLY
mat_spec <- xtable(spec, align = "lp{5cm}ccc", caption = "Multivariate Logit Specifications", label = "tab:logit_spec")

write_tex(
  mat_spec,
  "/Users/felixward/Dropbox/CrisisPrediction/Written/logit_spec.txt",
  hlines = NULL,
  pos = pos.spec,
  cmd = cmd.spec
)




