#####################################################################
#											RANDOM FORESTS 															#
#####################################################################
rm(list=ls())
library(psych)
library(graphics)
library(pROC)
library(randomForest)

###############################################################
#												PREPARATION													#
###############################################################

#Daten  <- read.table("/Users/felixward/Documents/Studium/Bonn/Research/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)
# Read the quarterly post-1970 panel (comma-separated, "." as decimal mark).
Daten <- read.table(
  "/Users/felixward/Dropbox/CrisisPrediction/Data/R_class_post70_q.csv",
  header = TRUE, sep = ",", dec = "."
)


# Column-name groups: all columns of Daten whose name contains the pattern.
hopr   <- grep("hopr",   names(Daten), value = TRUE)
gap    <- grep("gap",    names(Daten), value = TRUE)
gdp    <- grep("gdp",    names(Daten), value = TRUE)
y      <- grep("_y",     names(Daten), value = TRUE)
stocks <- grep("stocks", names(Daten), value = TRUE)
stir   <- grep("stir",   names(Daten), value = TRUE)
ltrate <- grep("ltrate", names(Daten), value = TRUE)
glo    <- grep("a_",     names(Daten), value = TRUE)
fliab  <- grep("fliab",  names(Daten), value = TRUE)
er     <- grep("er",     names(Daten), value = TRUE)
res    <- grep("res",    names(Daten), value = TRUE)
tloans <- grep("tloans", names(Daten), value = TRUE)
cpi    <- grep("cpi",    names(Daten), value = TRUE)


# Logical mask over the columns of Daten: TRUE marks a column to remove
# (identifier columns plus the listed variable families).
drops <- is.element(
  names(Daten),
  c("quarter", "year", "ccode", hopr, gdp, y, stocks, stir, ltrate, glo, fliab)
)
saves <- is.element(names(Daten), c(glo))
# The "a_" columns are part of `drops`, so they are put back in front of the
# columns that survive the mask.
full <- cbind(Daten[glo], Daten[!drops])

# FULL SET: keep only rows without missing values
full_om <- na.omit(full)
sum(full_om$b2) / 8

# SELECTION SET: dependent variable b2 plus a fixed list of predictors
sel.list <- c(
  "b2", "tloans_r_gap", "tloans_r_gr", "a_fliab_r_gap", "a_ltrate_r",
  "a_gdp_r_gap", "cpi_gr", "er_gap", "res_r_gap", "a_gdp_r_gr"
)
location <- is.element(names(full), c(sel.list)) # which columns of full are selected
name.sel <- names(full)[location]                # their names, in column order
sel <- full[name.sel]
sel_om <- na.omit(sel)
sum(sel_om$b2) / 8



# DATA for logit model
## Interaction terms for the logit model (element-wise products of columns)

ia_prb <- Daten$tloans_y_gap * Daten$ltrate
Daten$ia_prb <- ia_prb

ia_lygr <- Daten$tloans_y * Daten$gdp_r_gr
Daten$ia_lygr <- ia_lygr

ia_lyer <- Daten$tloans_y_gap * Daten$er_gap
Daten$ia_lyer <- ia_lyer

## Country factor (country fixed effects)
Daten$country.factor <- as.factor(Daten$ccode)

# Throw out variables not used by the logit model.
# The mask must be `drops.logit`, built here from the *current* columns of
# Daten. The previous code indexed with `drops`, the tree-model mask: it was
# computed before the four columns above were appended (so it was shorter than
# ncol(Daten) and got recycled), and it removes the "a_" predictors that
# var.logit asks for.
drops.logit <- names(Daten) %in% c("year") # TRUE at the columns to remove
full.logit <- Daten[!drops.logit]

###############################################################
#														ANALYSIS   												#
###############################################################

### CLASSIFICATION-TREE ANALYSIS
##############################################################################################################
# Predictors of the restricted ("selection") tree models
var.list <- c(
  "tloans_r_gap", "tloans_r_gr", "a_fliab_r_gap", "a_ltrate_r", "a_gdp_r_gap",
  "cpi_gr", "er_gap", "res_r_gap", "a_gdp_r_gr"
)

# Row labels of the result table and column headers of the specification table
model.list <- c("Single Tree", "Bagging", "Random Forest")
model.list2 <- c("\\textbf{Parameter}", "Single", "Bagging", "RF", "Single", "Bagging", "RF")

# Predictors of the logit model (same set as the tree selection)
var.logit <- var.list
# Interaction terms of the logit model
ia.logit <- c("ia_prb", "ia_lygr", "ia_lyer")


# Row labels of the specification table / header row of the result table
param.list <- c("B", "$ J_{try} $", "$ J $", "\\# of crises")
out.list <- c("\\textbf{Model}", "AUC", "95\\%-CI", "N", "", "AUC", "95\\%-CI", "N")

# Columns that must never be used as predictors
misc.list <- c("b2", "b1", "b3", "rec1", "rec2", "rec3")

# Empty (NA-filled) matrices that the model sections fill in
out      <- matrix(NA, nrow = 3, ncol = 9) # AUC, CI bounds and N per model
spec     <- matrix(NA, nrow = 4, ncol = 7) # model specification
sig_base <- matrix(NA, nrow = 3, ncol = 2) # p-values: tree models vs. logit
sig_pre  <- matrix(NA, nrow = 2, ncol = 2) # p-values: model vs. previous model
sig_many <- matrix(NA, nrow = 3, ncol = 1) # p-values: full set vs. selection set

# Number of repetitions for the resampled models
runs <- 100

# Confidence levels
n.ci <- 3
ci <- c(0.99, 0.95, 0.9)
##############################################################################################################


# LOGIT: repeated random train/test splits (63.2% / 36.8%), one per seed.
# Collects the test-sample AUC, its 95% CI and the training sample size.
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

N <- matrix(nrow = 1, ncol = runs)


# Build the formula b2 ~ <all logit predictors present in full.logit>
location <- names(full.logit) %in% c(var.logit, ia.logit, "country.factor") # get location of vars
name <- names(full.logit[location]) # get names
indep <- paste(name, collapse = "+") # indep. variables
dep <- paste("b2~") # dep. variable
fmla <- as.formula(paste(dep, indep)) # get formula


for (j in seq_len(runs)) {

  # Training / test sample. The size is derived from the data frame that is
  # actually sampled (full.logit), matching the representative model further
  # down; floor() makes the truncation of the fractional size explicit.
  set.seed(j)
  indexes <- sample(seq_len(nrow(full.logit)),
                    size = floor(0.632 * nrow(full.logit)),
                    replace = FALSE)
  test <- full.logit[-indexes, ]
  train <- full.logit[indexes, ]

  # Regression
  logit <- glm(fmla, data = train, family = "binomial")
  # Number of observations used in the fit. `df.null` (used before) is the
  # null-model degrees of freedom, i.e. one less than the sample size.
  N[1, j] <- nobs(logit)

  # Out-of-sample analysis
  pred <- predict(logit, newdata = test, type = "response") # predicted outcome
  true <- test[, "b2"] # real outcome

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  aucs[1, j] <- as.numeric(r$auc)

  # Compute the confidence interval once and read both bounds from it
  ci_bounds <- as.numeric(ci.auc(r, conf.level = ci[2]))
  ci95_lo[1, j] <- ci_bounds[1]
  ci95_up[1, j] <- ci_bounds[3]

}

# Averages over all runs
N <- as.numeric(colMeans(as.matrix(N[1, ])))

auc <- as.numeric(colMeans(as.matrix(aucs[1, ])))
ci95_lo <- as.numeric(colMeans(as.matrix(ci95_lo[1, ])))
ci95_up <- as.numeric(colMeans(as.matrix(ci95_up[1, ])))



# Representative logit model whose AUC equals the MCCV average

# One fixed split (seed 7): 63.2% of the rows for training, the rest for testing
set.seed(7)
indexes <- sample(seq_len(nrow(full.logit)),
                  size = 0.632 * nrow(full.logit),
                  replace = FALSE)
train <- full.logit[indexes, ]
test <- full.logit[-indexes, ]

# Fit on the training rows
logit <- glm(fmla, data = train, family = "binomial")

# Predicted probabilities and observed outcome on the held-out rows
pred <- predict(logit, newdata = test, type = "response")
true <- test[, "b2"]

library(pROC)
# ROC object kept as the logit benchmark for the roc.test comparisons below
r_log <- roc(true, pred, ci = FALSE)
r_log




## SINGLE TREE-selection
# One-tree "forest" on the selection set, refit `runs` times with different
# seeds; AUC and 95% CI bounds are averaged over the runs.
library(randomForest)
library(pROC)

location <- names(sel_om) %in% c(var.list) # get location of independent vars
name.indep <- names(sel_om[location]) # get names of features
indep <- sel_om[name.indep]
location <- names(sel_om) %in% c("b2") # get location of dependent var
name.dep <- names(sel_om[location])
dep <- factor(sel_om[, "b2"] > 0) # dep. var.

# Define matrices
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

for (j in seq_len(runs)) {
  set.seed(j)
  tree_selection <- randomForest(indep, y = dep,
    data = sel_om,
    ntree = 1,
    replace = TRUE,            # bootstrapping (with replacement)
    mtry = ncol(indep),        # every predictor is a split candidate
    cutoff = c(1/2, 1/2),      # majority vote
    sampsize = nrow(sel_om),   # bootstrap sample as large as the data
    nodesize = 10              # terminal nodes hold at least 10 obs., so the tree is NOT fully grown
  )

  # predict() without newdata returns the out-of-bag predictions;
  # second column = probability of the TRUE class
  pred <- predict(tree_selection, type = "prob")[, 2]

  true <- sel_om[, name.dep]

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  aucs[1, j] <- as.numeric(r$auc)
  # Compute the confidence interval once and read both bounds from it
  ci_bounds <- as.numeric(ci.auc(r, conf.level = ci[2]))
  ci95_lo[1, j] <- ci_bounds[1]
  ci95_up[1, j] <- ci_bounds[3]
}

# Averages over all runs go into row 1 of the result table
out[1, 1] <- model.list[1]
out[1, 2] <- as.numeric(colMeans(as.matrix(aucs[1, ])))
out[1, 3] <- as.numeric(colMeans(as.matrix(ci95_lo[1, ])))
out[1, 4] <- as.numeric(colMeans(as.matrix(ci95_up[1, ])))
out[1, 5] <- nrow(sel_om)

spec[1, 2] <- tree_selection$ntree
spec[2, 2] <- tree_selection$mtry

# Representative tree whose AUC equals the MCCV average
set.seed(9)
tree_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 1,
  replace = TRUE,          # bootstrapping (with replacement)
  mtry = ncol(indep),      # every predictor is a split candidate
  cutoff = c(1/2, 1/2),    # majority vote
  sampsize = nrow(sel_om), # bootstrap sample as large as the data
  nodesize = 10            # minimum size of terminal nodes
)
tree_selection

# OOS-analysis
library(pROC)

# No newdata is passed; second column = probability of the TRUE class
pred <- predict(tree_selection, type = "prob")[, 2]

true <- sel_om[, name.dep]

r_tree <- roc(true, pred, ci = TRUE) # ROC analysis
r_tree


# Compare ROCs: one-sided DeLong test, single tree vs. logit benchmark
testobj <- roc.test(r_tree, r_log, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_base[1, 1] <- testobj$p.value[1]




## BAGGING-selection
library(randomForest)

# Predictors and binary outcome from the complete-case selection set
location <- names(sel_om) %in% c(var.list) # position of the predictors
name.indep <- names(sel_om)[location]
location <- names(sel_om) %in% c("b2") # position of the dependent variable
name.dep <- names(sel_om)[location]
indep <- sel_om[name.indep]
dep <- factor(sel_om[name.dep] > 0)

# Grow 5000 bootstrap trees; mtry equals the number of predictors (bagging)
set.seed(1)
bagging_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 5000,
  replace = TRUE,          # bootstrapping (with replacement)
  mtry = ncol(indep),      # every predictor is a split candidate
  cutoff = c(1/2, 1/2),    # majority vote
  sampsize = nrow(sel_om), # bootstrap sample as large as the data
  nodesize = 1             # terminal nodes may hold a single observation
)
bagging_selection

# Convergence diagnostic: error rate against number of trees
palette("default")
plot(bagging_selection, type = "l", main = "")

# OOS-analysis
library(pROC)

# No newdata is passed; second column = probability of the TRUE class
pred <- predict(bagging_selection, type = "prob")[, 2]

true <- sel_om[, name.dep]

r <- roc(true, pred, ci = TRUE) # ROC analysis
ci_bounds <- as.numeric(ci.auc(r, conf.level = 0.95))

# Row 2 of the result table (selection-set columns)
out[2, 1] <- model.list[2]
out[2, 2] <- as.numeric(r$auc)
out[2, 3] <- ci_bounds[1]
out[2, 4] <- ci_bounds[3]
out[2, 5] <- nrow(sel_om)

spec[1, 3] <- bagging_selection$ntree
spec[2, 3] <- bagging_selection$mtry
spec[3, 3] <- ncol(indep)
spec[4, 3] <- floor(sum(sel_om$b2) / 8)

# Compare ROCs (one-sided DeLong tests): bagging vs. logit ...
r_bag <- r
testobj <- roc.test(r_bag, r_log, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_base[2, 1] <- testobj$p.value[1]

# ... and bagging vs. single tree
testobj <- roc.test(r_bag, r_tree, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_pre[1, 1] <- testobj$p.value[1]


## RF-selection
library(randomForest)

# Predictors and binary outcome from the complete-case selection set
location <- names(sel_om) %in% c(var.list) # position of the predictors
name.indep <- names(sel_om)[location]
location <- names(sel_om) %in% c("b2") # position of the dependent variable
name.dep <- names(sel_om)[location]
indep <- sel_om[name.indep]
dep <- factor(sel_om[name.dep] > 0)

# Grow 5000 bootstrap trees with a random predictor subset at each split
set.seed(1)
rf_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 5000,
  replace = TRUE,           # bootstrapping (with replacement)
  mtry = sqrt(ncol(indep)), # square root of the number of predictors (not all of them)
  cutoff = c(1/2, 1/2),     # majority vote
  sampsize = nrow(sel_om),  # bootstrap sample as large as the data
  nodesize = 1              # terminal nodes may hold a single observation
)
rf_selection

# Convergence diagnostic: error rate against number of trees
palette("default")
plot(rf_selection, type = "l", main = "")

# OOS-analysis
library(pROC)

# No newdata is passed; second column = probability of the TRUE class
pred <- predict(rf_selection, type = "prob")[, 2]

true <- sel_om[, name.dep]

r <- roc(true, pred, ci = TRUE) # ROC analysis
ci_bounds <- as.numeric(ci.auc(r, conf.level = 0.95))

# Row 3 of the result table (selection-set columns)
out[3, 1] <- model.list[3]
out[3, 2] <- as.numeric(r$auc)
out[3, 3] <- ci_bounds[1]
out[3, 4] <- ci_bounds[3]
out[3, 5] <- nrow(sel_om)

spec[1, 4] <- rf_selection$ntree
spec[2, 4] <- rf_selection$mtry

# Compare ROCs (one-sided DeLong tests): random forest vs. logit ...
r_rf <- r
testobj <- roc.test(r_rf, r_log, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_base[3, 1] <- testobj$p.value[1]

# ... and random forest vs. bagging
testobj <- roc.test(r_rf, r_bag, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_pre[2, 1] <- testobj$p.value[1]




## SINGLE TREE-full
# One-tree "forest" on the full predictor set, refit `runs` times with
# different seeds; AUC and 95% CI bounds are averaged over the runs.
library(randomForest)
library(pROC)

location <- names(full_om) %in% c(misc.list) # columns that are not predictors
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# define matrices
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

for (j in seq_len(runs)) {
  set.seed(j)
  tree_full <- randomForest(indep, y = dep,
    data = full_om,
    ntree = 1,
    replace = TRUE,            # bootstrapping (with replacement)
    mtry = ncol(indep),        # every predictor is a split candidate
    cutoff = c(1/2, 1/2),      # majority vote
    sampsize = nrow(full_om),  # bootstrap sample as large as the data
    nodesize = 10              # terminal nodes hold at least 10 obs., so the tree is NOT fully grown
  )

  # predict() without newdata returns the out-of-bag predictions;
  # second column = probability of the TRUE class
  pred <- predict(tree_full, type = "prob")[, 2]

  # Read the outcome column by name instead of through `name.dep`, a global
  # left over from the selection-set sections (derived from sel_om).
  true <- full_om[, "b2"]

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  aucs[1, j] <- as.numeric(r$auc)
  # Compute the confidence interval once and read both bounds from it
  ci_bounds <- as.numeric(ci.auc(r, conf.level = ci[2]))
  ci95_lo[1, j] <- ci_bounds[1]
  ci95_up[1, j] <- ci_bounds[3]
}

# Averages over all runs go into row 1 of the result table (full-set columns)
out[1, 6] <- as.numeric(colMeans(as.matrix(aucs[1, ])))
out[1, 7] <- as.numeric(colMeans(as.matrix(ci95_lo[1, ])))
out[1, 8] <- as.numeric(colMeans(as.matrix(ci95_up[1, ])))
out[1, 9] <- nrow(full_om)

spec[1, 5] <- tree_full$ntree
spec[2, 5] <- tree_full$mtry


# compare ROCs
# NOTE(review): `r` is the ROC of the LAST loop iteration (seed == runs).
# Unlike the selection-set tree, no separate "representative" tree is refit
# here - confirm that this is intended.
r_tree_m <- r
testobj <- roc.test(r_tree_m, r_log, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_base[1, 2] <- testobj$p.value[1]

testobj <- roc.test(r_tree_m, r_tree, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_many[1, 1] <- testobj$p.value[1]



## BAGGING-all variables
library(randomForest)

location <- names(full_om) %in% c(misc.list) # columns that are not predictors
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# Grow 5000 bootstrap trees; mtry equals the number of predictors (bagging)
set.seed(1)
bagging_full <- randomForest(indep, y = dep,
  data = full_om,
  ntree = 5000,
  replace = TRUE,            # bootstrapping (with replacement)
  mtry = ncol(indep),        # every predictor is a split candidate
  cutoff = c(1/2, 1/2),      # majority vote
  sampsize = nrow(full_om),  # bootstrap sample as large as the data
  nodesize = 1               # terminal nodes may hold a single observation
)
bagging_full

# convergence diagnostic: error rate against number of trees
palette("default")
plot(bagging_full, type = "l", main = "")

# OOS-analysis
library(pROC)

# predict() without newdata returns the out-of-bag predictions;
# second column = probability of the TRUE class
pred <- predict(bagging_full, type = "prob")[, 2]

# Read the outcome column by name instead of through `name.dep`, a global
# left over from the selection-set sections (derived from sel_om).
true <- full_om[, "b2"]

r <- roc(true, pred, ci = TRUE) # ROC analysis
ci_bounds <- as.numeric(ci.auc(r, conf.level = 0.95)) # computed once, both bounds read from it
out[2, 6] <- as.numeric(r$auc)
out[2, 7] <- ci_bounds[1]
out[2, 8] <- ci_bounds[3]
out[2, 9] <- nrow(full_om)


spec[1, 6] <- bagging_full$ntree
spec[2, 6] <- bagging_full$mtry
spec[3, 6] <- ncol(indep)
spec[4, 6] <- floor(sum(full_om$b2) / 8)

# compare ROCs (one-sided DeLong tests): vs. logit benchmark
r_bag_m <- r
testobj <- roc.test(r_bag_m, r_log, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_base[2, 2] <- testobj$p.value[1]


# vs. single tree on the full set
testobj <- roc.test(r_bag_m, r_tree_m, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_pre[1, 2] <- testobj$p.value[1]


# vs. bagging on the selection set
testobj <- roc.test(r_bag_m, r_bag, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_many[2, 1] <- testobj$p.value[1]




## RANDOM FOREST
library(randomForest)

# Every column of full_om that is not listed in misc.list is a predictor
location <- names(full_om) %in% c(misc.list)
name.indep <- names(full_om)[!location]
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# Grow 5000 bootstrap trees with a random predictor subset at each split
set.seed(1)
rf_full <- randomForest(
  indep,
  y = dep,
  data = full_om,
  ntree = 5000,
  replace = TRUE,            # bootstrapping (with replacement)
  mtry = sqrt(ncol(indep)),  # square root of the number of predictors (not all of them)
  cutoff = c(1/2, 1/2),      # majority vote
  sampsize = nrow(full_om),  # bootstrap sample as large as the data
  nodesize = 1               # terminal nodes may hold a single observation
)
rf_full


# Convergence diagnostic: error rate against number of trees
palette("default")
plot(rf_full, type = "l", main = "")

# OOS-analysis
library(pROC)

# No newdata is passed; second column = probability of the TRUE class
pred <- predict(rf_full, type = "prob")[, 2]

true <- full_om[, "b2"]

r <- roc(true, pred, ci = TRUE) # ROC analysis
ci_bounds <- as.numeric(ci.auc(r, conf.level = 0.95))

# Row 3 of the result table (full-set columns)
out[3, 6] <- as.numeric(r$auc)
out[3, 7] <- ci_bounds[1]
out[3, 8] <- ci_bounds[3]
out[3, 9] <- nrow(full_om)

spec[1, 7] <- rf_full$ntree
spec[2, 7] <- rf_full$mtry

# Compare ROCs (one-sided DeLong tests): vs. logit benchmark
r_rf_m <- r
testobj <- roc.test(r_rf_m, r_log, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_base[3, 2] <- testobj$p.value[1]


# vs. bagging on the full set
testobj <- roc.test(r_rf_m, r_bag_m, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_pre[2, 2] <- testobj$p.value[1]


# vs. random forest on the selection set
testobj <- roc.test(r_rf_m, r_rf, method = "delong", alternative = "greater")
options(scipen = 10)
getOption("scipen")

sig_many[3, 1] <- testobj$p.value[1]



# Print the result table and the specification table for inspection
out
spec

# Print the p-value matrices filled by the roc.test comparisons above
sig_base
sig_pre
sig_many

# Store the complete workspace; the TABLES section below loads this file again
save.image("/Users/felixward/Dropbox/CrisisPrediction/DoFiles/CT_post1970_q") 


###############################################################
#															TABLES													#
###############################################################
load("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_post1970_q")

library(xtable)

# SPECIFICATION TABLE (always use double the amount of backslashes needed in latex)
# First column holds the parameter labels; this turns spec into a character matrix.
spec[, 1] <- param.list

# Put the model header row on top
spec2 <- rbind(model.list2, spec)
# Round trip through a data frame, then blank out all row and column names
x <- data.frame(spec2)
spec2 <- as.matrix(x)
rownames(spec2) <- character(nrow(spec2))
colnames(spec2) <- character(ncol(spec2))


# OUTPUT TABLE (always use double the amount of backslashes needed in latex)

# Reformat decimals: columns 2-9 hold numbers stored as text; round to 2 digits
out2 <- out
out2[, 2:9] <- round(as.numeric(out2[, 2:9]), digits = 2)

# Add symbols for significance.
# paste0() is used so that no blanks end up inside the LaTeX markup; the
# previous paste(..., collapse = "") still joined its pieces with the default
# separator " " (collapse only affects vectors, sep was meant).
# isTRUE() keeps the loop from stopping with an error on a missing p-value.

# Bold AUC where the model beats the logit benchmark at the 5% level
for (i in seq_len(nrow(sig_base))) {
  if (isTRUE(sig_base[i, 1] <= 0.05)) {
    out2[i, 2] <- paste0("\\textbf{", out2[i, 2], "}")
  }
  if (isTRUE(sig_base[i, 2] <= 0.05)) {
    out2[i, 6] <- paste0("\\textbf{", out2[i, 6], "}")
  }
}

# # for (i in 1:nrow(sig_pre)){
	# if(sig_pre[i,1]<=0.05) {
		# out2[i+1,2] <- paste(out2[i+1,2],"$^{\\ddagger}$",collapse="")
	# }
	# if(sig_pre[i,2]<=0.05) {
		# out2[i+1,6] <- paste(out2[i+1,6],"$^{\\ddagger}$",collapse="")
	# }	
# }

# Section sign where the full-set model beats its selection-set counterpart
for (i in seq_len(nrow(sig_many))) {
  if (isTRUE(sig_many[i, 1] <= 0.05)) {
    out2[i, 6] <- paste0(out2[i, 6], "$^{\\mathsection}$")
  }
}

# Confidence intervals as "[lower,upper]" strings
cis  <- paste0("[", out2[, 3], ",", out2[, 4], "]") # selection set
cis2 <- paste0("[", out2[, 7], ",", out2[, 8], "]") # full set

# Keep model name, AUC and N of both halves; the CI bound columns are replaced
# by the formatted strings built above
out3 <- out2[, c(1, 2, 5, 6, 9)]
out4 <- cbind(out3, cis, cis2)
# Reorder to: model, AUC, CI, N (selection) | AUC, CI, N (full)
out5 <- out4[, c(1, 2, 6, 3, 4, 7, 5)]
# Insert an empty spacer column between the two halves
out6 <- out5[, 1:4]
out7 <- cbind(out6, matrix(NA, nrow = 3, ncol = 1))
out8 <- cbind(out7, out5[, 5:7])
# Header row on top
outF <- rbind(out.list, out8)
# Round trip through a data frame, then blank out all row and column names
x <- data.frame(outF)
outF <- as.matrix(x)
rownames(outF) <- character(nrow(outF))
colnames(outF) <- character(ncol(outF))

# Same spacer column for the specification table
spec3 <- spec2[, 1:4]
spec4 <- cbind(spec3, matrix(NA, nrow = 5, ncol = 1))
specF <- cbind(spec4, spec2[, 5:7])

# COMBINED: results on top, specification below
comb <- rbind(outF, specF)

# Turn the combined matrix into an xtable object.
# `align` has one entry more than comb has columns: xtable expects the first
# entry to describe the row-name column, even though row names are not
# printed below (include.rownames=F).
mat3<-xtable(comb, align="llcccm{2.5cm}ccc", caption="CT-EWS", label="tab:CT_out")

# Write the table as LaTeX to a file. Cell contents are passed through
# unchanged (identity sanitize function), so the LaTeX markup stored in the
# cells survives. `add.to.row` pairs each entry of `pos` with the entry of
# `command` at the same index; the commands are the "Results" and
# "Specification" headings with their rules.
# NOTE(review): `replace` does not appear among print.xtable's documented
# arguments (`append` does) - confirm it has any effect.
print(mat3, type="latex", caption.placement="top", hline.after=c(-1,nrow(mat3)), sanitize.text.function = function(x){x}, file="/Users/felixward/Dropbox/CrisisPrediction/Written/CT_rob_post1970_q.txt", replace=T, floating=F, booktabs=T, include.colnames=F, include.rownames=F, add.to.row=list(pos=list(0,0,0,0,1,4,4,4,4,5), 
command=c(" \\multicolumn{1}{c}{} & \\multicolumn{7}{c}{\\textbf{Results}} \\\\",
"  \\cmidrule(l r){2-8} \\\\",
" \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Restricted Selection}} & \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Many Predictors}} \\\\",
"  \\cmidrule(l r){2-4} \\cmidrule(l r){6-8} \\\\",
" \\cdashline{1-8} \\\\",
" \\multicolumn{1}{c}{} & \\multicolumn{7}{c}{\\textbf{Specification}} \\\\",
"  \\cmidrule(l r){2-8} \\\\",
" \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Restricted Selection}} & \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Many Predictors}} \\\\",
"  \\cmidrule(l r){2-4}  \\cmidrule(l r){6-8} \\\\",
"  \\cdashline{1-8} \\\\")))






# Q4 Data only
# Reload the workspace saved by the quarterly analysis above
load("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_post1970_q")


###############################################################
#												PREPARATION													#
###############################################################

#Daten  <- read.table("/Users/felixward/Documents/Studium/Bonn/Research/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)
# Read the Q4-only post-1970 panel (comma-separated, "." as decimal mark)
Daten <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class_post70_q4.csv", sep = ",", dec = ".", header = TRUE)


# Column-name groups: all columns of Daten whose name contains the pattern
hopr <- grep("hopr", names(Daten), value = TRUE)
gap <- grep("gap", names(Daten), value = TRUE)
gdp <- grep("gdp", names(Daten), value = TRUE)
y <- grep("_y", names(Daten), value = TRUE)
stocks <- grep("stocks", names(Daten), value = TRUE)
stir <- grep("stir", names(Daten), value = TRUE)
ltrate <- grep("ltrate", names(Daten), value = TRUE)
glo <- grep("a_", names(Daten), value = TRUE)
fliab <- grep("fliab", names(Daten), value = TRUE)
er <- grep("er", names(Daten), value = TRUE)
res <- grep("res", names(Daten), value = TRUE)
tloans <- grep("tloans", names(Daten), value = TRUE)
cpi <- grep("cpi", names(Daten), value = TRUE)


# Logical mask over the columns of Daten: TRUE marks a column to remove
drops <- names(Daten) %in% c("quarter", "year", "ccode", hopr, gdp, y, stocks, stir, ltrate, glo, fliab)
saves <- names(Daten) %in% c(glo)
full <- Daten[!drops] # keep the columns that are not flagged
full <- cbind(Daten[glo], full) # put the "a_" columns (part of `drops`) back in front

# FULL SET: omit observations with missing values
full_om <- na.omit(full)
sum(full_om$b2) / 2

# SELECTION SET:
sel.list <- c("b2", "tloans_r_gap", "tloans_r_gr", "a_fliab_r_gap", "a_ltrate_r", "a_gdp_r_gap", "cpi_gr", "er_gap", "res_r_gap", "a_gdp_r_gr")
location <- names(full) %in% c(sel.list) # get location of selected vars
name.sel <- names(full[location]) # get their names
sel <- full[name.sel]
sel_om <- na.omit(sel)
# Same divisor as for full_om above: the quarterly section divides by 8, and
# this section had been switched to 2 for the full set only.
# NOTE(review): presumably 2 is the right divisor for Q4-only data - confirm.
sum(sel_om$b2) / 2



# DATA for logit model
## Interaction terms for the logit model (element-wise products of columns)

ia_prb <- Daten$tloans_y_gap * Daten$ltrate
Daten$ia_prb <- ia_prb

ia_lygr <- Daten$tloans_y * Daten$gdp_r_gr
Daten$ia_lygr <- ia_lygr

ia_lyer <- Daten$tloans_y_gap * Daten$er_gap
Daten$ia_lyer <- ia_lyer

## Country factor (country fixed effects)
Daten$country.factor <- as.factor(Daten$ccode)

# Throw out variables not used by the logit model.
# The mask must be `drops.logit`, built here from the *current* columns of
# Daten. The previous code indexed with `drops`, the tree-model mask: it was
# computed before the four columns above were appended (so it was shorter than
# ncol(Daten) and got recycled), and it removes the "a_" predictors that
# var.logit asks for.
drops.logit <- names(Daten) %in% c("year") # TRUE at the columns to remove
full.logit <- Daten[!drops.logit]

###############################################################
#														ANALYSIS   												#
###############################################################

### CLASSIFICATION-TREE ANALYSIS
##############################################################################################################
# Predictors of the restricted ("selection") tree models
var.list <- c(
  "tloans_r_gap", "tloans_r_gr", "a_fliab_r_gap", "a_ltrate_r", "a_gdp_r_gap",
  "cpi_gr", "er_gap", "res_r_gap", "a_gdp_r_gr"
)

# Row labels of the result table and column headers of the specification table
model.list <- c("Single Tree", "Bagging", "Random Forest")
model.list2 <- c("\\textbf{Parameter}", "Single", "Bagging", "RF", "Single", "Bagging", "RF")

# Predictors of the logit model (same set as the tree selection)
var.logit <- var.list
# Interaction terms of the logit model
ia.logit <- c("ia_prb", "ia_lygr", "ia_lyer")


# Row labels of the specification table / header row of the result table
param.list <- c("B", "$ J_{try} $", "$ J $", "\\# of crises")
out.list <- c("\\textbf{Model}", "AUC", "95\\%-CI", "N", "", "AUC", "95\\%-CI", "N")

# Columns that must never be used as predictors
misc.list <- c("b2", "b1", "b3", "rec1", "rec2", "rec3")

# Empty (NA-filled) matrices; this resets the tables loaded from the workspace
out      <- matrix(NA, nrow = 3, ncol = 9)
spec     <- matrix(NA, nrow = 4, ncol = 7)
sig_base <- matrix(NA, nrow = 3, ncol = 2)
sig_pre  <- matrix(NA, nrow = 2, ncol = 2)
sig_many <- matrix(NA, nrow = 3, ncol = 1)
sig_Q4   <- matrix(NA, nrow = 3, ncol = 2)
# Number of repetitions for the resampled models
runs <- 100

# Confidence levels
n.ci <- 3
ci <- c(0.99, 0.95, 0.9)
##############################################################################################################


#LOGIT
# Monte-Carlo cross-validation of the logit benchmark: `runs` random splits
# with 63.2% of the rows used for training, one glm per split, and the
# out-of-sample AUC with its 95% bounds recorded per run and averaged afterwards.
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

N <- matrix(nrow = 1, ncol = runs)


# get formula: b2 ~ every column of full.logit that is a selected predictor,
# an interaction term or the country factor
location <- names(full.logit) %in% c(var.logit, ia.logit, "country.factor") # get location of vars
name <- names(full.logit[location]) # get names
indep <- paste(name, collapse = "+") # indep. variables
dep <- paste("b2~") # dep. variable
fmla <- as.formula(paste(dep, indep)) # get formula


for (j in seq_len(runs)) {

  # training, test sample; seeding with the run index makes each split reproducible
  set.seed(j)
  # The training size is taken from the data frame that is actually sampled
  # (previously nrow(full), a different object), matching the representative
  # model fitted after this loop.
  indexes <- sample(seq_len(nrow(full.logit)), size = 0.632 * nrow(full.logit), replace = FALSE)
  test <- full.logit[-indexes, ]
  train <- full.logit[indexes, ]

  # Regression
  logit <- glm(fmla, data = train, family = "binomial")
  N[1, j] <- logit$df.null # null-model degrees of freedom of this fit

  # OOS-analysis
  pred <- predict(logit, newdata = test, type = "response") # predicted outcome
  true <- test[, "b2"] # real outcome

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  aucs[1, j] <- as.numeric(r$auc)

  # ci[2] is the 95% level; elements 1 and 3 are the lower and upper bound
  bounds <- as.numeric(ci.auc(r, conf.level = ci[2]))
  ci95_lo[1, j] <- bounds[1]
  ci95_up[1, j] <- bounds[3]

}

# averages over all runs (these replace the per-run matrices by scalars)
N <- mean(N[1, ])

auc <- mean(aucs[1, ])
ci95_lo <- mean(ci95_lo[1, ])
ci95_up <- mean(ci95_up[1, ])



# Representative logit model whose AUC equals the MCCV average
# (a single split with the fixed seed 11; its ROC curve is kept as r_logQ4 and
# serves as the benchmark in the roc.test comparisons).

# training, test sample
set.seed(11)
indexes <- sample(
  seq_len(nrow(full.logit)),
  size = 0.632 * nrow(full.logit),
  replace = FALSE
)
train <- full.logit[indexes, ]
test <- full.logit[-indexes, ]

# Regression on the training rows
logit <- glm(fmla, data = train, family = "binomial")

# OOS-analysis: predicted probabilities vs. real outcome on the held-out rows
pred <- predict(logit, newdata = test, type = "response")
true <- test[, "b2"]

library(pROC)
r_logQ4 <- roc(true, pred, ci = FALSE) # ROC analysis
r_logQ4




## SINGLE TREE-selection
library(randomForest)
library(pROC)

# Features are the var.list columns of sel_om; the outcome is b2 > 0 as a factor.
name.indep <- names(sel_om)[names(sel_om) %in% var.list] # names of features
indep <- sel_om[name.indep]
location <- names(sel_om) %in% "b2" # location of dependent var
name.dep <- names(sel_om)[location]
dep <- factor(sel_om[, "b2"] > 0) # dep. var.

# One slot per run for the AUC and its 95% bounds
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

for (j in seq_len(runs)) {
  # a "forest" of exactly one tree, grown on a seeded bootstrap sample
  set.seed(j)
  tree_selection <- randomForest(
    indep,
    y = dep,
    data = sel_om,
    ntree = 1,
    replace = TRUE,           # bootstrapping (with replacement!)
    mtry = ncol(indep),       # every feature is a split candidate
    cutoff = c(0.5, 0.5),     # majority vote
    sampsize = nrow(sel_om),  # bootstrap sample as large as the data
    nodesize = 10             # minimum size of terminal nodes
  )

  # OOS-analysis: predict() without newdata; second column = TRUE probability
  pred <- predict(tree_selection, type = "prob")[, 2]
  true <- sel_om[, name.dep]

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  bounds <- as.numeric(ci.auc(r, conf.level = ci[2]))
  aucs[1, j] <- as.numeric(r$auc)
  ci95_lo[1, j] <- bounds[1]
  ci95_up[1, j] <- bounds[3]
}

# Row 1 of the results table: label, run averages, sample size
out[1, 1] <- model.list[1]
out[1, 2] <- as.numeric(colMeans(cbind(aucs[1, ])))
out[1, 3] <- as.numeric(colMeans(cbind(ci95_lo[1, ])))
out[1, 4] <- as.numeric(colMeans(cbind(ci95_up[1, ])))
out[1, 5] <- nrow(sel_om)

# Specification table: number of trees and mtry of the last fitted tree
spec[1, 2] <- tree_selection$ntree
spec[2, 2] <- tree_selection$mtry

# Representative tree whose AUC equals the MCCV average
# (a single tree grown with the fixed seed 9; its ROC curve is kept as r_treeQ4).
set.seed(9)
tree_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 1,
  replace = TRUE,           # bootstrapping (with replacement!)
  mtry = ncol(indep),       # every feature is a split candidate
  cutoff = c(0.5, 0.5),     # majority vote
  sampsize = nrow(sel_om),  # bootstrap sample as large as the data
  nodesize = 10             # minimum size of terminal nodes
)
tree_selection

# OOS-analysis: predict() without newdata; second column = TRUE probability
library(pROC)
pred <- predict(tree_selection, type = "prob")[, 2]
true <- sel_om[, name.dep]

r_treeQ4 <- roc(true, pred, ci = TRUE) # ROC analysis
r_treeQ4


# compare ROCs
# one-sided DeLong test: tree better than the logit benchmark?
testobj <- roc.test(r_treeQ4, r_logQ4, method = "delong", alternative = "greater")
sig_base[1, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")

# one-sided DeLong test: tree worse than r_tree (defined outside this section)?
testobj <- roc.test(r_treeQ4, r_tree, method = "delong", alternative = "less")
sig_Q4[1, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")



## BAGGING-selection
library(randomForest)
library(pROC)

# Features are the var.list columns of sel_om; the outcome is b2 > 0 as a factor.
name.indep <- names(sel_om)[names(sel_om) %in% var.list] # names of features
location <- names(sel_om) %in% "b2" # location of dependent var
name.dep <- names(sel_om)[location] # name of dep. var.
indep <- sel_om[name.indep]
dep <- factor(sel_om[name.dep] > 0)

# grow trees: bagging = forest in which every feature is tried at every split
set.seed(1)
bagging_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 5000,
  replace = TRUE,           # bootstrapping (with replacement!)
  mtry = ncol(indep),       # every feature is a split candidate
  cutoff = c(0.5, 0.5),     # majority vote
  sampsize = nrow(sel_om),  # bootstrap sample as large as the data
  nodesize = 1              # terminal nodes may hold a single observation
)
bagging_selection

# convergence diagnostic
palette("default")
plot(bagging_selection, type = "l", main = "")

# OOS-analysis: predict() without newdata; second column = TRUE probability
pred <- predict(bagging_selection, type = "prob")[, 2]
true <- sel_om[, name.dep]

r <- roc(true, pred, ci = TRUE) # ROC analysis
bounds <- as.numeric(ci.auc(r, conf.level = 0.95)) # lower, point estimate, upper

# Row 2 of the results table
out[2, 1] <- model.list[2]
out[2, 2] <- as.numeric(r$auc)
out[2, 3] <- bounds[1]
out[2, 4] <- bounds[3]
out[2, 5] <- nrow(sel_om)

# Specification table, column 3
spec[1, 3] <- bagging_selection$ntree
spec[2, 3] <- bagging_selection$mtry
spec[3, 3] <- ncol(indep)
spec[4, 3] <- floor(sum(sel_om$b2) / 2)

# compare ROCs
r_bagQ4 <- r

# bagging better than the logit benchmark?
testobj <- roc.test(r_bagQ4, r_logQ4, method = "delong", alternative = "greater")
sig_base[2, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")

# bagging better than the single tree?
testobj <- roc.test(r_bagQ4, r_treeQ4, method = "delong", alternative = "greater")
sig_pre[1, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")

# bagging worse than r_bag (defined outside this section)?
testobj <- roc.test(r_bagQ4, r_bag, method = "delong", alternative = "less")
sig_Q4[2, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")



## RF-selection
library(randomForest)
library(pROC)

# Features are the var.list columns of sel_om; the outcome is b2 > 0 as a factor.
name.indep <- names(sel_om)[names(sel_om) %in% var.list] # names of features
location <- names(sel_om) %in% "b2" # location of dependent var
name.dep <- names(sel_om)[location] # name of dep. var.
indep <- sel_om[name.indep]
dep <- factor(sel_om[name.dep] > 0)

# grow trees: unlike bagging, only a subset of the features is tried per split
set.seed(1)
rf_selection <- randomForest(
  indep,
  y = dep,
  data = sel_om,
  ntree = 5000,
  replace = TRUE,             # bootstrapping (with replacement!)
  mtry = sqrt(ncol(indep)),   # square root of the number of features
  cutoff = c(0.5, 0.5),       # majority vote
  sampsize = nrow(sel_om),    # bootstrap sample as large as the data
  nodesize = 1                # terminal nodes may hold a single observation
)
rf_selection

# convergence diagnostic
palette("default")
plot(rf_selection, type = "l", main = "")

# OOS-analysis: predict() without newdata; second column = TRUE probability
pred <- predict(rf_selection, type = "prob")[, 2]
true <- sel_om[, name.dep]

r <- roc(true, pred, ci = TRUE) # ROC analysis
bounds <- as.numeric(ci.auc(r, conf.level = 0.95)) # lower, point estimate, upper

# Row 3 of the results table
out[3, 1] <- model.list[3]
out[3, 2] <- as.numeric(r$auc)
out[3, 3] <- bounds[1]
out[3, 4] <- bounds[3]
out[3, 5] <- nrow(sel_om)

# Specification table, column 4
spec[1, 4] <- rf_selection$ntree
spec[2, 4] <- rf_selection$mtry

# compare ROCs
r_rfQ4 <- r

# random forest better than the logit benchmark?
testobj <- roc.test(r_rfQ4, r_logQ4, method = "delong", alternative = "greater")
sig_base[3, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")

# random forest better than bagging?
testobj <- roc.test(r_rfQ4, r_bagQ4, method = "delong", alternative = "greater")
sig_pre[2, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")

# random forest worse than r_rf (defined outside this section)?
testobj <- roc.test(r_rfQ4, r_rf, method = "delong", alternative = "less")
sig_Q4[3, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")


## SINGLE TREE-full
library(randomForest)
library(pROC)

# Features: every column of full_om except the outcome-type columns in
# misc.list; the outcome is b2 > 0 as a factor.
location <- names(full_om) %in% c(misc.list) # location of the non-feature columns
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# define matrices: one slot per run for the AUC and its 95% bounds
aucs <- matrix(nrow = 1, ncol = runs)
ci95_lo <- matrix(nrow = 1, ncol = runs)
ci95_up <- matrix(nrow = 1, ncol = runs)

for (j in seq_len(runs)) {
  # a "forest" of exactly one tree, grown on a seeded bootstrap sample
  set.seed(j)
  tree_full <- randomForest(
    indep,
    y = dep,
    data = full_om,
    ntree = 1,
    replace = TRUE,            # bootstrapping (with replacement!)
    mtry = ncol(indep),        # every feature is a split candidate
    cutoff = c(1 / 2, 1 / 2),  # majority vote
    sampsize = nrow(full_om),  # bootstrap sample as large as the data
    nodesize = 10              # minimum size of terminal nodes
  )

  # OOS-analysis: predict() without newdata; second column = TRUE probability
  pred <- predict(tree_full, type = "prob")[, 2]

  # Read the outcome by its literal name. The earlier code used name.dep, which
  # is only set in the *-selection sections, so this section failed or used a
  # stale value when run on its own.
  true <- full_om[, "b2"]

  r <- roc(true, pred, ci = TRUE) # ROC analysis
  bounds <- as.numeric(ci.auc(r, conf.level = ci[2])) # ci[2] = 95% level
  aucs[1, j] <- as.numeric(r$auc)
  ci95_lo[1, j] <- bounds[1]
  ci95_up[1, j] <- bounds[3]
}

# Row 1, "many predictors" half of the results table: run averages, sample size
out[1, 6] <- as.numeric(colMeans(as.matrix(aucs[1, ])))
out[1, 7] <- as.numeric(colMeans(as.matrix(ci95_lo[1, ])))
out[1, 8] <- as.numeric(colMeans(as.matrix(ci95_up[1, ])))
out[1, 9] <- nrow(full_om)

# Specification table, column 5
spec[1, 5] <- tree_full$ntree
spec[2, 5] <- tree_full$mtry


# compare ROCs
# NOTE(review): r is the ROC curve of the last loop iteration (seed = runs).
# The restricted-selection section refits a "representative" tree with a fixed
# seed instead; confirm that using the last iteration here is intended.
r_tree_mQ4 <- r

# full-set tree better than the logit benchmark?
testobj <- roc.test(r_tree_mQ4, r_logQ4, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_base[1, 2] <- testobj$p.value[1]

# full-set tree better than the restricted-selection tree?
testobj <- roc.test(r_tree_mQ4, r_treeQ4, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_many[1, 1] <- testobj$p.value[1]

# full-set tree worse than r_tree_m (defined outside this section)?
testobj <- roc.test(r_tree_mQ4, r_tree_m, method = "delong", alternative = "less")
options("scipen" = 10)
options()$scipen

sig_Q4[1, 2] <- testobj$p.value[1]



## BAGGING-all variables
library(randomForest)
library(pROC)

# Features: every column of full_om except the outcome-type columns in
# misc.list; the outcome is b2 > 0 as a factor.
location <- names(full_om) %in% c(misc.list) # location of the non-feature columns
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# grow trees: bagging = forest in which every feature is tried at every split
set.seed(1)
bagging_full <- randomForest(
  indep,
  y = dep,
  data = full_om,
  ntree = 5000,
  replace = TRUE,            # bootstrapping (with replacement!)
  mtry = ncol(indep),        # every feature is a split candidate
  cutoff = c(1 / 2, 1 / 2),  # majority vote
  sampsize = nrow(full_om),  # bootstrap sample as large as the data
  nodesize = 1               # terminal nodes may hold a single observation
)
bagging_full

# convergence diagnostic
palette("default")
plot(bagging_full, type = "l", main = "")

# OOS-analysis: predict() without newdata; second column = TRUE probability
pred <- predict(bagging_full, type = "prob")[, 2]

# Read the outcome by its literal name. The earlier code used name.dep, which
# is only set in the *-selection sections, so this section failed or used a
# stale value when run on its own.
true <- full_om[, "b2"]

r <- roc(true, pred, ci = TRUE) # ROC analysis
bounds <- as.numeric(ci.auc(r, conf.level = 0.95)) # lower, point estimate, upper

# Row 2, "many predictors" half of the results table
out[2, 6] <- as.numeric(r$auc)
out[2, 7] <- bounds[1]
out[2, 8] <- bounds[3]
out[2, 9] <- nrow(full_om)


# Specification table, column 6
spec[1, 6] <- bagging_full$ntree
spec[2, 6] <- bagging_full$mtry
spec[3, 6] <- ncol(indep)
spec[4, 6] <- floor(sum(full_om$b2) / 2)

# compare ROCs
r_bag_mQ4 <- r

# full-set bagging better than the logit benchmark?
testobj <- roc.test(r_bag_mQ4, r_logQ4, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_base[2, 2] <- testobj$p.value[1]


# full-set bagging better than the full-set single tree?
testobj <- roc.test(r_bag_mQ4, r_tree_mQ4, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_pre[1, 2] <- testobj$p.value[1]


# full-set bagging better than restricted-selection bagging?
testobj <- roc.test(r_bag_mQ4, r_bagQ4, method = "delong", alternative = "greater")
options("scipen" = 10)
options()$scipen

sig_many[2, 1] <- testobj$p.value[1]


# full-set bagging worse than r_bag_m (defined outside this section)?
testobj <- roc.test(r_bag_mQ4, r_bag_m, method = "delong", alternative = "less")
options("scipen" = 10)
options()$scipen

sig_Q4[2, 2] <- testobj$p.value[1]


## RANDOM FOREST
library(randomForest)
library(pROC)

# Features: every column of full_om except the outcome-type columns in
# misc.list; the outcome is b2 > 0 as a factor.
location <- names(full_om) %in% misc.list # location of the non-feature columns
name.indep <- names(full_om)[!location] # names of features
indep <- full_om[name.indep]
dep <- factor(full_om[, "b2"] > 0) # dep. var.

# grow trees: unlike bagging, only a subset of the features is tried per split
set.seed(1)
rf_full <- randomForest(
  indep,
  y = dep,
  data = full_om,
  ntree = 5000,
  replace = TRUE,             # bootstrapping (with replacement!)
  mtry = sqrt(ncol(indep)),   # square root of the number of features
  cutoff = c(0.5, 0.5),       # majority vote
  sampsize = nrow(full_om),   # bootstrap sample as large as the data
  nodesize = 1                # terminal nodes may hold a single observation
)
rf_full


# convergence diagnostic
palette("default")
plot(rf_full, type = "l", main = "")

# OOS-analysis: predict() without newdata; second column = TRUE probability
pred <- predict(rf_full, type = "prob")[, 2]
true <- full_om[, "b2"]

r <- roc(true, pred, ci = TRUE) # ROC analysis
bounds <- as.numeric(ci.auc(r, conf.level = 0.95)) # lower, point estimate, upper

# Row 3, "many predictors" half of the results table
out[3, 6] <- as.numeric(r$auc)
out[3, 7] <- bounds[1]
out[3, 8] <- bounds[3]
out[3, 9] <- nrow(full_om)

# Specification table, column 7
spec[1, 7] <- rf_full$ntree
spec[2, 7] <- rf_full$mtry

# compare ROCs
r_rf_mQ4 <- r

# full-set random forest better than the logit benchmark?
testobj <- roc.test(r_rf_mQ4, r_logQ4, method = "delong", alternative = "greater")
sig_base[3, 2] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")


# full-set random forest better than full-set bagging?
testobj <- roc.test(r_rf_mQ4, r_bag_mQ4, method = "delong", alternative = "greater")
sig_pre[2, 2] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")


# full-set random forest better than the restricted-selection random forest?
testobj <- roc.test(r_rf_mQ4, r_rfQ4, method = "delong", alternative = "greater")
sig_many[3, 1] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")


# full-set random forest worse than r_rf_m (defined outside this section)?
testobj <- roc.test(r_rf_mQ4, r_rf_m, method = "delong", alternative = "less")
sig_Q4[3, 2] <- testobj$p.value[1]
options(scipen = 10)
getOption("scipen")


# Show the collected results and p-value matrices
out
spec

sig_base
sig_pre
sig_many
sig_Q4

# Persist the whole workspace so the TABLES section can be rerun without refitting
save.image("/Users/felixward/Dropbox/CrisisPrediction/DoFiles/CT_post1970_q4") 


###############################################################
#															TABLES													#
###############################################################
# Restore the workspace written at the end of the ANALYSIS section
load("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_post1970_q4")

library(xtable)

#SPECIFICATION TABLE (always use double the amount of backslashes needed in latex)
# First column holds the parameter labels; this turns spec into a character matrix.
spec[, 1] <- param.list

# Put the model header row on top, then strip all row and column names by going
# through a data frame and back to a matrix.
x <- data.frame(rbind(model.list2, spec))
spec2 <- as.matrix(x)
dimnames(spec2) <- list(character(nrow(spec2)), character(ncol(spec2)))


#OUTPUT TABLE (always use double the amount of backslashes needed in latex)

# reformat decimals: columns 2-9 hold numbers stored as text; round to 2 digits
out2 <- out
out2[, 2:9] <- round(as.numeric(out2[, 2:9]), digits = 2)

# add symbols for significance
# Markers are glued on with paste0(): paste() inserts a blank between its
# arguments (collapse = "" does not remove it), which produced cells such as
# "\textbf{ 0.85 }" and "0.85 $^*$". isTRUE() skips p-values that are NA
# (the sig_* matrices start out as NA) instead of stopping with an error.

# bold: AUC significantly above the logit benchmark
for (i in seq_len(nrow(sig_base))) {
  if (isTRUE(sig_base[i, 1] <= 0.05)) {
    out2[i, 2] <- paste0("\\textbf{", out2[i, 2], "}")
  }
  if (isTRUE(sig_base[i, 2] <= 0.05)) {
    out2[i, 6] <- paste0("\\textbf{", out2[i, 6], "}")
  }
}

# (disabled) double dagger: AUC significantly above the preceding model
# for (i in 1:nrow(sig_pre)){
#   if(sig_pre[i,1]<=0.05) {
#     out2[i+1,2] <- paste(out2[i+1,2],"$^{\\ddagger}$",collapse="")
#   }
#   if(sig_pre[i,2]<=0.05) {
#     out2[i+1,6] <- paste(out2[i+1,6],"$^{\\ddagger}$",collapse="")
#   }
# }

# section sign: full predictor set significantly above the restricted selection
for (i in seq_len(nrow(sig_many))) {
  if (isTRUE(sig_many[i, 1] <= 0.05)) {
    out2[i, 6] <- paste0(out2[i, 6], "$^{\\mathsection}$")
  }
}

# asterisk: significant difference in the sig_Q4 comparison
for (i in seq_len(nrow(sig_Q4))) {
  if (isTRUE(sig_Q4[i, 1] <= 0.05)) {
    out2[i, 2] <- paste0(out2[i, 2], "$^*$")
  }
  if (isTRUE(sig_Q4[i, 2] <= 0.05)) {
    out2[i, 6] <- paste0(out2[i, 6], "$^*$")
  }
}

# confidence intervals as "[lower,upper]" strings
cis <- paste0("[", out2[, 3], ",", out2[, 4], "]")   # restricted selection
cis2 <- paste0("[", out2[, 7], ",", out2[, 8], "]")  # many predictors

# keep model, AUC and N of both halves; the CI bounds are replaced by the strings
out3 <- out2[, c(1, 2, 5, 6, 9)]
out4 <- cbind(out3, cis, cis2)
# reorder to: model, AUC, CI, N | AUC, CI, N
out5 <- out4[, c(1, 2, 6, 3, 4, 7, 5)]
# insert an empty (NA) spacer column between the two halves
out6 <- out5[, 1:4]
out7 <- cbind(out6, matrix(NA, nrow = nrow(out6), ncol = 1))
out8 <- cbind(out7, out5[, 5:7])
outF <- rbind(out.list, out8) # get model headers
# get rid of row and column names
x <- data.frame(outF)
outF <- as.matrix(x)
dimnames(outF) <- list(character(nrow(outF)), character(ncol(outF)))

# same spacer column for the specification table
spec3 <- spec2[, 1:4]
spec4 <- cbind(spec3, matrix(NA, nrow = nrow(spec3), ncol = 1))
specF <- cbind(spec4, spec2[, 5:7])

#COMBINED: results on top, specification below
comb <- rbind(outF, specF)

# Export the combined table as LaTeX.
# `align` has one entry more than comb has columns: the leading "l" belongs to
# the row-names column, which is suppressed again at print time.
mat3 <- xtable(
  comb,
  align = "llcccm{2.5cm}ccc",
  caption = "CT-EWS",
  label = "tab:CT_out"
)

# Extra LaTeX rows: group headers and rules ahead of the results part
# (positions 0 and 1) and ahead of the specification part (positions 4 and 5).
header_rows <- list(
  pos = list(0, 0, 0, 0, 1, 4, 4, 4, 4, 5),
  command = c(
    " \\multicolumn{1}{c}{} & \\multicolumn{7}{c}{\\textbf{Results}} \\\\",
    "  \\cmidrule(l r){2-8} \\\\",
    " \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Restricted Selection}} & \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Many Predictors}} \\\\",
    "  \\cmidrule(l r){2-4} \\cmidrule(l r){6-8} \\\\",
    " \\cdashline{1-8} \\\\",
    " \\multicolumn{1}{c}{} & \\multicolumn{7}{c}{\\textbf{Specification}} \\\\",
    "  \\cmidrule(l r){2-8} \\\\",
    " \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Restricted Selection}} & \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Many Predictors}} \\\\",
    "  \\cmidrule(l r){2-4}  \\cmidrule(l r){6-8} \\\\",
    "  \\cdashline{1-8} \\\\"
  )
)

print(
  mat3,
  type = "latex",
  caption.placement = "top",
  hline.after = c(-1, nrow(mat3)),
  sanitize.text.function = function(x) x, # cells already contain LaTeX markup
  file = "/Users/felixward/Dropbox/CrisisPrediction/Written/CT_rob_post1970_q4.txt",
  replace = TRUE,
  floating = FALSE,
  booktabs = TRUE,
  include.colnames = FALSE,
  include.rownames = FALSE,
  add.to.row = header_rows
)


