#####################################################################
#														Robust AUC/ROC													#
#####################################################################
# Binormal CI actually narrower than non-robust CI for restricted variable selection: probably due to right-skewed conditional crisis probability distributions
#
rm(list=ls())
library(psych)
library(graphics)
library(sandwich)
library(bbmle)
library(pROC)
library(randomForest)
library(plm)
library(matrixcalc)
library(stats4)
library(zoo)
library(mvtnorm)
library(MASS)
library(ellipse)
library(DataCombine)
library(Hmisc)

###############################################################
#												PREPARATION													#
###############################################################

#Daten  <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)
#Daten_log  <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)
Daten <- read.table("/Users/chenyao_ks/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",",dec=".",header=TRUE)

ca <- grep("ca", names(Daten), value=T)
drops <- names(Daten) %in% c(ca)
Daten <- Daten[!drops]

# drop vars not used
assets <- grep("assets", names(Daten), value=T)
stocks <- grep("stocks", names(Daten), value=T)
narrowm <- grep("narrowm", names(Daten), value=T)
money <- grep("money", names(Daten), value=T)
ltrate <- grep("ltrate", names(Daten), value=T)
stir <- grep("stir", names(Daten), value=T)
loans <- grep("loans", names(Daten), value=T)
debt <- grep("debt", names(Daten), value=T)
er <- grep("er", names(Daten), value=T)
cpi <- grep("cpi", names(Daten), value=T)
gap <- grep("gap", names(Daten), value=T)
glo <- grep("a_", names(Daten), value=T)
gdp <- grep("gdp", names(Daten), value=T)
i <- grep("i_", names(Daten), value=T)
c <- grep("c_", names(Daten), value=T)
ri <- grep("ri", names(Daten), value=T)
rc <- grep("rc", names(Daten), value=T)

drops <- names(Daten) %in% c(stocks, money, stir,assets,i,ri,glo) # true-false indicator: true at the names in vector

full <- Daten[!drops] # drops those variables which have true indication in "drops"
full <- cbind(Daten[glo], full)

# FULL SET: omit observations with missing values
full_om <- na.omit(full)
sum(full_om$b2)/2

# SELECTION SET:
sel.list <- c("ccode", "year", "b2", "loans1_y_gap", "pdebt_gap", "narrowm_y_gap",  "rltrate", "gr_rgdp", "gr_cpi",  "er_gap", "loans1_y", "pdebt", "ltrate")
location <- names(full) %in% c(sel.list) # get location of independent var
name.sel <- names(full[location]) # get names of features
sel <- full[name.sel]
sel_om <- na.omit(sel)
sum(sel_om$b2)/2


###############################################################
#														ANALYSIS   												#
###############################################################

### CASE-STUDY
##############################################################################################################
# countries
c.list <- c("Australia", "Belgium", "Canada", "Switzerland", "Germany", "Denmark", "Spain", "Finland", "France", "UK", "Italy","Japan","Netherlands","Norway","Portugal","Sweden","US")
# variables
var.list <- list("loans1_y_gap", "pdebt_gap", "narrowm_y_gap",  "rltrate", "gr_rgdp", "gr_cpi",  "er_gap",  "ia_pub", "ia_prb", "ia_jb", "ia_lygr", "ia_pygr", "ia_lyer")
# miscellaneous non-independent
misc.list <- c("b2","b1","b3","rec1","rec2","rec3", "year", "ccode")

loc.misc <- names(full_om) %in% c(misc.list)

#significance level
alpha<-0.05

#Grid points for ROC curve
l<-30

#BS samples
B<-5000

# table matrices
out <- matrix(nrow=17, ncol=14)
outLogit <- matrix(nrow=17, ncol=14)
##############################################################################################################






## BAGGING-selection

location <- names(sel_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(sel_om[!location]) # get names of features
indep <- sel_om[name.indep]
dep <- factor(sel_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
bagging_selection= randomForest(indep, y=dep,
 data=sel_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(sel_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
bagging_selection

# OOS-analysis
library(pROC)

pred <- predict(bagging_selection, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- sel_om[,"b2"]

r_bag<-roc(true, pred, ci=T) # ROC analysis
r_bag

block_num<-nrow(sel_om)^0.25

X<-cbind(sel_om,pred)
X<-X[,c("ccode","year","pred")]
 
 #with NAs
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

pform<-matrix(XX[,3],nrow=142,ncol=17)
bform<-matrix(XX[,4],nrow=142,ncol=17)

length<-ceiling(nrow(pform)^0.25)
#length<-10
blocks<-floor(nrow(pform)/length)

#get BS CI
r_bag_star<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_bag_star[,j]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_bag_star,c(.025, .975))
robAUC_bag<-c(CI[1],r_bag$auc,CI[2])
robAUC_bag


r_bag_star_CS<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_bag_star_CS[,i]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
	
}

CI<-quantile(r_bag_star_CS,c(.025, .975))
robAUC_bag_CS<-c(CI[1],r_bag$auc,CI[2])
robAUC_bag_CS


r_bag_star_ACCS<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
}
for(i in 1:B){	
	rand<-sample(1:ncol(pform_star),1)
	pform_star2<-pform_star[,rand]
	bform_star2<-bform_star[,rand]
	
	for(j in 2:ncol(pform_star)){
		rand<-sample(1:ncol(pform_star),1)
		pform_star2<-cbind(pform_star2,pform_star[,rand])
		bform_star2<-cbind(bform_star2,bform_star[,rand])		
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_bag_star_ACCS[,i]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_bag_star_ACCS,c(.025, .975))
robAUC_bag_ACCS<-c(CI[1],r_bag$auc,CI[2])
robAUC_bag_ACCS



r_bag_star_CSAC<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){	
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
}
length<-ceiling(nrow(pform_star)^0.25)
#length<-10
blocks<-floor(nrow(pform_star)/length)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star2<-pform_star[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star2<-bform_star[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star2<-rbind(pform_star2,pform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star2<-rbind(bform_star2,bform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_bag_star_CSAC[,j]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis

}


CI<-quantile(r_bag_star_CSAC,c(.025, .975))
robAUC_bag_CSAC<-c(CI[1],r_bag$auc,CI[2])
robAUC_bag_CSAC




d<-data.frame(XX)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation and cross-sect. correlation of crisis risk
AC_bag<-plm(ddd[,3]~ddd[,5], data=ddd,index = c("ccode","year"))

CS<-matrix(nrow=17,ncol=17)
for(i in 1:16){
	for(j in (i+1):17){
	corr<- rcorr(ddd[(((i-1)*142)+1):(i*142),"pred"], ddd[(((j-1)*142)+1):(j*142),"pred"], type=c("spearman"))
	CS[j,i]<-corr$r[1,2]
	}}
	
meanCS_bag<-mean(CS,na.rm=T)	
meanCS_bag






## RF-selection

location <- names(sel_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(sel_om[!location]) # get names of features
indep <- sel_om[name.indep]
dep <- factor(sel_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
rf_selection= randomForest(indep, y=dep,
 data=sel_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=sqrt(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(sel_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
rf_selection

# OOS-analysis
library(pROC)

pred <- predict(rf_selection, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- sel_om[,"b2"]

r_rf<-roc(true, pred, ci=T) # ROC analysis
r_rf
block_num<-nrow(sel_om)^0.25

X<-cbind(sel_om,pred)
X<-X[,c("ccode","year","pred")]
 
 #with NAs
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

pform<-matrix(XX[,3],nrow=142,ncol=17)
bform<-matrix(XX[,4],nrow=142,ncol=17)

length<-ceiling(nrow(pform)^0.25)
#length<-10
blocks<-floor(nrow(pform)/length)

#get BS CI
r_rf_star<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_rf_star[,j]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_rf_star,c(.025, .975))
robAUC_rf<-c(CI[1],r_rf$auc,CI[2])
robAUC_rf

r_rf_star_CS<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_rf_star_CS[,i]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
	
}

CI<-quantile(r_rf_star_CS,c(.025, .975))
robAUC_rf_CS<-c(CI[1],r_rf$auc,CI[2])
robAUC_rf_CS


r_rf_star_ACCS<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
}
for(i in 1:B){	
	rand<-sample(1:ncol(pform_star),1)
	pform_star2<-pform_star[,rand]
	bform_star2<-bform_star[,rand]
	
	for(j in 2:ncol(pform_star)){
		rand<-sample(1:ncol(pform_star),1)
		pform_star2<-cbind(pform_star2,pform_star[,rand])
		bform_star2<-cbind(bform_star2,bform_star[,rand])		
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_rf_star_ACCS[,i]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_rf_star_ACCS,c(.025, .975))
robAUC_rf_ACCS<-c(CI[1],r_rf$auc,CI[2])
robAUC_rf_ACCS



r_rf_star_CSAC<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){	
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
}
length<-ceiling(nrow(pform_star)^0.25)
#length<-10
blocks<-floor(nrow(pform_star)/length)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star2<-pform_star[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star2<-bform_star[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star2<-rbind(pform_star2,pform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star2<-rbind(bform_star2,bform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_rf_star_CSAC[,j]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis

}


CI<-quantile(r_rf_star_CSAC,c(.025, .975))
robAUC_rf_CSAC<-c(CI[1],r_rf$auc,CI[2])
robAUC_rf_CSAC


d<-data.frame(XX)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation and cross-sect. correlation of crisis risk
AC_rf<-plm(ddd[,3]~ddd[,5], data=ddd,index = c("ccode","year"))

CS<-matrix(nrow=17,ncol=17)
for(i in 1:16){
	for(j in (i+1):17){
	corr<- rcorr(ddd[(((i-1)*142)+1):(i*142),"pred"], ddd[(((j-1)*142)+1):(j*142),"pred"], type=c("spearman"))
	CS[j,i]<-corr$r[1,2]
	}}
	
meanCS_rf<-mean(CS,na.rm=T)	
meanCS_rf





## BAGGING-all variables

location <- names(full_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
bagging_full= randomForest(indep, y=dep,
 data=full_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=ncol(indep), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(full_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
bagging_full

# OOS-analysis
library(pROC)

pred <- predict(bagging_full, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- full_om[,"b2"]

r_bag_m<-roc(true, pred, ci=T) # ROC analysis
r_bag_m

block_num<-nrow(full_om)^0.25

X<-cbind(full_om,pred)
X<-X[,c("ccode","year","pred")]
 
 #with NAs
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

pform<-matrix(XX[,3],nrow=142,ncol=17)
bform<-matrix(XX[,4],nrow=142,ncol=17)

length<-ceiling(nrow(pform)^0.25)
#length<-10
blocks<-floor(nrow(pform)/length)

#get BS CI
r_bag_m_star<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_bag_m_star[,j]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_bag_m_star,c(.025, .975))
robAUC_bag_m<-c(CI[1],r_bag_m$auc,CI[2])
robAUC_bag_m


r_bag_m_star_CS<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_bag_m_star_CS[,i]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
	
}

CI<-quantile(r_bag_m_star_CS,c(.025, .975))
robAUC_bag_m_CS<-c(CI[1],r_bag_m$auc,CI[2])
robAUC_bag_m_CS


r_bag_m_star_ACCS<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
}
for(i in 1:B){	
	rand<-sample(1:ncol(pform_star),1)
	pform_star2<-pform_star[,rand]
	bform_star2<-bform_star[,rand]
	
	for(j in 2:ncol(pform_star)){
		rand<-sample(1:ncol(pform_star),1)
		pform_star2<-cbind(pform_star2,pform_star[,rand])
		bform_star2<-cbind(bform_star2,bform_star[,rand])		
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_bag_m_star_ACCS[,i]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_bag_m_star_ACCS,c(.025, .975))
robAUC_bag_m_ACCS<-c(CI[1],r_bag_m$auc,CI[2])
robAUC_bag_m_ACCS



r_bag_m_star_CSAC<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){	
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
}
length<-ceiling(nrow(pform_star)^0.25)
#length<-10
blocks<-floor(nrow(pform_star)/length)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star2<-pform_star[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star2<-bform_star[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star2<-rbind(pform_star2,pform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star2<-rbind(bform_star2,bform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_bag_m_star_CSAC[,j]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis

}


CI<-quantile(r_bag_m_star_CSAC,c(.025, .975))
robAUC_bag_m_CSAC<-c(CI[1],r_bag_m$auc,CI[2])
robAUC_bag_m_CSAC


d<-data.frame(XX)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation and cross-sect. correlation of crisis risk
AC_bag_m<-plm(ddd[,3]~ddd[,5], data=ddd,index = c("ccode","year"))

CS<-matrix(nrow=17,ncol=17)
for(i in 1:16){
	for(j in (i+1):17){
	corr<- rcorr(ddd[(((i-1)*142)+1):(i*142),"pred"], ddd[(((j-1)*142)+1):(j*142),"pred"], type=c("spearman"))
	CS[j,i]<-corr$r[1,2]
	}}
	
meanCS_bag_m<-mean(CS,na.rm=T)	
meanCS_bag_m










## RANDOM FOREST
library(randomForest)
library(pROC)

location <- names(full_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
rf= randomForest(indep, y=dep,
 data=full_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=sqrt(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(full_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
rf

# OOS-analysis
pred <- predict(rf, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- full_om[,"b2"]

r_rf_m<-roc(true, pred, ci=T) # ROC analysis
r_rf_m

block_num<-nrow(full_om)^0.25

X<-cbind(full_om,pred)
X<-X[,c("ccode","year","pred")]
 
 #with NAs
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

pform<-matrix(XX[,3],nrow=142,ncol=17)
bform<-matrix(XX[,4],nrow=142,ncol=17)

length<-ceiling(nrow(pform)^0.25)
#length<-10
blocks<-floor(nrow(pform)/length)

#get BS CI
r_rf_m_star<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_rf_m_star[,j]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_rf_m_star,c(.025, .975))
robAUC_rf_m<-c(CI[1],r_rf_m$auc,CI[2])
robAUC_rf_m


r_rf_m_star_CS<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
	
	pred_star<-matrix(pform_star,ncol=1)
	true_star<-matrix(bform_star,ncol=1)
	
	r_rf_m_star_CS[,i]<-as.numeric(roc(as.numeric(true_star), as.numeric(pred_star), ci=T,na.rm=T)$auc) # ROC analysis
	
}

CI<-quantile(r_rf_m_star_CS,c(.025, .975))
robAUC_rf_m_CS<-c(CI[1],r_rf_m$auc,CI[2])
robAUC_rf_m_CS


r_rf_m_star_ACCS<-matrix(NA,nrow=1,ncol=B)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star<-pform[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star<-bform[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star<-rbind(pform_star,pform[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star<-rbind(bform_star,bform[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
}
for(i in 1:B){	
	rand<-sample(1:ncol(pform_star),1)
	pform_star2<-pform_star[,rand]
	bform_star2<-bform_star[,rand]
	
	for(j in 2:ncol(pform_star)){
		rand<-sample(1:ncol(pform_star),1)
		pform_star2<-cbind(pform_star2,pform_star[,rand])
		bform_star2<-cbind(bform_star2,bform_star[,rand])		
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_rf_m_star_ACCS[,i]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis
}

CI<-quantile(r_rf_m_star_ACCS,c(.025, .975))
robAUC_rf_m_ACCS<-c(CI[1],r_rf_m$auc,CI[2])
robAUC_rf_m_ACCS



r_rf_m_star_CSAC<-matrix(NA,nrow=1,ncol=B)
for(i in 1:B){	
	rand<-sample(1:ncol(pform),1)
	pform_star<-pform[,rand]
	bform_star<-bform[,rand]
	
	for(j in 2:ncol(pform)){
		rand<-sample(1:ncol(pform),1)
		pform_star<-cbind(pform_star,pform[,rand])
		bform_star<-cbind(bform_star,bform[,rand])		
	}
}
length<-ceiling(nrow(pform_star)^0.25)
#length<-10
blocks<-floor(nrow(pform_star)/length)
for(j in 1:B){
	rand<-sample(blocks,1)
	pform_star2<-pform_star[((rand-1)*length+1):(rand*length),] #get initial block
	bform_star2<-bform_star[((rand-1)*length+1):(rand*length),] #get initial block
	
	for(i in 1:blocks){
		rand<-sample(blocks,1)
		pform_star2<-rbind(pform_star2,pform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
		bform_star2<-rbind(bform_star2,bform_star[(rand*length+1):(rand*length),]) #add block to BS dataset
	}
	
	pred_star2<-matrix(pform_star2,ncol=1)
	true_star2<-matrix(bform_star2,ncol=1)
	
	r_rf_m_star_CSAC[,j]<-as.numeric(roc(as.numeric(true_star2), as.numeric(pred_star2), ci=T,na.rm=T)$auc) # ROC analysis

}


CI<-quantile(r_rf_m_star_CSAC,c(.025, .975))
robAUC_rf_m_CSAC<-c(CI[1],r_rf_m$auc,CI[2])
robAUC_rf_m_CSAC


d<-data.frame(XX)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation and cross-sect. correlation of crisis risk
AC_rf_m<-plm(ddd[,3]~ddd[,5], data=ddd,index = c("ccode","year"))

CS<-matrix(nrow=17,ncol=17)
for(i in 1:16){
	for(j in (i+1):17){
	corr<- rcorr(ddd[(((i-1)*142)+1):(i*142),"pred"], ddd[(((j-1)*142)+1):(j*142),"pred"], type=c("spearman"))
	CS[j,i]<-corr$r[1,2]
	}}
	
meanCS_rf_m<-mean(CS,na.rm=T)	
meanCS_rf_m



robAUC = rbind(robAUC_bag,robAUC_rf, robAUC_bag_m, robAUC_rf_m)
robAUC_CS = rbind(robAUC_bag_CS,robAUC_rf_CS, robAUC_bag_m_CS, robAUC_rf_m_CS)
robAUC_ACCS = rbind(robAUC_bag_ACCS,robAUC_rf_ACCS, robAUC_bag_m_ACCS, robAUC_rf_m_ACCS)
robAUC_CSAC = rbind(robAUC_bag_CSAC,robAUC_rf_CSAC, robAUC_bag_m_CSAC, robAUC_rf_m_CSAC)

AC = rbind(AC_bag,AC_rf, AC_bag_m, AC_rf_m)

meanCS = rbind(meanCS_bag,meanCS_rf, meanCS_bag_m, meanCS_rf_m)

m_sig = matrix(NA,nrow=8,ncol=1)
m_sig[1,1]<-sum(r_bag_star<r_bag_m_star)/B
m_sig[2,1]<-sum(r_rf_star<r_rf_m_star)/B
m_sig[3,1]<-sum(r_bag_star_CS<r_bag_m_star_CS)/B
m_sig[4,1]<-sum(r_rf_star_CS<r_rf_m_star_CS)/B
m_sig[5,1]<-sum(r_bag_star_ACCS<r_bag_m_star_ACCS)/B
m_sig[6,1]<-sum(r_rf_star_ACCS<r_rf_m_star_ACCS)/B
m_sig[7,1]<-sum(r_bag_star_CSAC<r_bag_m_star_CSAC)/B
m_sig[8,1]<-sum(r_rf_star_CSAC<r_rf_m_star_CSAC)/B


robAUC
robAUC_CS
robAUC_ACCS
robAUC_CSAC

AC
meanCS
m_sig

save.image("/Users/felixward/Dropbox/CrisisPrediction/DoFiles/CT_longrun_ACCS_BS")
 
###############################################################
#  							TABLES							  #
###############################################################
load("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_longrun_ACCS_BS")


library(xtable)

#COMBINED
robAUC<-format(round(robAUC, 2), nsmall = 2)
robAUC_CS<-format(round(robAUC_CS, 2), nsmall = 2)
robAUC_ACCS<-format(round(robAUC_ACCS, 2), nsmall = 2)
robAUC_CSAC<-format(round(robAUC_CSAC, 2), nsmall = 2)

robAUC<-cbind(robAUC[1:2,1:3],"",robAUC[3:4,1:3])
robAUC_CS<-cbind(robAUC_CS[1:2,1:3],"",robAUC_CS[3:4,1:3])
robAUC_ACCS<-cbind(robAUC_ACCS[1:2,1:3],"",robAUC_ACCS[3:4,1:3])
robAUC_CSAC<-cbind(robAUC_CSAC[1:2,1:3],"",robAUC_CSAC[3:4,1:3])

out<-rbind(robAUC, robAUC_CS,robAUC_ACCS, robAUC_CSAC)

for (i in 1:nrow(out)){
	if((1-m_sig[i,1])<=0.05) {
		out[i,6] <- paste(out[i,6],"$^{\\mathsection}$",collapse="")
	}	
}


model.list <- t(c("Bagging", "RF"))
model.list<-t(model.list)
model.list<-rbind(model.list,model.list,model.list,model.list)

comb<-cbind(model.list,out)

mat3<-xtable(comb, align="llcccm{0.5cm}ccc", caption="CT-panel", label="tab:CT_ACCS") # for whatever reason need one column more than i actually want (added "l" to left)

print(mat3, type="latex", caption.placement="top", hline.after=c(-1,nrow(mat3)), sanitize.text.function = function(x){x}, file="/Users/chenyao_ks/Dropbox/CrisisPrediction/Written/CT_ACCS.txt", replace=T, floating=F, booktabs=T, include.colnames=F, include.rownames=F, add.to.row=list(pos=list(0,0,0,0,0,2,2,2,4,4,4,6,6,6), 
command=c(" \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Restricted Selection}} & \\multicolumn{1}{c}{} & \\multicolumn{3}{c}{\\textbf{Many Predictors}} \\\\",
" & low & AUC & up & & low & AUC & up \\\\ ",
"  \\cmidrule(l r){2-4} \\cmidrule(l r){6-8} \\\\",
" & \\multicolumn{7}{c}{Cross-sectional resampling} \\\\",
" \\\\",
" \\\\",
" & \\multicolumn{7}{c}{Temporal resampling} \\\\",
" \\\\",
" \\\\",
" & \\multicolumn{7}{c}{Cross-sectional/Temporal resampling} \\\\",
" \\\\",
" \\\\",
" & \\multicolumn{7}{c}{Temporal/Cross-sectional resampling} \\\\",
" \\\\")))

