#####################################################################
#														Robust AUC/ROC													#
#####################################################################
# Binormal CI actually narrower than non-robust CI for restricted variable selection: probably due to right-skewed conditional crisis probability distributions
#
rm(list=ls())
library(psych)
library(graphics)
library(sandwich)
library(bbmle)
library(pROC)
library(randomForest)
library(plm)
library(matrixcalc)
library(stats4)
library(zoo)
library(mvtnorm)
library(MASS)
library(ellipse)
library(DataCombine)

###############################################################
#												PREPARATION													#
###############################################################

#Daten  <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)

Daten <- read.table("/Users/chenyao_ks/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",",dec=".",header=TRUE)

ca <- grep("ca", names(Daten), value=T)
drops <- names(Daten) %in% c(ca)
Daten <- Daten[!drops]

# drop vars not used
assets <- grep("assets", names(Daten), value=T)
stocks <- grep("stocks", names(Daten), value=T)
narrowm <- grep("narrowm", names(Daten), value=T)
money <- grep("money", names(Daten), value=T)
ltrate <- grep("ltrate", names(Daten), value=T)
stir <- grep("stir", names(Daten), value=T)
loans <- grep("loans", names(Daten), value=T)
debt <- grep("debt", names(Daten), value=T)
er <- grep("er", names(Daten), value=T)
cpi <- grep("cpi", names(Daten), value=T)
gap <- grep("gap", names(Daten), value=T)
glo <- grep("a_", names(Daten), value=T)
gdp <- grep("gdp", names(Daten), value=T)
i <- grep("i_", names(Daten), value=T)
c <- grep("c_", names(Daten), value=T)
ri <- grep("ri", names(Daten), value=T)
rc <- grep("rc", names(Daten), value=T)

drops <- names(Daten) %in% c(stocks, money, stir,assets,i,ri,glo) # true-false indicator: true at the names in vector

full <- Daten[!drops] # drops those variables which have true indication in "drops"
full <- cbind(Daten[glo], full)

# FULL SET: omit observations with missing values
full_om <- na.omit(full)
sum(full_om$b2)/2

# SELECTION SET:
sel.list <- c("ccode", "year", "b2", "loans1_y_gap", "pdebt_gap", "narrowm_y_gap",  "rltrate", "gr_rgdp", "gr_cpi",  "er_gap", "loans1_y", "pdebt", "ltrate")
location <- names(full) %in% c(sel.list) # get location of independent var
name.sel <- names(full[location]) # get names of features
sel <- full[name.sel]
sel_om <- na.omit(sel)
sum(sel_om$b2)/2


###############################################################
#														ANALYSIS   												#
###############################################################

### CLASSIFICATION-TREE ANALYSIS
##############################################################################################################
# countries
c.list <- c("Australia", "Belgium", "Canada", "Switzerland", "Germany", "Denmark", "Spain", "Finland", "France", "UK", "Italy","Japan","Netherlands","Norway","Portugal","Sweden","US")
# variables
var.list <- list("loans1_y_gap", "pdebt_gap", "narrowm_y_gap",  "rltrate", "gr_rgdp", "gr_cpi",  "er_gap",  "ia_pub", "ia_prb", "ia_jb", "ia_lygr", "ia_pygr", "ia_lyer")
# miscellaneous non-independent
misc.list <- c("b2","b1","b3","rec1","rec2","rec3", "year", "ccode")

loc.misc <- names(full_om) %in% c(misc.list)

#significance level
alpha<-0.05

#Grid points for ROC curve
l<-30

# table matrices
out <- matrix(nrow=17, ncol=14)
outLogit <- matrix(nrow=17, ncol=14)
##############################################################################################################






## BAGGING-selection

location <- names(sel_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(sel_om[!location]) # get names of features
indep <- sel_om[name.indep]
dep <- factor(sel_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
bagging_selection= randomForest(indep, y=dep,
 data=sel_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(sel_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
bagging_selection

# OOS-analysis
library(pROC)

pred <- predict(bagging_selection, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- sel_om[,"b2"]

r_bag<-roc(true, pred, ci=T) # ROC analysis
r_bag


#no NAs -> estimate parameter vector & get Hessian
obs<-nrow(sel_om)

Z<-sel_om$b2
Y<-pred
TY<-qnorm(Y)   #probit transformation
Mu<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],mean,na.rm=T)
Mu1<-Mu[2]
Mu0<-Mu[1]
Var<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],var)*c((obs-sum(sel_om$b2)-1)/(obs-sum(sel_om$b2)),(sum(sel_om$b2)-1)/sum(sel_om$b2))
Var1<-Var[2]
Var0<-Var[1]

Hessian<-matrix(c(-sum(Z)/Var1,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,0,0,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,sum(Z)/(2*Var1^2)-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1)^2)/Var1^3,0,0,0,0,-sum(1-Z[TY!=-Inf])/Var0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,0,0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,sum(1-Z[TY!=-Inf])/(2*Var0^2)-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0)^2)/Var0^3),nrow=4,ncol=4)/obs


X<-cbind(sel_om,pred)
X<-X[,c("ccode","year","pred")]

# Plot estimated distribution against histogram
a<-seq(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T),length=100)
hx1 <- dnorm(a,mean=Mu1, sd=sqrt(Var1))
hx2 <- dnorm(a,mean=Mu0, sd=sqrt(Var0))

hist(TY[sel_om[,"b2"]==0], col=rgb(0.1,0.1,0.1,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
hist(TY[sel_om[,"b2"]==1], add=T, col=rgb(0.8,0.8,0.8,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
 par(new=T)
 plot(a,hx1,type="l",lwd=1)
 par(new=T)
 plot(a,hx2,type="l",lwd=1)
 
 #with NAs -> get Gradient and Information matrix 
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

Z<-XX[,4]
Y<-XX[,3]
TY<-qnorm(Y)   #probit transformation
TY[TY==-Inf]<-min(TY[TY!=-Inf],na.rm=T)
TY[TY==Inf]<-max(TY[TY!=Inf],na.rm=T)


Gradient<-matrix(nrow=nrow(XX),ncol=4)
for(i in 1:nrow(full)){Gradient[i,]=c(Z[i]*(TY[i]-Mu1)/Var1,Z[i]*(TY[i]-Mu1)^2/(2*Var1^2)- Z[i]/(2*Var1),(1-Z[i])*(TY[i]-Mu0)/Var0,(1-Z[i])*(TY[i]-Mu0)^2/(2*Var0^2)-(1-Z[i])/(2*Var0))}


M<-cbind(Gradient,XX)

d<-data.frame(M)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation of crisis risk
AC_bag<-plm(ddd[,7]~ddd[,9], data=ddd,index = c("ccode","year"))
AC<-lm(ddd[,7]~ddd[,9], data=ddd)

Information=lrvar(Gradient,type="Andrews",prewhite=T,adjust=T)*obs

AsyCov=solve(Hessian)%*%Information%*%solve(Hessian)/obs
SH2<-matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=4,ncol=1)

AUC=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0)))
AUC_up=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))+qnorm(1-alpha/2)*SH2^0.5)
AUC_lo=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))-qnorm(1-alpha/2)*SH2^0.5)

robAUC_bag=c(AUC_lo,AUC,AUC_up)
robAUC_bag

#ROC
eta<-seq(min(TY,na.rm=T)-1.5,max(TY,na.rm=T)+0.7,length=l)

SH<-matrix(nrow=30,ncol=1)
for(i in 1:l){
SH[i]<-matrix(c(1/Var1^0.5,-(Mu1-eta[i])/(2*Var1^1.5),0,0),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/Var1^0.5, -(Mu1- eta[i])/(2*Var1^1.5),0,0),nrow=4,ncol=1)} 
#ROC curve
par(pty = "s") 
plot(pnorm((Mu0-eta)/Var0^0.5), pnorm((Mu1-eta)/Var1^0.5), type='l', xlab="FPR", ylab="TPR", xlim =c(0, 1),ylim =c(0, 1), col="black", lwd=3.5,cex.lab=1.6)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5+qnorm(1-alpha/2)*SH^0.5), col="black", type='l', lty=2, lwd=2)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5-qnorm(1-alpha/2)*SH^0.5), col="black", type='l', pch=22,lty=2,lwd=2)
lines(x=c(0,1), y=c(0,1), col="grey", type='l', lwd=2, lty =2)







## RF-selection

location <- names(sel_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(sel_om[!location]) # get names of features
indep <- sel_om[name.indep]
dep <- factor(sel_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
rf_selection= randomForest(indep, y=dep,
 data=sel_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=sqrt(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(sel_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
rf_selection

# OOS-analysis
library(pROC)

pred <- predict(rf_selection, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- sel_om[,"b2"]

r_rf<-roc(true, pred, ci=T) # ROC analysis
r_rf

#no NAs -> estimate parameter vector & get Hessian
obs<-nrow(sel_om)

Z<-sel_om$b2
Y<-pred
TY<-qnorm(Y)   #probit transformation
Mu<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],mean,na.rm=T)
Mu1<-Mu[2]
Mu0<-Mu[1]
Var<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],var)*c((obs-sum(sel_om$b2)-1)/(obs-sum(sel_om$b2)),(sum(sel_om$b2)-1)/sum(sel_om$b2))
Var1<-Var[2]
Var0<-Var[1]

Hessian<-matrix(c(-sum(Z)/Var1,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,0,0,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,sum(Z)/(2*Var1^2)-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1)^2)/Var1^3,0,0,0,0,-sum(1-Z[TY!=-Inf])/Var0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,0,0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,sum(1-Z[TY!=-Inf])/(2*Var0^2)-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0)^2)/Var0^3),nrow=4,ncol=4)/obs


X<-cbind(sel_om,pred)
X<-X[,c("ccode","year","pred")]

# Plot estimated distribution against histogram
a<-seq(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T),length=100)
hx1 <- dnorm(a,mean=Mu1, sd=sqrt(Var1))
hx2 <- dnorm(a,mean=Mu0, sd=sqrt(Var0))

hist(TY[sel_om[,"b2"]==0], col=rgb(0.1,0.1,0.1,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
hist(TY[sel_om[,"b2"]==1], add=T, col=rgb(0.8,0.8,0.8,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
 par(new=T)
 plot(a,hx1,type="l",lwd=1)
 par(new=T)
 plot(a,hx2,type="l",lwd=1)
 
 #with NAs -> get Gradient and Information matrix 
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

Z<-XX[,4]
Y<-XX[,3]
TY<-qnorm(Y)   #probit transformation
TY[TY==-Inf]<-min(TY[TY!=-Inf],na.rm=T)
TY[TY==Inf]<-max(TY[TY!=Inf],na.rm=T)


Gradient<-matrix(nrow=nrow(XX),ncol=4)
for(i in 1:nrow(full)){Gradient[i,]=c(Z[i]*(TY[i]-Mu1)/Var1,Z[i]*(TY[i]-Mu1)^2/(2*Var1^2)- Z[i]/(2*Var1),(1-Z[i])*(TY[i]-Mu0)/Var0,(1-Z[i])*(TY[i]-Mu0)^2/(2*Var0^2)-(1-Z[i])/(2*Var0))}

M<-cbind(Gradient,XX)

d<-data.frame(M)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation of crisis risk
AC_rf<-plm(ddd[,7]~ddd[,9], data=ddd,index = c("ccode","year"))


Information=lrvar(Gradient,type="Andrews",prewhite=T,adjust=T)*obs

AsyCov=solve(Hessian)%*%Information%*%solve(Hessian)/obs
SH2<-matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=4,ncol=1)

AUC=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0)))
AUC_up=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))+qnorm(1-alpha/2)*SH2^0.5)
AUC_lo=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))-qnorm(1-alpha/2)*SH2^0.5)

robAUC_rf=c(AUC_lo,AUC,AUC_up)
robAUC_rf

#ROC
eta<-seq(min(TY,na.rm=T)-1.5,max(TY,na.rm=T)+0.7,length=l)

SH<-matrix(nrow=30,ncol=1)
for(i in 1:l){
SH[i]<-matrix(c(1/Var1^0.5,-(Mu1-eta[i])/(2*Var1^1.5),0,0),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/Var1^0.5, -(Mu1- eta[i])/(2*Var1^1.5),0,0),nrow=4,ncol=1)} 
#ROC curve
par(pty = "s") 
plot(pnorm((Mu0-eta)/Var0^0.5), pnorm((Mu1-eta)/Var1^0.5), type='l', xlab="FPR", ylab="TPR", xlim =c(0, 1),ylim =c(0, 1), col="black", lwd=3.5,cex.lab=1.6)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5+qnorm(1-alpha/2)*SH^0.5), col="black", type='l', lty=2, lwd=2)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5-qnorm(1-alpha/2)*SH^0.5), col="black", type='l', pch=22,lty=2,lwd=2)
lines(x=c(0,1), y=c(0,1), col="grey", type='l', lwd=2, lty =2)







## BAGGING-all variables

location <- names(full_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
bagging_full= randomForest(indep, y=dep,
 data=full_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=ncol(indep), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(full_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
bagging_full

# convergence diagnostic
palette("default")
plot(bagging_full, type="l", main="")

# OOS-analysis
library(pROC)

pred <- predict(bagging_full, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- full_om[,"b2"]

r_bag_m<-roc(true, pred, ci=T) # ROC analysis
r_bag_m

#no NAs -> estimate parameter vector & get Hessian
obs<-nrow(full_om)

Z<-full_om$b2
Y<-pred
TY<-qnorm(Y)   #probit transformation
Mu<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],mean,na.rm=T)
Mu1<-Mu[2]
Mu0<-Mu[1]
Var<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],var)*c((obs-sum(full_om$b2)-1)/(obs-sum(full_om$b2)),(sum(full_om$b2)-1)/sum(full_om$b2))
Var1<-Var[2]
Var0<-Var[1]

Hessian<-matrix(c(-sum(Z)/Var1,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,0,0,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,sum(Z)/(2*Var1^2)-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1)^2)/Var1^3,0,0,0,0,-sum(1-Z[TY!=-Inf])/Var0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,0,0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,sum(1-Z[TY!=-Inf])/(2*Var0^2)-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0)^2)/Var0^3),nrow=4,ncol=4)/obs


X<-cbind(full_om,pred)
X<-X[,c("ccode","year","pred")]

# Plot estimated distribution against histogram
a<-seq(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T),length=100)
hx1 <- dnorm(a,mean=Mu1, sd=sqrt(Var1))
hx2 <- dnorm(a,mean=Mu0, sd=sqrt(Var0))

hist(TY[full_om[,"b2"]==0], col=rgb(0.1,0.1,0.1,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
hist(TY[full_om[,"b2"]==1], add=T, col=rgb(0.8,0.8,0.8,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
 par(new=T)
 plot(a,hx1,type="l",lwd=1)
 par(new=T)
 plot(a,hx2,type="l",lwd=1)
 
 #with NAs -> get Gradient and Information matrix 
obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

Z<-XX[,4]
Y<-XX[,3]
TY<-qnorm(Y)   #probit transformation
TY[TY==-Inf]<-min(TY[TY!=-Inf],na.rm=T)
TY[TY==Inf]<-max(TY[TY!=Inf],na.rm=T)


Gradient<-matrix(nrow=nrow(XX),ncol=4)
for(i in 1:nrow(full)){Gradient[i,]=c(Z[i]*(TY[i]-Mu1)/Var1,Z[i]*(TY[i]-Mu1)^2/(2*Var1^2)- Z[i]/(2*Var1),(1-Z[i])*(TY[i]-Mu0)/Var0,(1-Z[i])*(TY[i]-Mu0)^2/(2*Var0^2)-(1-Z[i])/(2*Var0))}

M<-cbind(Gradient,XX)

d<-data.frame(M)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation of crisis risk
AC_bag_m<-plm(ddd[,7]~ddd[,9], data=ddd,index = c("ccode","year"))

Information=lrvar(Gradient,type="Andrews",prewhite=T,adjust=T)*obs

AsyCov=solve(Hessian)%*%Information%*%solve(Hessian)/obs
SH2<-matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=4,ncol=1)

AUC=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0)))
AUC_up=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))+qnorm(1-alpha/2)*SH2^0.5)
AUC_lo=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))-qnorm(1-alpha/2)*SH2^0.5)

robAUC_bag_m=c(AUC_lo,AUC,AUC_up)
robAUC_bag_m

#ROC
eta<-seq(min(TY,na.rm=T)-1.5,max(TY,na.rm=T)+0.7,length=l)

SH<-matrix(nrow=30,ncol=1)
for(i in 1:l){
SH[i]<-matrix(c(1/Var1^0.5,-(Mu1-eta[i])/(2*Var1^1.5),0,0),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/Var1^0.5, -(Mu1- eta[i])/(2*Var1^1.5),0,0),nrow=4,ncol=1)} 
#ROC curve
par(pty = "s") 
plot(pnorm((Mu0-eta)/Var0^0.5), pnorm((Mu1-eta)/Var1^0.5), type='l', xlab="FPR", ylab="TPR", xlim =c(0, 1),ylim =c(0, 1), col="black", lwd=3.5,cex.lab=1.6)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5+qnorm(1-alpha/2)*SH^0.5), col="black", type='l', lty=2, lwd=2)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5-qnorm(1-alpha/2)*SH^0.5), col="black", type='l', pch=22,lty=2,lwd=2)
lines(x=c(0,1), y=c(0,1), col="grey", type='l', lwd=2, lty =2)










## RANDOM FOREST
library(randomForest)
library(pROC)

location <- names(full_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(full_om[!location]) # get names of features
indep <- full_om[name.indep]
dep <- factor(full_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
rf= randomForest(indep, y=dep,
 data=full_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=sqrt(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(full_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
rf

# OOS-analysis
pred <- predict(rf, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true <- full_om[,"b2"]

r_rf_m<-roc(true, pred, ci=T) # ROC analysis
r_rf_m

#no NAs -> estimate parameter vector & get Hessian
obs<-nrow(full_om)

Z<-full_om$b2
Y<-pred
TY<-qnorm(Y)   #probit transformation
Mu<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],mean,na.rm=T)
Mu1<-Mu[2]
Mu0<-Mu[1]
Var<-tapply(TY[TY!=-Inf],Z[TY!=-Inf],var)*c((obs-sum(full_om$b2)-1)/(obs-sum(full_om$b2)),(sum(full_om$b2)-1)/sum(full_om$b2))
Var1<-Var[2]
Var0<-Var[1]

Hessian<-matrix(c(-sum(Z)/Var1,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,0,0,-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1))/Var1^2,sum(Z)/(2*Var1^2)-sum(Z[TY!=-Inf]*(TY[TY!=-Inf]-Mu1)^2)/Var1^3,0,0,0,0,-sum(1-Z[TY!=-Inf])/Var0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,0,0,-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0))/Var0^2,sum(1-Z[TY!=-Inf])/(2*Var0^2)-sum((1-Z[TY!=-Inf])*(TY[TY!=-Inf]-Mu0)^2)/Var0^3),nrow=4,ncol=4)/obs


X<-cbind(full_om,pred)
X<-X[,c("ccode","year","pred")]

# Plot estimated distribution against histogram
a<-seq(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T),length=100)
hx1 <- dnorm(a,mean=Mu1, sd=sqrt(Var1))
hx2 <- dnorm(a,mean=Mu0, sd=sqrt(Var0))

hist(TY[full_om[,"b2"]==0], col=rgb(0.1,0.1,0.1,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
hist(TY[full_om[,"b2"]==1], add=T, col=rgb(0.8,0.8,0.8,0.5),xlim=c(min(TY[TY!=-Inf],na.rm=T),max(TY[TY!=-Inf],na.rm=T)),freq=F)
 par(new=T)
 plot(a,hx1,type="l",lwd=1)
 par(new=T)
 plot(a,hx2,type="l",lwd=1)
 
 #with NAs -> get Gradient and Information matrix 
 obs<-nrow(full)
XX<-merge(full,X,all=T,by=c("ccode","year"))
XX<-XX[,c("year","ccode","pred")]
XX<-cbind(as.matrix(XX),as.matrix(full$b2))
colnames(XX)[4]<-"b2"

Z<-XX[,4]
Y<-XX[,3]
TY<-qnorm(Y)   #probit transformation
TY[TY==-Inf]<-min(TY[TY!=-Inf],na.rm=T)
TY[TY==Inf]<-max(TY[TY!=Inf],na.rm=T)


Gradient<-matrix(nrow=nrow(XX),ncol=4)
for(i in 1:nrow(full)){Gradient[i,]=c(Z[i]*(TY[i]-Mu1)/Var1,Z[i]*(TY[i]-Mu1)^2/(2*Var1^2)- Z[i]/(2*Var1),(1-Z[i])*(TY[i]-Mu0)/Var0,(1-Z[i])*(TY[i]-Mu0)^2/(2*Var0^2)-(1-Z[i])/(2*Var0))}

M<-cbind(Gradient,XX)

d<-data.frame(M)
dd<-d[order(d$ccode,d$year),]
ddd<-slide(dd,Var="pred", GroupVar="ccode", slideBy=-1)

#check autocorrelation of crisis risk
AC_rf_m<-plm(ddd[,7]~ddd[,9], data=ddd,index = c("ccode","year"))

Information=lrvar(Gradient,type="Andrews",prewhite=T,adjust=T)*obs

AsyCov=solve(Hessian)%*%Information%*%solve(Hessian)/obs
SH2<-matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/sqrt(Var1+Var0),-(Mu1-Mu0)/(2*(Var1+Var0)^1.5),-1/sqrt(Var1+Var0),(Mu1-Mu0)/(2*(Var1+Var0)^1.5)),nrow=4,ncol=1)

AUC=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0)))
AUC_up=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))+qnorm(1-alpha/2)*SH2^0.5)
AUC_lo=pnorm((Mu1-Mu0)/(sqrt(Var1+Var0))-qnorm(1-alpha/2)*SH2^0.5)


robAUC_rf_m=c(AUC_lo,AUC,AUC_up)
robAUC_rf_m

#ROC
eta<-seq(min(TY,na.rm=T)-1.5,max(TY,na.rm=T)+0.7,length=l)

SH<-matrix(nrow=30,ncol=1)
for(i in 1:l){
SH[i]<-matrix(c(1/Var1^0.5,-(Mu1-eta[i])/(2*Var1^1.5),0,0),nrow=1,ncol=4)%*%AsyCov%*% matrix(c(1/Var1^0.5, -(Mu1- eta[i])/(2*Var1^1.5),0,0),nrow=4,ncol=1)} 
#ROC curve
par(pty = "s") 
plot(pnorm((Mu0-eta)/Var0^0.5), pnorm((Mu1-eta)/Var1^0.5), type='l', xlab="FPR", ylab="TPR", xlim =c(0, 1),ylim =c(0, 1), col="black", lwd=3.5,cex.lab=1.6)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5+qnorm(1-alpha/2)*SH^0.5), col="black", type='l', lty=2, lwd=2)
points(pnorm((Mu0-eta)/Var0^0.5),pnorm((Mu1-eta)/Var1^0.5-qnorm(1-alpha/2)*SH^0.5), col="black", type='l', pch=22,lty=2,lwd=2)
lines(x=c(0,1), y=c(0,1), col="grey", type='l', lwd=2, lty =2)




robAUC = rbind(robAUC_bag,robAUC_rf, robAUC_bag_m, robAUC_rf_m)

AC = rbind(AC_bag,AC_rf, AC_bag_m, AC_rf_m)

save.image("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_longrun_AC")

load("/Users/felixward/Dropbox/CrisisPrediction//DoFiles/CT_longrun_AC")

robAUC
AC

