#####################################################################
#														CASE STUDY															#
#####################################################################
rm(list=ls())
library(psych)
library(graphics)
library(sandwich)
library(bbmle)
library(pROC)
library(randomForest)


###############################################################
#												PREPARATION													#
###############################################################

Daten  <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)
Daten_log  <- read.table("/Users/felixward/Dropbox/CrisisPrediction/Data/R_class.csv", sep=",", dec=".", header=TRUE)

## interaction-terms
ia_pub<-Daten_log$pdebt_gap*Daten_log$ltrate
Daten_log$ia_pub<-ia_pub

ia_prb<-Daten_log$loans1_y_gap*Daten_log$ltrate
Daten_log$ia_prb<-ia_prb

ia_jb<-Daten_log$loans1_y_gap*Daten_log$ltrate*Daten_log$pdebt_gap
Daten_log$ia_jb<-ia_jb

ia_lygr<-Daten_log$loans1_y*Daten_log$gr_rgdp
Daten_log$ia_lygr<-ia_lygr

ia_pygr<-Daten_log$pdebt*Daten_log$gr_rgdp
Daten_log$ia_pygr<-ia_pygr

ia_lyer<-Daten_log$loans1_y_gap*Daten_log$er_gap
Daten_log$ia_lyer<-ia_lyer

ca <- grep("ca", names(Daten), value=T)
drops <- names(Daten) %in% c(ca)
Daten <- Daten[!drops]

ca <- grep("ca", names(Daten_log), value=T)
drops <- names(Daten_log) %in% c(ca)
Daten_log <- Daten_log[!drops]

## country factor
Daten_log$country.factor<-as.factor(Daten$ccode)


# drop vars not used
assets <- grep("assets", names(Daten), value=T)
stocks <- grep("stocks", names(Daten), value=T)
narrowm <- grep("narrowm", names(Daten), value=T)
money <- grep("money", names(Daten), value=T)
ltrate <- grep("ltrate", names(Daten), value=T)
stir <- grep("stir", names(Daten), value=T)
loans <- grep("loans", names(Daten), value=T)
debt <- grep("debt", names(Daten), value=T)
er <- grep("er", names(Daten), value=T)
cpi <- grep("cpi", names(Daten), value=T)
gap <- grep("gap", names(Daten), value=T)
glo <- grep("a_", names(Daten), value=T)
gdp <- grep("gdp", names(Daten), value=T)
i <- grep("i_", names(Daten), value=T)
c <- grep("c_", names(Daten), value=T)
ri <- grep("ri", names(Daten), value=T)
rc <- grep("rc", names(Daten), value=T)

drops <- names(Daten) %in% c("ccode", stocks, money, stir,assets,i,ri,glo) # true-false indicator: true at the names in vector

full <- Daten[!drops] # drops those variables which have true indication in "drops"
full <- cbind(Daten[glo], full)

full_log <- Daten_log

# FULL SET: omit observations with missing values
full_om <- na.omit(full)
sum(full_om$b2)/2

sum(full_log$b2)/2


# generate pre- and post 1998 sample: omitted
test_om <- full_om[ which(full_om$year>1997), ]
train_om <- full_om[ which(full_om$year<=1997), ]

# generate test and training sample: full
test_full_log <-  full_log[ which(full_log$year>1997), ]
train_full_log <- full_log[ which(full_log$year<=1997), ]

# count missing values
k<-apply(test_full_log, 1, function(x) sum(is.na(x)))/ncol(test_full_log)*100
n.mobs <- sum(k!=0) # number of obs. where there are missing values
n.mval <- sum(is.na(test_full_log)) # number of missing values
n.mobs
n.mval

# replace missing values in test_full_log by mean/mode to be able to predict full 1998-2011 range with RF
for (var in 1:ncol(test_full_log)) {
    if (class(test_full_log[,var])=="numeric") {
        test_full_log[is.na(test_full_log[,var]),var] <- mean(test_full_log[,var], na.rm = TRUE)
    } else if (class(test_full_log[,var]) %in% c("character", "factor")) {
        test_full_log[is.na(test_full_log[,var]),var] <- Mode(test_full_log[,var], na.rm = TRUE)
    }
}


###############################################################
#														ANALYSIS   												#
###############################################################

### CASE-STUDY
##############################################################################################################
# countries
c.list <- c("Australia", "Belgium", "Canada", "Switzerland", "Germany", "Denmark", "Spain", "Finland", "France", "UK", "Italy","Japan","Netherlands","Norway","Portugal","Sweden","US")
# variables
var.list <- list("loans1_y_gap", "pdebt_gap", "narrowm_y_gap",  "rltrate", "gr_rgdp", "gr_cpi",  "er_gap",  "ia_pub", "ia_prb", "ia_jb", "ia_lygr", "ia_pygr", "ia_lyer")
# miscellaneous non-independent
misc.list <- c("b2","b1","b3","rec1","rec2","rec3", "year", "ccode")

loc.misc <- names(train_om) %in% c(misc.list)

# table matrices
out <- matrix(nrow=17, ncol=14)
outLogit <- matrix(nrow=17, ncol=14)

#crisis year dummy
t<-test_full_log$b2
t2<-t
for(i in 1:nrow(as.matrix(t))){
	t2[i+1] <- t[i]-t[i+1]
	}
nrow(as.matrix(t))
nrow(as.matrix(t2))
t2<-t2[1:nrow(as.matrix(t))]
t2[is.na(t2)] <- 0
t2[t2==-1]<-0


# crises
cris <- matrix(nrow=17, ncol=14)
for(i in 1:17){
	for(j in 1:14){
cris[i, j] <- t2[ 14*(i-1)+j]
}
}
##############################################################################################################

## LOGIT
# get formula
location <- names(full_log) %in% c(var.list,"country.factor") # get location of vars
name <- names(full_log[location]) # get names
indep <- paste(name, collapse="+") # indep. variables
dep <- paste("b2~") # dep. variable
fmla <- as.formula(paste(dep, indep)) # get formula
	
# Regression
logit<-glm(fmla, data=train_full_log, family="binomial")

# OOS-analysis
pred<-predict(logit, newdata=test_full_log, type="response") # predicted outcome

true<-test_full_log[,"b2"] # real outcome

r_log<- roc(true,pred,ci=T,direction="<") # ROC analysis
r_log

for(i in 1:17){
	for(j in 1:14){
outLogit[i, j] <- pred[ 14*(i-1)+j]
}
}

outLogit

## RANDOM FOREST
library(randomForest)
library(pROC)

location <- names(train_om) %in% c(misc.list) # get location of dependent var
name.indep <- names(train_om[!location]) # get names of features
indep <- train_om[name.indep]
dep <- factor(train_om[,"b2"]>0) # dep. var.

# grow trees
set.seed(1)
rf= randomForest(indep, y=dep,
 data=train_om,
 ntree=5000,
 replace=T, # bootstrapping (with replacement!)
 mtry=sqrt(ncol(indep)), # all features except dependent variable
 
 cutoff=c(1/2, 1/2), # majority vote: class with maximum ratio of (prop. of votes/cutoff(=1/k)) wins
 sampsize=nrow(train_om), # bootstrapping (comput. more efficient wihtout much loss by using 1/2*train (see Friedman & Hall, 2007))
 nodesize=1 # fully grow trees (experiment to avoid overfitting (see Segal, 2004)); (also see Biau et al., 2012 on consistency)
 ) 
rf

# OOS-analysis
pred <- predict(rf, newdata=test_full_log, type="prob")[,2] # predicted outcome; second column = TRUE probability (votes combined with normvotes=T equals type="prob")

true<-test_full_log[,"b2"] # real outcome

r_rf<-roc(true, pred, ci=T) # ROC analysis
r_rf

# compare ROCs
testobj <- roc.test(r_rf,r_log,method="delong",alternative="greater")
options("scipen"=10)
options()$scipen


for(i in 1:17){
	for(j in 1:14){
out[i, j] <- pred[ 14*(i-1)+j]
}
}

out

save.image("/Users/felixward/Dropbox/CrisisPrediction/DoFiles/case") 

###############################################################
#															GRAPHS													#
###############################################################
load("/Users/felixward/Dropbox/CrisisPrediction/DoFiles/case")

time<-c(1998:2011)
a<-t(time*t(cris))
z<-matrix(nrow=1, ncol=17)
for(i in 1:17){
z[i]<-max(a[i,])
}

b<-rbind(z,z,z,z,z,z,z,z,z,z,z,z,z,z)

c<-seq(0,1,1/13)
r<-c(1998,2011)

pdf('/Users/felixward/Dropbox/CrisisPrediction/Written/case.pdf', width=6, height=6.5)

par(mfrow=c(5,4), bg="white")
par(oma=c(0.1, 0.1, 0.1, 0.1))
par(mar=c(3, 2, 2, 1.5))
for(i in 1:17){
	# risk
	plot(time,out[i,],type="o", lwd=1, xlim=r,ylim=c(0,0.7),col="black",xlab="", ylab="", cex.main=1, cex.lab=1, cex.axis=1, main=c.list[i], font.main=1)
	par(new=T)
	lines(time,outLogit[i,],type="o", pch=22, lty=1, col="gray50")
	par(new=T)
	# vertical crisis bar
	plot(b[,i],c,type="l", lty=1, xlim=r,ylim=c(0,0.7),col="gray75",xlab="", ylab="", cex.main=1, cex.lab=1, cex.axis=1, main="", font.main=1)
}
	plot(c(0,5), c(0,4), axes=F,type="n", xlab="", ylab="", main="")
	legend(0.0005, 3, c("Logit","RF"),  pch=c(22, 1), lty=c(1,1,2), col=c("gray75", "black",1))

dev.off()


