*-------------------------------------------------------------------------------------------------------------
*
*	Prepare File
*	Felix Ward
*	
*
*-------------------------------------------------------------------------------------------------------------


* Start from an empty session, never pause output, and pin interpreter behaviour to Stata 12.
clear all
set more off
version 12

* Project root directory; every file path below is built from this macro.
local path "/Users/felixward/Dropbox/CrisisPrediction/"
*local path C:\Users\Ward\Dropbox\CrisisPrediction\

* Close a log that may still be open (capture suppresses the error if there is none).
capture log close

* Read the raw long-run data via the file named after -using-, then print summary statistics.
infile using "`path'data/text_longrun.txt", clear
summarize

qui{


*__________________________DECLARE TIME SERIES_______________________
noisily display "Declare dynamic panel structure"

* Numeric country identifier built from the iso string; prefix all value labels with their codes.
encode iso, generate(countrynum)
numlabel _all, add

*drop iso

* Declare a yearly country panel.
tsset countrynum year, yearly

* Keep observations up to and including 2011 (rows with a missing year are removed as well,
* exactly as a -drop if year > 2011- would do).
keep if year <= 2011

* The tloans series takes over the name loans1; the previous loans1 is discarded.
drop loans1
rename tloans loans1

*_________ SINGLE DENOMINATION FOR GERMAN SERIES ____________
noisily display "Renominate German Series"

* Nominal series that are rescaled for Germany (iso=="DEU").
local ger loans1 bassets2 narrowm money gdp ca imports exports

foreach series of local ger {
	* years before 1924: old RM to DM
	replace `series' = `series'/10^12 * 0.65 if iso=="DEU" & year<1924
	* years 1924 to 1947: new RM/Rent.M to DM
	replace `series' = `series'*0.65 if iso=="DEU" & year>=1924 & year<1948
}

*________________GDP_________________
noisily display "Deflate"

* Real GDP per capita: the nominal gdp series divided by cpi and by pop.
generate rgdp = gdp/cpi/pop

*________INVESTMENT__________
* iy is renamed i_y; investment is i_y times nominal gdp, then deflated by cpi.
rename iy i_y
generate i = i_y*gdp
generate ri = i/cpi


*________CONSUMPTION_________
* rconsbarro is renamed rc; c reflates it by multiplying with cpi.
rename rconsbarro rc
generate c = rc*cpi


*________________STOCK PRICE DATA_________________
* Deflated stock prices.
generate rstocks = stocks/cpi


*________________PUBLIC DEBT__________________
* Debt-to-GDP ratio times rgdpmad. Per the original author's note this is the Maddison GDP
* series (not per capita) and is already deflated, so no division by cpi is applied here.
rename debtgdp pdebt
generate rpubdebt = pdebt*rgdpmad


*__________________OTHER - DEFLATE___________________
* Remaining nominal series, each divided by cpi and stored with an "r" prefix.
foreach nominal in ca loans1 bassets2 money narrowm {
	generate r`nominal' = `nominal'/cpi
}

*__________________GDP-RATIOS________________
noisily display "to-GDP ratios"
* The ratios built in the loop below are standardized within country and shifted by +10.
* Per the original author's note: in the later gap calculation a trend component close to
* zero would give extreme indicator values and widen the range of the indicator. This does
* not affect RF results, but it makes computing partial dependence functions very tedious
* because far more points have to be evaluated across the whole range.

* Plain (non-standardized) consumption and current-account ratios.
generate c_y = c/gdp
generate ca_y = ca/gdp

* Largest country code; the per-country loop runs over 1..max.
summarize ccode
local max = r(max)

local togdp bassets2 loans1 money narrowm

foreach tgp of local togdp {
	generate `tgp'_y = .
	generate nostd_`tgp'_y = `tgp'/gdp

	forvalues x = 1/`max' {
		* country-specific mean and standard deviation of the raw ratio
		summarize nostd_`tgp'_y if ccode==`x'
		local mean = r(mean)
		local sd = r(sd)
		replace `tgp'_y = (nostd_`tgp'_y - `mean')/`sd' + 10 if ccode==`x'
	}
}

* The raw ratios were only needed as inputs to the standardization.
drop nostd_*
	
*______________REAL EXCHANGE RATE (rel. to US)________________
noisily display "Real exchange rate"

rename xrusd er

* U.S. CPI: taken from the USA rows and spread to every observation of the same year.
generate cpi_us = cpi if iso=="USA"
egen cpi_usa = min(cpi_us), by(year)
drop cpi_us

* Exchange rate scaled by the ratio of the country's CPI to the U.S. CPI.
generate nostd_rer = er*(cpi/cpi_usa)

* Standardize within country and shift by +10. Per the original author's note the levels
* are not necessarily comparable across countries (base years etc.), so only the
* within-country level is retained.
local stdize rer

summarize ccode
local max = r(max)

foreach stnd of local stdize {
	generate `stnd' = .
	forvalues x = 1/`max' {
		summarize nostd_`stnd' if ccode==`x'
		local mean = r(mean)
		local sd = r(sd)
		replace `stnd' = (nostd_`stnd' - `mean')/`sd' + 10 if ccode==`x'
	}
}

* The USA rows are set to the constant 10.
replace rer = 10 if iso=="USA"
	


*_______________GENERATE LAGS, GROWTH RATES & LAGGED GROWTH RATES___________________
noisily display "Lags, growth rates and lagged growth rates"

* org holds the level variables; gro holds the matching growth-rate names, position by position.
local org "rer er rgdp rc ri rca cpi rnarrowm rmoney rstocks rbassets2 rloans1 rpubdebt c_y i_y ca_y bassets2_y loans1_y money_y narrowm_y"
local gro "gr_rer gr_er gr_rgdp gr_rc gr_ri gr_rca gr_cpi gr_rnarrowm gr_rmoney gr_rstocks gr_rbassets2 gr_rloans1 gr_rpubdebt gr_c_y gr_i_y gr_ca_y gr_bassets2_y gr_loans1_y gr_money_y gr_narrowm_y"
local vecsize : word count `org'

forvalues x = 1/`vecsize' {
	local level : word `x' of `org'
	local growth : word `x' of `gro'

	* lags 0 to 5 of the level (l0_ is a copy of the current value)
	forvalues lag = 0/5 {
		generate l`lag'_`level' = l`lag'.`level'
	}

	* first difference, and growth rate relative to the stored one-year lag
	generate d_`level' = d.`level'
	generate `growth' = d.`level'/l1_`level'

	* lags 0 to 5 of the growth rate
	forvalues lag = 0/5 {
		generate l`lag'_`growth' = l`lag'.`growth'
	}
}

*___________REAL INTEREST RATES____________
n display "Real interest rates and interest rate differentials"

* Real rates: nominal rate minus CPI growth.
* NOTE(review): gr_cpi is built as d.cpi/l1_cpi, i.e. a fraction - confirm that stir and
* ltrate are expressed on the same scale.
gen rstir = stir - gr_cpi
gen rltrate = ltrate - gr_cpi

*____________INTEREST RATE DIFFERENTIALS (compared to global average)__________
* For every rate: dif_<rate> = rate minus the unweighted average of that rate across all
* observations of the same year.
local rates stir ltrate rstir rltrate

foreach v of local rates {

	* Per-year denominator (number of non-missing values) and numerator (their sum),
	* each computed for all years at once with by(year) rather than one egen call per year.
	* The if-restriction keeps the 1870-2011 window, so rows outside it stay missing.
	* With -missing-, a year without any non-missing value gets a missing sum instead of 0.
	egen denom`v' = count(`v') if inrange(year, 1870, 2011), by(year)
	egen num`v' = total(`v') if inrange(year, 1870, 2011), by(year) missing

	*world averages
	gen dif_a_`v' = num`v'/denom`v'

	gen dif_`v' = `v' - dif_a_`v'

	* helper variables are not kept
	drop num`v' denom`v' dif_a_`v'

}




*__________GAPS = HP-CYCLE - HP-TREND_____________
* hprescott cannot deal with missing values -> each spell is detrended separately
* det is not used in this file after its definition; presumably it is read by the included
* do-file (TODO confirm). irates and other drive the two loops below.
local det dif_stir dif_ltrate dif_rstir dif_rltrate rer er rgdp rc ri rca stir ltrate rstir rltrate rnarrowm rmoney rstocks rbassets2 rloans1 rpubdebt pdebt c_y i_y ca_y bassets2_y loans1_y money_y narrowm_y
local irates ltrate stir dif_stir dif_ltrate rstir rltrate dif_rstir dif_rltrate
local other rer er rgdp rc ri rca rnarrowm rmoney rstocks rbassets2 rloans1 rpubdebt pdebt c_y i_y ca_y bassets2_y loans1_y money_y narrowm_y

include `path'DoFiles/detrend_mt.do /* short-term cycles at business cycle frequency */

* DEVIATIONS
* Level variables: cycle as a percentage of the absolute trend value. Dividing by abs()
* handles negative trend values; a trend of exactly zero or a missing trend gives a
* missing gap.
foreach v of local other {
	* HP-Filter
	generate `v'_gap = cyc_`v'/abs(tr_`v')*100
	* BK-Filter
	generate `v'_mtbkgap = bk_cyc_`v'/abs(bk_tr_`v')*100
	* CF-Filter
	generate `v'_mtcfgap = cf_cyc_`v'/abs(cf_tr_`v')*100
}

* Interest-rate variables: the gap is the cycle component itself (percentage points).
foreach v of local irates {
	generate `v'_gap = cyc_`v'
	generate `v'_mtbkgap = bk_cyc_`v'
	generate `v'_mtcfgap = cf_cyc_`v'
}

* Remove the helper variables matched by these patterns.
drop *cyc_* *tr_* *end *seq *spell hmseq* mhp* mseq*




*_____________GLOBAL GDP-WEIGHED AVERAGES, all vars, no std_cpi______________
n display "Global indicators"

*GDP-weights, using barro&ursua p.C gdp * maddison population level for weights as Schularick&Taylor GDP is incomparable across countries:
gen rgdpba = pop*rgdpbarro

* Yearly mean of rgdpba; the by-prefix re-sorts the data, so the panel order is restored.
bys year: egen mrgdpba = mean(rgdpba)
sort countrynum year

* Weight: rgdpba relative to that year's mean.
gen w = rgdpba/mrgdpba

*build (weighed) numerator and denominator terms
local glob stir ltrate rstir rltrate rpubdebt pdebt c_y i_y ca_y bassets2_y loans1_y money_y narrowm_y gr_rgdp gr_rc gr_ri gr_rca gr_cpi gr_rnarrowm gr_rmoney gr_rstocks gr_rbassets2 gr_rloans1 gr_rpubdebt gr_c_y gr_i_y gr_ca_y gr_bassets2_y gr_loans1_y gr_money_y gr_narrowm_y rgdp_gap rc_gap ri_gap rca_gap stir_gap ltrate_gap rstir_gap rltrate_gap rnarrowm_gap rmoney_gap rstocks_gap rbassets2_gap rloans1_gap rpubdebt_gap pdebt_gap c_y_gap i_y_gap ca_y_gap bassets2_y_gap loans1_y_gap money_y_gap narrowm_y_gap

foreach v of local glob {

	* Per-year denominator (number of non-missing values of the variable) and numerator
	* (sum of variable times weight), each computed for all years at once with by(year)
	* rather than one egen call per year. The if-restriction keeps the 1870-2011 window,
	* so rows outside it stay missing. With -missing-, a year without any non-missing
	* product gets a missing sum instead of 0.
	* NOTE(review): the denominator counts every non-missing value of the variable, while
	* the numerator skips rows where w is missing - confirm this is the intended average.
	egen denom`v' = count(`v') if inrange(year, 1870, 2011), by(year)
	egen num`v' = total(`v'*w) if inrange(year, 1870, 2011), by(year) missing

	*gdp-weighted world averages
	gen a_`v' = num`v'/denom`v'

	* helper variables are not kept
	drop num`v' denom`v'

}

* the weight is no longer needed (rgdpba and mrgdpba stay in the dataset)
drop w


*________________RECESSION HORIZONS________________
noisily display "Recession dummies"

* rec0 = 1 in a year with negative real GDP growth that follows a year with positive growth;
* it is missing whenever growth in the current or the previous year is missing.
generate rec0 = 0
replace rec0 = 1 if gr_rgdp<0 & l.gr_rgdp>0
replace rec0 = . if l.gr_rgdp==. | gr_rgdp==.

* rec1 flags the year before a rec0 year; the rec0 year itself is reset to 0.
generate rec1 = rec0
replace rec1 = 1 if f.rec0==1
replace rec1 = 0 if rec0==1

* rec2 and rec3 each start from the previous horizon and additionally flag the year
* preceding any year that horizon already flags.
forvalues h = 2/3 {
	local prev = `h' - 1
	generate rec`h' = rec`prev'
	replace rec`h' = 1 if f.rec`prev'==1
}


*________________CRISES HORIZONS________________
*Schularick & Taylor, 2011/13 systemic crises
noisily display "Crisis dummies"

foreach v of varlist crisisJST RRcrisis {

	* horizon 0: copy of the crisis dummy
	generate `v'0 = `v'

	* horizon 1: flags the year before a crisis year; the crisis year itself is reset to 0
	generate `v'1 = `v'0
	replace `v'1 = 1 if f.`v'==1
	replace `v'1 = 0 if `v'==1

	* horizons 2 and 3: start from the previous horizon and additionally flag the year
	* preceding any year that horizon already flags
	forvalues h = 2/3 {
		local prev = `h' - 1
		generate `v'`h' = `v'`prev'
		replace `v'`h' = 1 if f.`v'`prev'==1
	}

	* only the horizon variables are kept
	drop `v'
}



*give shorter names to avoid variable name limit
rename (crisisJST0 crisisJST1 crisisJST2 crisisJST3) (b0 b1 b2 b3)

noisily display "Save prepared dataset"
save "`path'Data/prepared.dta", replace

*outsheet _all using "`path'Data/ExcelExt.csv", comma replace

*R classification tree
* Both CSV exports share the column list below and differ only in the crisis dummies
* appended at the end, so the shared part is written once.
* NOTE(review): a_pdebt_gap appears twice in this list (as it did in both original lists) -
* confirm whether the repetition is intended.
local shared dif_stir dif_ltrate dif_rstir dif_rltrate a_gr_i_y ///
ltrate stir rltrate rstir ///
c_y i_y ca_y bassets2_y loans1_y ///
pdebt gr_er gr_rgdp gr_rc ///
gr_ri gr_rca gr_cpi gr_rnarrowm ///
gr_rmoney gr_rstocks gr_rbassets2 gr_rloans1 gr_rpubdebt ///
gr_c_y gr_ca_y gr_bassets2_y gr_loans1_y er_gap ///
rgdp_gap rc_gap ri_gap rca_gap ///
stir_gap ltrate_gap rstir_gap rnarrowm_gap ///
rmoney_gap rstocks_gap rbassets2_gap rloans1_gap rpubdebt_gap ///
pdebt_gap c_y_gap i_y_gap ca_y_gap bassets2_y_gap ///
loans1_y_gap narrowm_y_gap money_y_gap a_stir a_ltrate a_rstir ///
a_rltrate  a_pdebt a_c_y a_i_y gr_rer ///
a_ca_y a_bassets2_y a_loans1_y a_gr_rgdp a_gr_rc ///
a_gr_ri a_gr_rca a_gr_cpi a_gr_rmoney ///
a_gr_rstocks a_gr_rbassets2 a_gr_rloans1 a_gr_rpubdebt a_gr_c_y ///
a_gr_ca_y a_gr_bassets2_y a_gr_loans1_y a_rgdp_gap a_rc_gap ///
a_ri_gap a_rca_gap a_stir_gap a_rstir_gap ///
a_rltrate_gap a_rnarrowm_gap a_rmoney_gap a_rstocks_gap a_rbassets2_gap  ///
a_rloans1_gap  a_pdebt_gap a_c_y_gap a_i_y_gap rer_gap ///
a_ca_y_gap a_bassets2_y_gap a_loans1_y_gap er rer ///
rltrate_gap a_gr_rnarrowm a_ltrate_gap a_gr_money_y a_money_y_gap ///
a_pdebt_gap money_y gr_money_y money_y_gap a_money_y

* Export with the b1-b3 dummies (the renamed crisisJST horizons).
local all `shared' b2 b3 b1 rec1 rec2 rec3 year ccode

* Export with the RRcrisis1-RRcrisis3 dummies instead.
local RRdummy `shared' RRcrisis2 RRcrisis3 RRcrisis1 rec1 rec2 rec3 year ccode

outsheet `all' using "`path'Data/R_class.csv", comma replace
outsheet `RRdummy' using "`path'Data/R_class_RR.csv", comma replace



}
