*-------------------------------------------------------------------------------------------------------------
*
*	Prepare File
*	Felix Ward
*	
*
*-------------------------------------------------------------------------------------------------------------


* Start from an empty session, pin the interpreter version and switch off
* the more-prompt so the script runs unattended.
clear all
version 13
set more off

*================= SETTINGS ======================================================================================

* Project root. Keep the trailing slash: the macro is prepended directly to
* the relative paths used further down.

*local path C:\Users\Ward\Dropbox\CrisisPrediction\
local path /Users/felixward/Dropbox/CrisisPrediction/


* Close a log that a previous run may have left open (no error if none is open)
capture log close

* Load the raw data via infile using, replacing whatever is in memory,
* then print summary statistics for every variable.
infile using "`path'data/text_post70_y_DE.txt", clear

summarize

quietly {


*__________________________DECLARE PANEL_______________________
noisily display "Declare panel"
* prefix all value labels with their numeric codes
numlabel _all, add

* bring the data into country-year order, with the two identifiers first
sort ifs year
order ifs year
* keep only rows that have a country code and lie in 2011 or earlier
* (a missing year counts as larger than 2011 and is removed as well)
keep if ifs != .
keep if year <= 2011
* consecutive country index 1, 2, ... used by the per-country loops below
egen ccode = group(ifs)

* declare the yearly country panel so that time-series operators work
tsset ifs year, yearly

* fill holes in loans from the alternative series loans_std, then discard it
* (hardly any discrepancies at the break between the two series)
replace loans = loans_std if loans_std != . & loans == .
drop loans_std


*__________________RE-/DEFLATE___________________
n display "Deflate"

* price index used as deflator throughout: nominal over real GDP
gen cpi = v_gdp/q_gdp

* nominal and real GDP per head
gen gdp = v_gdp/pop
gen gdp_r = q_gdp/pop

* real financial aggregates
gen fliab_r = fliab/cpi
gen stocks_r = stocks/cpi
gen loans_r = loans/cpi

* public debt: the raw series is renamed to pdebt_y and scaled by GDP to get
* levels, i.e. it is presumably a debt-to-GDP ratio (TODO confirm against the
* data dictionary)
ren pdebt pdebt_y
gen pdebt_r = pdebt_y*q_gdp
* nominal level = ratio times NOMINAL GDP. The previous version multiplied
* the real level by nominal GDP, i.e. ratio x real GDP x nominal GDP.
gen pdebt = pdebt_y*v_gdp

* investment, nominal and real
ren v_i i
gen i_r = i/cpi

* consumption, nominal and real
ren v_c c
gen c_r = c/cpi

* net exports, nominal and real
gen nx = v_x - v_m
gen nx_r = nx/cpi



*__________________GDP-RATIOS________________
noisily display "to-GDP ratios"
* fliab and loans relative to gdp are z-scored within each country and then
* shifted to a mean of 10. Only within-country variation is kept because the
* levels are not comparable across countries. The shift keeps the trend
* component of the later gap measure away from zero, where dividing by a
* tiny trend value would produce extreme indicator values. This does not
* affect RF results, but a wider indicator range makes the partial dependence
* functions tedious to compute because far more points have to be evaluated.
* NOTE(review): gdp is v_gdp divided by pop, so these two ratios use GDP per
* head as denominator, whereas i_y, c_y and nx_y below use v_gdp. Confirm
* that fliab and loans are meant to be scaled this way.

* number of countries = largest value of the consecutive index ccode
summarize ccode 
matrix max = r(max)
local max = max[1,1]

local togdp fliab loans

foreach ratio of local togdp {
	generate `ratio'_y = .
	generate nostd_`ratio'_y = `ratio'/gdp
	forvalues k = 1/`max' {
		* mean and standard deviation of the raw ratio for country k
		summarize nostd_`ratio'_y if ccode==`k'
		matrix mean = r(mean)
		matrix sd = r(sd)
		local mean = mean[1,1]
		local sd = sd[1,1]
		replace `ratio'_y = (nostd_`ratio'_y-`mean')/`sd' +10 if ccode==`k'
	}
}

* for these three shares the level itself is interpretable, no standardizing
generate i_y = i/v_gdp
generate c_y = c/v_gdp
generate nx_y = nx/v_gdp

* raw inputs and unstandardized helper ratios are no longer needed
drop v_g v_m v_x pop nostd_*



*______________REAL EXCHANGE RATE (rel. to US)________________
noisily display "Real exchange rate"

sort ccode year

rename xr2 er

* U.S. price index (rows with ifs code 111) copied to every country-year:
* only the U.S. rows are non-missing in cpi_us, so the by-year minimum
* returns exactly that value
generate cpi_us = cpi if ifs==111
egen cpi_usa = min(cpi_us), by(year)
drop cpi_us

* nominal rate scaled by the price level relative to the U.S.
generate nostd_rer = er*(cpi/cpi_usa)

* standardize within country to mean 10: the level is not necessarily
* comparable across countries (base years etc.), so only the within-country
* movement of the real exchange rate is retained
local stdize rer 

summarize ccode
matrix max = r(max)
local max = max[1,1]

foreach stnd of local stdize {
	generate `stnd' = .
	forvalues k = 1/`max' {
		* mean and standard deviation of the raw series for country k
		summarize nostd_`stnd' if ccode==`k'
		matrix mean = r(mean)
		matrix sd = r(sd)
		local mean = mean[1,1]
		local sd = sd[1,1]
		replace `stnd' = (nostd_`stnd' - `mean')/`sd' + 10 if ccode==`k'
	}
}

* the U.S. rows are set to the common mean of 10 directly
replace rer = 10 if ifs==111
drop pl_gdpe


*_______________GENERATE LAGS, GROWTH RATES & LAGGED GROWTH RATES___________________
noisily display "Lags, growth rates and lagged growth rates"
sort ifs year

* org lists the level series, gro the name of the growth rate built from the
* series at the same position. The two lists are walked in parallel, so they
* must stay aligned word by word.
local org "er rer cpi gdp_r c_r i_r nx_r fliab loans stocks fliab_r loans_r stocks_r fliab_y loans_y i_y c_y nx_y pdebt_r"
local gro "er_gr rer_gr cpi_gr gdp_r_gr c_r_gr i_r_gr nx_r_gr fliab_gr loans_gr stocks_gr fliab_r_gr loans_r_gr stocks_r_gr fliab_y_gr loans_y_gr i_y_gr c_y_gr nx_y_gr pdebt_r_gr"
local vecsize : word count `org' 

forvalues k = 1/`vecsize' {
	local v`k' : word `k' of `gro'
	local c : word `k' of `org'

	* level lagged by 0 to 5 years
	forvalues lag = 0/5 {
		generate l`lag'_`c' = L`lag'.`c'
	}

	* first difference, and growth rate = first difference over last level
	generate d_`c' = D.`c'
	generate `v`k'' = D.`c'/l1_`c'

	* growth rate lagged by 0 to 5 years
	forvalues lag = 0/5 {
		generate l`lag'_`v`k'' = L`lag'.`v`k''
	}
}

*___________REAL INTEREST RATES____________
noisily display "Real interest rates and interest rate differentials"

* primary short- and long-term series and the yearly changes of all four
* raw rate series
generate stir = mmrate
generate ltrate = ltbondrate
generate stir_d = D.stir
generate ltrate_d = D.ltrate
generate tbillrate_d = D.tbillrate
generate mortrate_d = D.mortrate

* aggregate ST and LT variables: where the primary series has no yearly
* change, borrow the change of the secondary series (tbillrate for the short
* rate, mortrate for the long rate) ...
replace stir_d = tbillrate_d if stir_d == .
replace ltrate_d = mortrate_d if ltrate_d == .
* ... and extend the level forward from the previous year with that change
replace stir = L.stir + stir_d if (L.stir != . & stir_d != . & stir == .)
replace ltrate = L.ltrate + ltrate_d if (L.ltrate != . & ltrate_d != . & ltrate == .)

* real rates = nominal rate minus the growth rate of the price index
* NOTE(review): cpi_gr is a fraction (change over previous level). If the
* rate series are quoted in percent the two terms are on different scales.
* Confirm the units of the raw rates.
generate stir_r = stir - cpi_gr
generate ltrate_r = ltrate - cpi_gr

drop tbillrate* mortrate* mmrate ltbondrate


*____________INTEREST RATE DIFFERENTIALS (compared to global average)__________
* dif_X = X minus the unweighted cross-country mean of X in the same year.
* The yearly mean is built with a single by-year egen per series instead of
* two egen calls plus create and drop for every single year, and it is held
* in double precision until the final subtraction.
local rates stir ltrate stir_r ltrate_r 

foreach v of local rates {

	* world average per year. Restricted to 1970-2011, the window for which
	* the differentials have always been defined. Outside it the mean, and
	* therefore the differential, stays missing. A year in which no country
	* reports the series also yields a missing mean.
	tempvar yrmean
	egen double `yrmean' = mean(`v') if inrange(year, 1970, 2011), by(year)

	gen dif_`v' = `v' - `yrmean'

	drop `yrmean'

}

* egen may reorder the observations, so restore the panel order
sort ifs year




*__________GAPS = HP-CYCLE - HP-TREND_____________
* hprescott cannot deal with missing values -> each spell is detrended separately
* det is the full list of series, irates the interest rate series and other
* everything else. The lists are defined before the include, which makes them
* visible to the included do-file.
local det dif_stir dif_ltrate dif_stir_r dif_ltrate_r stir ltrate stir_r ltrate_r er rer cpi fliab loans stocks gdp i c nx fliab_r loans_r stocks_r gdp_r i_r c_r nx_r fliab_y loans_y pdebt_y i_y c_y nx_y pdebt_r
local irates dif_stir dif_ltrate dif_stir_r dif_ltrate_r stir ltrate stir_r ltrate_r
local other er rer cpi fliab loans stocks gdp i c nx fliab_r loans_r stocks_r gdp_r i_r c_r nx_r fliab_y loans_y pdebt_y i_y c_y nx_y pdebt_r


* The cycle and trend variables used below (prefixes cyc_ and tr_, plus the
* bk_ and cf_ variants) are not created in this file. Presumably the included
* do-file builds them for every series in det. Verify there.
include `path'DoFiles/detrend_mt.do /* short-term cycles at business cycle frequency */

	* DEVIATIONS
	* Level variables: cycle in percent of the absolute trend value. Dividing
	* by the absolute value keeps the sign of the cycle when the trend is
	* negative. The gap stays missing where the trend is exactly zero.
	foreach v of local other {
		** HP-Filter
		generate `v'_gap = cyc_`v'/abs(tr_`v')*100 if (tr_`v' != 0)
		** BK-Filter
		generate `v'_mtbkgap = bk_cyc_`v'/abs(bk_tr_`v')*100 if (bk_tr_`v' != 0)
		** CF-Filter
		generate `v'_mtcfgap = cf_cyc_`v'/abs(cf_tr_`v')*100 if (cf_tr_`v' != 0)
	}

	* Interest rate variables: the cycle itself, i.e. a deviation in
	* percentage points rather than in percent of the trend
	foreach v of local irates {
		** HP-Filter
		generate `v'_gap = cyc_`v'
		** BK-Filter
		generate `v'_mtbkgap = bk_cyc_`v'
		** CF-Filter
		generate `v'_mtcfgap = cf_cyc_`v'
	}

	** drop helper variables left over from detrending
	drop *cyc_* *tr_* *end *seq *spell hmseq* mhp* mseq*


*_____________GLOBAL GDP-WEIGHTED AVERAGES, all vars, no std_cpi______________
n display "Global indicators"

* weight of a country-year = its real GDP relative to the cross-country mean
* of real GDP in that year
bys year: egen m_gdp = mean(q_gdp)
sort ccode year

gen w = q_gdp/m_gdp
drop v_gdp q_gdp

* series for which a world average a_X is built
local glob stir ltrate stir_r ltrate_r pdebt_r pdebt_y fliab_y loans_y i_y c_y nx_y gdp_r_gr i_r_gr c_r_gr nx_r_gr cpi_gr stocks_r_gr fliab_r_gr loans_r_gr pdebt_r_gr fliab_y_gr loans_y_gr c_y_gr i_y_gr nx_y_gr gdp_r_gap i_r_gap c_r_gap nx_r_gap stir_gap ltrate_gap stir_r_gap ltrate_r_gap stocks_r_gap fliab_r_gap loans_r_gap pdebt_r_gap pdebt_y_gap fliab_y_gap loans_y_gap i_y_gap c_y_gap nx_y_gap

foreach v of local glob {

	tempvar wx wgt num den

	* Weighted value and the weight that belongs to it. Both are missing
	* whenever the series or the weight is missing, so numerator and
	* denominator always cover the same set of countries. The previous
	* version divided the weighted sum by the plain number of countries
	* reporting the series, which is a weighted average only if every
	* country reports both the series and real GDP in that year.
	gen double `wx' = `v'*w
	gen double `wgt' = w if `wx' < .

	* yearly sums, restricted to the 1966-2011 window used so far. The
	* averages stay missing outside it and in years without any data.
	egen double `num' = total(`wx') if inrange(year, 1966, 2011), by(year) missing
	egen double `den' = total(`wgt') if inrange(year, 1966, 2011), by(year) missing

	*gdp-weighted world averages
	gen a_`v' = `num'/`den'

	drop `wx' `wgt' `num' `den'

}

*drop
drop w 

* egen may reorder the observations, so restore the panel order
sort ifs year



*________________RECESSION HORIZONS________________
noisily display "Recession dummies"

sort ifs year 

* recession start: growth of real GDP per head turns negative after a year of
* positive growth. Missing when either of the two growth rates is missing.
generate rec = (gdp_r_gr<0 & L.gdp_r_gr>0) if !(L.gdp_r_gr==. | gdp_r_gr==.)

* horizon 0: the start year itself
generate rec0 = rec

* horizon 1: the year immediately before a start. The start year itself is
* set to zero, all other years keep the value of rec0.
generate rec1 = cond(rec0==1, 0, cond(F.rec0==1, 1, rec0))

* horizons 2 and 3: additionally flag the year preceding any year already
* flagged at the previous horizon
generate rec2 = cond(F.rec1==1, 1, rec1)
generate rec3 = cond(F.rec2==1, 1, rec2)

drop rec


*________________CRISES HORIZONS________________
*Laeven, 2013 systemic crises
noisily display "Crisis dummies"

sort ifs year 

* The horizon dummies are created directly under their short names b0 to b3
* to stay clear of the variable name length limit.

* horizon 0: the crisis dummy as it comes from the data
generate b0 = crisisL

* horizon 1: the year immediately before a crisis year. Crisis years
* themselves are set to zero, all other years keep the raw value.
generate b1 = cond(crisisL==1, 0, cond(F.crisisL==1, 1, crisisL))

* horizons 2 and 3: additionally flag the year preceding any year already
* flagged at the previous horizon
generate b2 = cond(F.b1==1, 1, b1)
generate b3 = cond(F.b2==1, 1, b2)

drop crisisL

n display "Save prepared dataset"
*outsheet _all using "`path'Data/ExcelExt.csv", comma replace


*R classification tree
* Columns written to the csv file, in this order. The list used to contain
* a_pdebt_y_gap twice, which produced two columns with the same header, while
* a_pdebt_r_gap was the only world-average gap built above that was never
* exported. The second occurrence, in the row of a_..._r_gap variables, is
* therefore replaced by a_pdebt_r_gap.
* NOTE(review): confirm that the downstream R code expects a_pdebt_r_gap.
* NOTE(review): the input file is read from the folder data (lower case)
* while this file is written to Data. The two differ on a case-sensitive
* file system. Confirm the folder names.
local allAEDE dif_stir dif_ltrate dif_stir_r dif_ltrate_r ltrate ///
	stir ltrate_r stir_r fliab_y loans_y ///
	pdebt_y er_gr gdp_r_gr cpi_gr stocks_r_gr ///
	fliab_r_gr loans_r_gr pdebt_r_gr fliab_y_gr a_ltrate_gap ///
	loans_y_gr er_gap gdp_r_gap stir_gap ltrate_gap ///
	stir_r_gap stocks_r_gap fliab_r_gap loans_r_gap pdebt_r_gap ///
	pdebt_y_gap fliab_y_gap ltrate_r_gap a_pdebt_y_gap rer ///
	loans_y_gap a_stir a_ltrate a_stir_r a_ltrate_r ///
	a_pdebt_y rer_gr a_fliab_y a_loans_y a_gdp_r_gr ///
	a_cpi_gr a_stocks_r_gr a_fliab_r_gr a_loans_r_gr a_pdebt_r_gr ///
	a_fliab_y_gr a_loans_y_gr a_gdp_r_gap a_stir_gap a_stir_r_gap ///
	a_ltrate_r_gap a_stocks_r_gap a_fliab_r_gap a_loans_r_gap a_pdebt_r_gap ///
	rer_gap a_fliab_y_gap a_loans_y_gap er ///
	c_y i_y nx_y c_r_gr i_r_gr ///
	nx_r_gr c_y_gr i_y_gr nx_y_gr c_r_gap ///
	i_r_gap nx_r_gap c_y_gap i_y_gap nx_y_gap ///
	a_c_y a_i_y a_nx_y a_c_r_gr a_i_r_gr ///
	a_nx_r_gr a_c_y_gr a_i_y_gr a_nx_y_gr a_c_r_gap ///
	a_i_r_gap a_nx_r_gap a_c_y_gap a_i_y_gap a_nx_y_gap ///
	b2 b3 b1 rec1 rec2 rec3 year ccode AE DE QU big ME


* comma-separated export, overwriting an existing file
outsheet `allAEDE' using "`path'Data/R_class_post70_y_AEDE.csv", comma replace

}
