******************************************************
* CREATES TABLE 5 IN 'FUELING CONFLICT' 
* Estimated transition probabilities and state dependence
*****************************************************


************** preliminaries **************
* start from an empty session (data, programs, matrices, Mata, results)
clear all

* number of bootstrap replications (the paper uses 999)
global reps 999

* cap on Newton-Raphson iterations for any single estimation
set maxiter 50

* number of parallel child processes; set to the number of cores available
* (changing it alters the seeds used by the children)
parallel setclusters 30, force

* never pause output, and remember this setting across sessions
set more off, permanently

************** Programs **************
/*
ssc install gtools
gtools, upgrade
ssc install parallel
*/

**************  set working dir **************

*add your working directory
* cd 


** code for generic bootstrap program

* define 3-step bootstrap and tempvars needed
* threestep_boot: one replication of the three-step estimator, written so that
* it can be called by a (parallel) cluster bootstrap on the bilateral data.
*
* Options
*   y(varname)    ordered outcome; the code below treats the values 1, 2, 3 as
*                 the non-base states when building lagged-state dummies
*   z(varlist)    optional control variables
*   initrends     optional flag: interact initial-state dummies with year
*
* Data in memory must contain: newid, don_id, doncode, year, netoda_gdp, gfrac,
* frac_ukc and prob_recddon (all referenced below). The global Mata matrix R
* (weights for the state-dependence measure) must exist before the call.
*
* Steps
*   1. bilateral fixed-effects regression, prediction of aid (xbu)
*   2. aggregation to a newid-year panel, complete cases, balanced to max Ti
*   3. first stage: OLS of aggregated aid on aggregated predicted aid plus
*      correlated-random-effects terms; residual nu
*   4. main model: xtoprobit with lagged states, initial states and nu
*   5. counterfactual transition probabilities via margins, and GSD from them
*
* Posted results (e-class)
*   e(b)                     first 11 + #z + #trend coefficients of the main model
*   e(phat_ij), i,j = 0..3   average predicted P(state j | lagged state i)
*   e(gsd_0) ... e(gsd_3)    state-dependence estimates
*   e(converged)             convergence flag of xtoprobit
*   e(NgT), e(Ng), e(T)      first-stage N, number of clusters, and their ratio
*
* The data in memory are preserved on entry and restored on exit.
program threestep_boot, eclass 

	* allow changing dep var and adding controls
	syntax , Y(varname numeric) [Z(varlist numeric) initrends]
	tempname aa cc ss conv
	local initial_trends `initrends'

	* check if parallel execution was aborted
	parallel break
	
	* set here as well in case parallel does not export this parameter
	set maxiter 50 // max NR iterations for one estimation

	* allow going back to bilateral structure
	preserve
	
	* re-define panel on the pair level
	gegen newpair = group(newid don_id)
	xtset newpair year
	
	*** predict aid bilaterally (linear prediction including the fixed effect)
	xtreg netoda_gdp c.gfrac##c.prob_recddon c.frac_ukc##c.prob_recddon i.year, fe
	predict netoda_frac_ukc, xbu
	
	* aggregate to country-year panel; missing option: all-missing sums stay missing
	bys newid year: gegen aggnetoda_gdp=total(netoda_gdp), missing  
	by newid year: gegen agghatnetoda_gdp=total(netoda_frac_ukc), missing 
	* keeps the rows of whichever donor is in the first observation
	* NOTE(review): assumes that donor has a row for every newid-year -- confirm
	keep if doncode == doncode[1] // works like collapse
	drop don_id doncode

	* mark the sample to use and drop rest 
	mark touse
	markout touse `y' aggnetoda_gdp agghatnetoda_gdp `z'
	drop if !touse

	* reset xt to panel bootstrap handle
	xtset newid year
	
	* balance sample to max Ti
	by newid (year): gen Ti = _N  
	qui sum Ti // fine now
	keep if Ti== `r(max)'

	* generate lagged conflict states: lagged_o_k flags a lagged outcome of k-1
	forv k = 2/4 {
		gen lagged_o_`k' = (l.`y' == `=`k'-1')
	}

	* generate initial year var and initial state dummies
	by newid (year): gegen inityear = min(year)  
	gen initcondtemp = `y' if year==inityear
	by newid(year) : gegen init = max(initcondtemp)
	drop initcondtemp
	qui tab init, gen(init_)  
	* note that we have few initial war observations, thus in some 
	* bootstrap iterations init_4 will be empty (not found)
	* NOTE(review): tab, gen() numbers the dummies by the rank of the values
	* that occur, so init_k only means "initial state k-1" when every lower
	* state is present as an initial state in this draw -- confirm
	drop init_1 init // will always be omitted, second is tempvar

	* generate T-2 year dummies 
	qui sum year
	// first year will always drop out given condition below, second year is base
	forv i = `=r(min)+2'(1)`=r(max)' {
		gen y_`i' =  (year==`i')
	}

	* optional: initial-state specific linear trends
	if "`initial_trends'" != "" {
		forv k = 2/4 {
			gen init`k'_X_year = init_`k'*year
		}
		local init_first init_2 init_3 init_4 init2_X_year init3_X_year init4_X_year
		local trends_main init2_X_year init3_X_year init4_X_year
	}
	
	
	* generate z_i vector including controls and IV:
	* m_<var> is the within-unit mean after the initial year, z_<var>_<year>
	* spreads the value of <var> in <year> over all rows of the unit
	local allvars agghatnetoda_gdp `z' // put other control vars here 
	foreach var in `allvars' {
		by newid (year): gegen m_`var' = mean(`var') if year>inityear 
		qui sum year // not first year, gets omitted otherwise
		forv i = `=r(min)+1'(1)`r(max)' {
			by newid (year): gen temp_z_`var'_`i' = `var' if year==`i'
			by newid (year): gegen z_`var'_`i' = max(temp_z_`var'_`i')
			drop  temp_z_`var'_`i'
		}
	}
	
	* panel set and sort again
	*xtset 

	*** first stage, equiv to xtreg, fe
	* NOTE(review): the year-dummy range is hard-coded; it coincides with the
	* y_* variables generated above only if the sample runs 1975-2010 -- confirm
	* before reusing on other data
	reg aggnetoda_gdp agghatnetoda_gdp `z' `init_first' m_* y_1977-y_2010 if year>inityear, cluster(newid)
	local NT = e(N) // needed for later to replace "wrong N" from bilateral sample
	local N = e(N_clust)
	predict nu, resid 

	* generate averages and nu_i vector (same construction as for the z's)
	by newid (year): gegen m_nu = mean(nu) if year>inityear 
	qui sum year // not first year
	forv t = `=r(min)+1'(1)`r(max)' {
		by newid (year): gen temp_z_nu_`t' = nu if year==`t'
		by newid (year): gegen z_nu_`t' = max(temp_z_nu_`t')
		replace z_nu_`t' = 0 if missing(z_nu_`t') // needed?
		drop  temp_z_nu_`t'
	}

	*** main model
	* constrained version: averages plus first few years separately
	* (z_*_197* picks up the year-specific terms of the 1970s only)
	* nb: include controls after nu ...
	xtoprobit `y' aggnetoda_gdp nu ///
		c.aggnetoda_gdp#1.lagged_o_2 c.aggnetoda_gdp#1.lagged_o_3 ///
		c.aggnetoda_gdp#1.lagged_o_4 lagged_o_2-lagged_o_4 init_2-init_4 ///
		`z' `trends_main' m_* z_*_197* y_1977-y_2010  if year>inityear, i(newid) 
	
	* save convergence results, needed for BS rejections	
	scalar `conv' = e(converged)
	
	* save coefficients
	mat `aa' = e(b)
	* count relevant output size
	local n1 : word count `z'
	local n2 : word count `trends_main'

	* the 11 leading terms are: aid, nu, 3 interactions, 3 lagged states,
	* 3 initial states; controls and trends follow, CRE terms are cut off
	mat `aa' = `aa'[1,1..`=11+`n1'+`n2''] // save only relevant coef, not CRE stuff

	* init matrix of transition probabilities (row = lagged state, col = state)
	mat `ss' = J(4,4,.)
	
	*** get probs and GSD
	
	* cut-point CDFs, with the index rescaled by sqrt(1 + sigma_u^2)
	local cdf1 normal(((_b[cut1:_cons] - xb())/sqrt(1 + e(sigma_u)^2)))
	local cdf2 normal(((_b[cut2:_cons] - xb())/sqrt(1 + e(sigma_u)^2)))
	local cdf3 normal(((_b[cut3:_cons] - xb())/sqrt(1 + e(sigma_u)^2)))

	* probability of each of the four outcomes as differences of those CDFs
	local pr1 `cdf1'
	local pr2 `cdf2' - `cdf1'
	local pr3 `cdf3' - `cdf2'
	local pr4 1-`cdf3'

	forv s = 1/4 {
		* impose lagged state s-1 on every observation (s = 1: all dummies zero)
		forv k = 2/4 {
			replace lagged_o_`k' = (`k' == `s')
		}
		* average predicted probability of each outcome under that lagged state
		forv j = 1/4 {
			margins, expression(`pr`j'') force nose
			mat `ss'[`s',`j'] = r(b)
		}
	}
		
	** eof probs and GSD components
	
	* calc GSD with weights: own-state persistence minus the R-weighted
	* column sums of the transition matrix (R is a global Mata matrix)
	mata: S = st_matrix("`ss'")
	mata: S = diagonal(S)' - colsum(S:*R)
	mata: st_matrix("`cc'", S)	
	
	* post the main coefficients as e-class results
	ereturn post `aa' 
	
	* post estimated markov matrix as phat_ij's in e-class scalars
	forv i = 0(1)3 {
		forv j = 0(1)3 {
			ereturn scalar phat_`i'`j'=`ss'[`=`i'+1',`=`j'+1']
		}
	}
	
	* post the gsd estimates
	forv i = 0(1)3 {
		ereturn scalar gsd_`i'=`cc'[1,`=`i'+1']
	}

	* return e-class recording if iteration converged
	ereturn scalar converged =`conv'
	ereturn scalar NgT = `NT'
	ereturn scalar Ng = `N'	
	ereturn scalar T = `=`NT'/`N''

	* go back to bilateral structure
	restore
end

** work on panel data for transition probs

* load the country-year panel; its y_* variables are not needed here
use ./data/balance_sample28, clear
drop y_*

* keep complete cases on the model variables only
mark touse
markout touse conflict_pb aggnetoda_gdp agghatnetoda_gdp  ln_gdp ln_pop
keep if touse

* count usable years per recipient and inspect the distribution
bysort rec_id (year): generate Ti = _N
tabulate Ti

* balance the panel: only recipients with all 36 years stay
keep if Ti == 36
tabulate year

* get transitions

* display only: results of xttrans can't be saved
xttrans conflict_pb

* reproduce with a two-way table and keep the cell counts in matrix T
generate l_conflict_pb = l.conflict_pb
tabulate l_conflict_pb conflict_pb, row nofreq matcell(T)

* turn frequencies into row percentages
mata:
T = st_matrix("T")
T = T:/rowsum(T) * 100
st_matrix("T", T)
end

* add CJ style weights, just simple classes: frequencies of each state
tabulate conflict_pb, matcell(CJ)

matrix list CJ

* create weights for state dependence calculation
* delete diagonal, sum, divide by sum gives omega matrix
* do the same for the CJ style weights
mata:
// omega weights from the transition probabilities (displayed only)
P = T/100
R = P - diag(P)
O = R:/colsum(R)
O

// CJ weights: state shares repeated in four columns, diagonal removed,
// columns rescaled to sum to one; this R is the one left in Mata memory
CJ = st_matrix("CJ")
CJ = CJ,CJ,CJ,CJ
share = CJ:/colsum(CJ)
R = share - diag(share)
R = R:/colsum(R)
R

end

** open bilateral data
use ./data/AiC_all_bootstrap.dta, clear
xtset recdon_id year

* set the seed only once, parallel will take it from there
set seed 10101

* start timer
timer clear 1
timer on 1

* clear current panel setting
xtset, clear

* statistics to collect from every replication: all coefficients, the
* sixteen transition probabilities and the four GSD estimates
local stats _b
forvalues i = 0/3 {
	forvalues j = 0/3 {
		local stats `stats' phat_`i'`j'=e(phat_`i'`j')
	}
}
forvalues i = 0/3 {
	local stats `stats' gsd_`i'=e(gsd_`i')
}

* call bootstrap in parallel and store all results:
* resample recipients (rec_id), hand the new cluster id to the program as
* newid, discard non-converged replications, export Mata objects to children
eststo: parallel bs, expression(`stats') ///
	nowarn reps($reps) reject(e(converged)==0) cluster(rec_id) ///
	idcluster(newid) randtype(current) mata: threestep_boot, ///
	y(conflict_pb) z(ln_pop ln_gdp)
di r(pll_seeds) // for log file

* calculate the conditional transition matrix (A) and the matrix of its
* bootstrap standard errors (B); row = lagged state, column = current state
matrix A = J(4,4,.)
matrix B = J(4,4,.)
forvalues i = 0/3 {
	forvalues j = 0/3 {
		matrix A[`=`i'+1',`=`j'+1'] = _b[_eq2:phat_`i'`j']
		matrix B[`=`i'+1',`=`j'+1'] = _se[_eq2:phat_`i'`j']
	}
}

matrix list A

* interleave estimate and SE columns and express everything in percent
matrix T = (A[1...,1] , B[1...,1]), (A[1...,2] , B[1...,2]), ///
           (A[1...,3] , B[1...,3]), (A[1...,4] , B[1...,4])
matrix T = 100*T

matrix list T

* mat of significance stars: 1/2/3 for two-sided 10%/5%/1% normal tests,
* stored in the estimate (odd) columns only
local nr = rowsof(T)
local nc = colsof(T)
tempname zabs

matrix stars = J(`nr',`nc',0)
forvalues k = 1/`nr' {
	forvalues j = 1(2)`nc' {
		scalar `zabs' = abs(T[`k',`j']/T[`k',`=`j'+1'])
		* a zero or missing SE gives a missing ratio, and missing compares
		* as larger than any number; without this guard it would earn ***
		if !missing(`zabs') {
			matrix stars[`k',`j'] =   ///
			(`zabs' > abs(invnormal(0.1/2))) + ///
			(`zabs' > abs(invnormal(0.05/2))) + ///
			(`zabs' > abs(invnormal(0.01/2)))
		}
	}
}

frmttable using ./tables/Table_5, statmat(T) annotate(stars) asymbol(*,**,***) substat(1) sdec(4) ///
	title("Conditional Markov Transition Matrix") ///
	ctitle("From","Peace", "Small Conflict","Armed Conflict", "Civil War" ) ///
	rtitle("Peace" \  "" \ "Small Conflict"\ "" \  "Armed Conflict" \  "" \ "Civil War" \ "") ///
         tex varlabels  replace

* get GSD: one row with (estimate, SE) pairs for the four states, in percent
matrix C = J(1,8,.)
forvalues i = 0/3 {
	matrix C[1,`=2*`i'+1'] = _b[_eq2:gsd_`i'] * 100
	matrix C[1,`=2*`i'+2'] = _se[_eq2:gsd_`i'] * 100
}

* mat of significance stars: 1/2/3 for two-sided 10%/5%/1% normal tests,
* stored in the estimate (odd) columns only
local nr = rowsof(C)
local nc = colsof(C)
tempname zabs

matrix stars = J(`nr',`nc',0)
forvalues k = 1/`nr' {
	forvalues j = 1(2)`nc' {
		scalar `zabs' = abs(C[`k',`j']/C[`k',`=`j'+1'])
		* a zero or missing SE gives a missing ratio, and missing compares
		* as larger than any number; without this guard it would earn ***
		if !missing(`zabs') {
			matrix stars[`k',`j'] =   ///
			(`zabs' > abs(invnormal(0.1/2))) + ///
			(`zabs' > abs(invnormal(0.05/2))) + ///
			(`zabs' > abs(invnormal(0.01/2)))
		}
	}
}


* append the GSD row to the table fragment, then write the combined table
frmttable using ./tables/Table_5, tex annotate(stars) asymbol(*,**,***) frag statmat(C) substat(1) sdec(4) append ///
rtitle("GSD")
frmttable using ./tables/Table_5, replace

* add expected survival times and quantiles (mentioned in text)

matrix list A

mata
// probabilities of remaining in the same state (diagonal of A), as a row
stay = diagonal(st_matrix("A"))'
stay
// 1/(1-p) for each state
1:/(1:-stay)
// log(1-q)/log(p) for q = 0.25, 0.50, 0.94 and 0.99
log(1-1/4):/log(stay)
log(1-2/4):/log(stay)
log(1-94/100):/log(stay)
log(1-99/100):/log(stay)
end

* total time taken
timer off 1
timer list 1
