******************************************************
* CREATES TABLE D6 IN 'FUELING CONFLICT' 
* Partialling out bilateral variables during IV prediction stage
*****************************************************


************** preliminaries **************
// start from an empty session
clear all
// number of bootstrap replications (the paper uses 999)
global reps 999
// upper bound on Newton-Raphson iterations for a single estimation
set maxiter 50
// number of parallel workers: set to the number of cores; changing it alters the seeds used
parallel setclusters 30, force
// never pause output; this setting is stored permanently
set more off, permanently

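************** Programs **************
* user-written packages required by this file (run once if not installed);
* estout provides eststo, estadd and esttab
/*
ssc install gtools
gtools, upgrade
ssc install parallel
ssc install estout
*/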

**************  set working dir **************

*add your working directory
* cd 


** code for generic bootstrap program

// remove any earlier definition so the program can be defined afresh below
capture program drop threestep_boot

* define 3-step bootstrap and tempvars needed
program threestep_boot, eclass 
	// threestep_boot: runs the full three-step estimation once on the data
	// currently in memory and posts the main-model coefficients as e(b).
	//   Step 1: pair fixed-effects regression of netoda_gdp on the bilateral
	//           data, prediction of aid, removal of the bpartial() terms.
	//   Step 2: aggregation to a newid-year panel and first-stage regression
	//           of aggregated aid on aggregated predicted aid (residual nu).
	//   Step 3: xtoprobit of y() on aid, nu, lagged states, initial states,
	//           controls and the m_* and z_* auxiliary terms.
	// Options:
	//   y(varname)        dependent variable; compared against 1, 2 and 3 below
	//   z(varlist)        optional controls used in steps 2 and 3
	//   bpartial(varlist) optional bilateral variables added to step 1 and
	//                     then subtracted from the prediction
	//   initrends         optional; adds initial-state x year terms
	// Results: e(b), e(converged), e(NgT), e(Ng), e(T), e(F_IV).
	// Expects newid, don_id, doncode, year, netoda_gdp, gfrac, frac_ukc and
	// prob_recddon in memory. The data are preserved on entry and restored
	// at the end, so the caller's dataset is not modified.

	* allow changing dep var and adding controls
	syntax , Y(varname numeric) [Z(varlist numeric) bpartial(varlist numeric) initrends]
	// only aa and conv are referenced below; bb, cc and ss are not used
	tempname aa bb cc ss conv
	// copy the options into the local names used in the rest of the program
	local initial_trends `initrends'
	local bil_partial `bpartial'

	* check if parallel execution was aborted
	parallel break
	
	* moved in here in case parallel does  not export this parameter
	set maxiter 50 // max NR iterations for one estimation

	* allow going back to bilateral structure
	preserve
	
	* re-define panel
	// one panel unit per (newid, don_id) pair
	gegen newpair = group(newid don_id)
	xtset newpair year
	
	*** predict aid bilaterally
	// xbu is the linear prediction plus the estimated pair fixed effect
	xtreg netoda_gdp c.gfrac##c.prob_recddon c.frac_ukc##c.prob_recddon `bil_partial' i.year, fe
	predict netoda_frac_ukc, xbu
	
	// take each bpartial() variable's fitted contribution (coefficient times
	// value) back out of the prediction
	foreach x of local bil_partial {
		replace netoda_frac_ukc = netoda_frac_ukc - _b[`x']*`x'
	}
	
	* aggregate to country-year panel
	// totals within newid-year; with the missing option a group whose inputs
	// are all missing gets a missing total
	bys newid year: gegen aggnetoda_gdp=total(netoda_gdp), missing  
	by newid year: gegen agghatnetoda_gdp=total(netoda_frac_ukc), missing 
	// NOTE(review): this leaves one row per newid-year only if the donor of
	// the first observation appears in every newid-year - confirm
	keep if doncode == doncode[1] // works like collapse
	drop don_id doncode

	* mark the sample to use and drop rest 
	// rows with a missing outcome, aid aggregate or control are dropped
	mark touse
	markout touse `y' aggnetoda_gdp agghatnetoda_gdp `z' 
	drop if !touse

	* reset xt to panel bootstrap handle
	xtset newid year
	
	* balance sample to max Ti
	// Ti is the number of remaining rows for the unit; only units with the
	// largest count are kept
	by newid (year): gen Ti = _N  
	qui sum Ti // fine now
	keep if Ti== `r(max)'

	* generate lagged conflict states
	// indicators for last year's y() being 1, 2 or 3; a missing lag yields 0
	gen lagged_o_2 = (l.`y' == 1)
	gen lagged_o_3 = (l.`y' == 2)
	gen lagged_o_4 = (l.`y' == 3)

	* generate initial year var and initial state dummies
	// init holds the unit's y() value in its first year, copied to all rows
	by newid (year): gegen inityear = min(year)  
	gen initcondtemp = `y' if year==inityear
	by newid(year) : gegen init = max(initcondtemp)
	drop initcondtemp
	// one dummy init_k per distinct value of init
	qui tab init, gen(init_)  
	* note that we have few initial war observations, thus in some 
	* bootstrap iterations init_4 will be empty (not found)
	drop init_1 init // will always be omitted, second is tempvar

	* generate T-2 year dummies 
	qui sum year
	// first year will always drop out given condition below, second year is base
	forv i = `=r(min)+2'(1)`=r(max)' {
		gen y_`i' =  (year==`i')
	}

	// optional initial-state specific linear trends
	// NOTE(review): init_4 must exist for this branch (and for the main
	// model below); see the remark above about samples without init_4
	if "`initial_trends'" != "" {
		gen init2_X_year = init_2*year
		gen init3_X_year = init_3*year
		gen init4_X_year = init_4*year	
		local init_first init_2 init_3 init_4 init2_X_year init3_X_year init4_X_year
		local trends_main init2_X_year init3_X_year init4_X_year
	}
	
	
	* generate z_i vector including controls and IV
	// for each variable: m_var is the unit mean over the years after the
	// first (missing on first-year rows); z_var_t is the unit's value of the
	// variable in year t, copied to every row of that unit
	local allvars agghatnetoda_gdp `z' // put other control vars here 
	foreach var in `allvars' {
		by newid (year): gegen m_`var' = mean(`var') if year>inityear 
		qui sum year // not first year, gets omitted otherwise
		forv i = `=r(min)+1'(1)`r(max)' {
			by newid (year): gen temp_z_`var'_`i' = `var' if year==`i'
			by newid (year): gegen z_`var'_`i' = max(temp_z_`var'_`i')
			drop  temp_z_`var'_`i'
		}
	}
	
	* panel set and sort again
	*xtset 

	*** first stage, equiv to xtreg, fe
	// NOTE(review): y_1977-y_2010 hard-codes the range of year dummies;
	// presumably the sample runs from 1975 to 2010 - confirm
	reg aggnetoda_gdp agghatnetoda_gdp `z' `init_first' m_* y_1977-y_2010 if year>inityear, cluster(newid)
	local NT = e(N) // needed for later to replace "wrong N" from bilateral sample
	local N = e(N_clust)
	// squared t-statistic of the predicted-aid coefficient, returned as F_IV
	local F_IV = (_b[agghatnetoda_gdp]/_se[agghatnetoda_gdp])^2
	predict nu, resid 
	
	* generate averages and nu_i vector
	// same construction as for the controls above, applied to the residual
	by newid (year): gegen m_nu = mean(nu) if year>inityear 
	// this call is not prefixed with qui, so its output is displayed
	sum year // not first year
	forv t = `=r(min)+1'(1)`r(max)' {
		by newid (year): gen temp_z_nu_`t' = nu if year==`t'
		by newid (year): gegen z_nu_`t' = max(temp_z_nu_`t')
		replace z_nu_`t' = 0 if missing(z_nu_`t') // needed?
		drop  temp_z_nu_`t'
	}

	*** main model
	* constrained version: averages plus first few years separately
	* nb: include controls after nu ...
	// NOTE(review): z_*_197* picks the per-year terms for years 197x only
	// and, like y_1977-y_2010, is tied to the sample period - confirm
	xtoprobit `y' aggnetoda_gdp nu ///
		c.aggnetoda_gdp#1.lagged_o_2 c.aggnetoda_gdp#1.lagged_o_3 ///
		c.aggnetoda_gdp#1.lagged_o_4 lagged_o_2-lagged_o_4 init_2-init_4 ///
		`z' `trends_main' m_* z_*_197* y_1977-y_2010  if year>inityear, i(newid) 
	
	* save convergence results, needed for BS rejections	
	scalar `conv' = e(converged)
	
	* save coefficients
	mat `aa' = e(b)
	* count relevant output size
	local n1 : word count `z'
	local n2 : word count `trends_main'

	// 11 = aggnetoda_gdp, nu, 3 interactions, 3 lagged states and 3 initial
	// state dummies; the z() controls and the trend terms follow them
	mat `aa' = `aa'[1,1..`=11+`n1'+`n2''] // save only relevant coef, not CRE stuff

	* post the main coefficients as e-class results
	// ereturn post replaces the current e() results, which is why the values
	// needed below were copied into locals and a scalar beforehand
	ereturn post `aa' 
	
	* return e-class recording if iteration converged
	ereturn scalar converged =`conv'
	ereturn scalar NgT = `NT'
	ereturn scalar Ng = `N'	
	// observations per cluster in the first-stage sample
	ereturn scalar T = `=`NT'/`N''	
	ereturn scalar F_IV = `F_IV'	

	* go back to bilateral structure
	restore
end


// load the bilateral data set and declare it as a panel
use "./data/AiC_all_bilateralrobust.dta", clear
xtset recdon_id year

// the seed is set a single time here; parallel derives its seeds from it
set seed 10101

// reset timer 1 and start it
timer clear 1
timer on 1

*** Column 1: partial out UNGA agreement

// drop the current panel declaration; threestep_boot declares its own
xtset, clear

// parallel does not save results from a first full-sample run, so run the
// program once here; newid is created only for this call and dropped again
// because parallel bs creates it itself via idcluster(newid)
gen newid = rec_id
threestep_boot, y(conflict_pb) z(ln_pop ln_gdp) bpartial(agree2un)
drop newid

// keep the full-sample statistics before the bootstrap call replaces e()
local F = e(F_IV)
local T = e(T)
local Ng = e(Ng)
local NT = e(NgT)

// clustered bootstrap in parallel; replications that did not converge are
// rejected, and the result is stored with eststo
eststo: parallel bs,  ///
	nowarn reps($reps) reject(e(converged)==0) cluster(rec_id) ///
	idcluster(newid) randtype(current): threestep_boot, ///
		y(conflict_pb) z(ln_pop ln_gdp) bpartial(agree2un)

di r(pll_seeds) // for log file

// attach the full-sample statistics to the stored estimates; NgT is posted
// from the count returned by the program instead of being rebuilt as Ng*T
estadd scalar Ng = `Ng'
estadd scalar T = `T'
estadd scalar NgT = `NT'
estadd scalar F_IV = `F'


*** Column 2: partial out bilateral trade

// drop the current panel declaration; threestep_boot declares its own
xtset, clear

// parallel does not save results from a first full-sample run, so run the
// program once here; newid is created only for this call and dropped again
// because parallel bs creates it itself via idcluster(newid)
gen newid = rec_id
threestep_boot, y(conflict_pb) z(ln_pop ln_gdp) bpartial(ln_import_don ln_import_rec)
drop newid

// keep the full-sample statistics before the bootstrap call replaces e()
local F = e(F_IV)
local T = e(T)
local Ng = e(Ng)
local NT = e(NgT)

// clustered bootstrap in parallel; replications that did not converge are
// rejected, and the result is stored with eststo
eststo: parallel bs,  ///
	nowarn reps($reps) reject(e(converged)==0) cluster(rec_id) ///
	idcluster(newid) randtype(current): threestep_boot, ///
		y(conflict_pb) z(ln_pop ln_gdp) bpartial(ln_import_don ln_import_rec)

di r(pll_seeds) // for log file

// attach the full-sample statistics to the stored estimates; NgT is posted
// from the count returned by the program instead of being rebuilt as Ng*T
estadd scalar Ng = `Ng'
estadd scalar T = `T'
estadd scalar NgT = `NT'
estadd scalar F_IV = `F'

*** Column 3: partial out bilateral trade and UNGA

// drop the current panel declaration; threestep_boot declares its own
xtset, clear

// parallel does not save results from a first full-sample run, so run the
// program once here; newid is created only for this call and dropped again
// because parallel bs creates it itself via idcluster(newid)
gen newid = rec_id
threestep_boot, y(conflict_pb) z(ln_pop ln_gdp) bpartial(agree2un ln_import_don ln_import_rec)
drop newid

// keep the full-sample statistics before the bootstrap call replaces e()
local F = e(F_IV)
local T = e(T)
local Ng = e(Ng)
local NT = e(NgT)

// clustered bootstrap in parallel; replications that did not converge are
// rejected, and the result is stored with eststo
eststo: parallel bs,  ///
	nowarn reps($reps) reject(e(converged)==0) cluster(rec_id) ///
	idcluster(newid) randtype(current): threestep_boot, ///
		y(conflict_pb) z(ln_pop ln_gdp) bpartial(agree2un ln_import_don ln_import_rec)

di r(pll_seeds) // for log file

// attach the full-sample statistics to the stored estimates; NgT is posted
// from the count returned by the program instead of being rebuilt as Ng*T
estadd scalar Ng = `Ng'
estadd scalar T = `T'
estadd scalar NgT = `NT'
estadd scalar F_IV = `F'

// write the table from the stored estimates (coefficients with bootstrapped
// SEs), once as LaTeX and once as RTF; the two calls differ only in format
foreach ext in tex rtf {
	esttab using ./tables/Table_D6.`ext', replace `ext' star(* 0.10 ** 0.05 *** 0.01) ///
		se nobaselevels b(4) se(4) ///
		stats(F_IV NgT T Ng, fmt(a3) labels("First stage F-stat" "Observations" "Years" "Countries"))
}

// stop timer 1 and report the total run time
timer off 1
timer list 1
