******************************************************
* CREATES FIGURE C2 IN 'FUELING CONFLICT' 
* Leave-one-out test: Recipients
******************************************************

* Required user-written packages:
*   gtools   - provides gegen, used for the aggregations below
*   parallel - runs the leave-one-out regressions concurrently
* -which- exits with an error (r(111)) when a command is not installed, so it
* has to be wrapped in -capture-; otherwise the do-file stops before the
* install branch is ever reached. Each package is checked on its own so that
* a machine with gtools but without parallel still gets parallel installed.
foreach pkg in gtools parallel {
	capture which `pkg'
	if _rc {
		ssc install `pkg'
		* bring a freshly installed gtools up to date
		if "`pkg'" == "gtools" {
			gtools, upgrade
		}
	}
}



************** preliminaries **************
* Start from an empty session (data, programs, matrices, ...).
clear *
* NOTE: the perm option changes the -more- setting permanently for this Stata
* installation, not just for this run.
set more off, perm
**************  set working dir **************

* Add your working directory here; all paths below are relative to it.
* cd 


* Open the bilateral (recipient-donor-year) data.
use ./data/AiC_analysis_28.dta, clear 
sort recdon_id year

* Drop Korea and Slovenia as recipients: they are donors (original author's note).
drop if rec == "Korea"
drop if rec == "Slovenia"

* Political-system split of the fractionalization measures:
*   frac_ukc = frac for United States, Canada, United Kingdom; 0 for all other donors
*   gfrac    = govfrac for all other donors; 0 for those three donors
gen frac_ukc = 0
replace frac_ukc = frac if don == "United States" | don == "Canada" | don == "United Kingdom"
gen gfrac = govfrac
replace gfrac = 0 if don == "United States" | don == "Canada" | don == "United Kingdom"

* Main aid measure: net ODA as a percentage of GDP (netoda / pwt7_gdp_cUS * 100).
replace netoda_gdp = (netoda/pwt7_gdp_cUS)*100

* Anticipate which recipients end up in the later estimation sample
* (125 recipients according to the original author).
* 1) aggregate bilateral aid to the recipient-year level
bys rec_id year: gegen aggnetoda_gdp=total(netoda_gdp), missing  
* 2) touse = 1 for observations with no missing value in the outcome,
*    aggregated aid and the controls
mark touse
markout touse conflict_pb aggnetoda_gdp  ln_gdp ln_pop
* 3) Ti = number of observations within each touse x recipient x donor cell
bys touse rec_id don_id (year): gen Ti = _N  
tab Ti if touse
* NOTE(review): this -summarize- is not restricted to touse, so r(max) can come
* from a cell that is outside the sample - confirm this is intended.
sum Ti
* 4) blank out rec_id for observations outside the sample or in cells with
*    fewer observations than the maximum
replace rec_id=. if !touse | Ti< `r(max)'
* 5) renumber the remaining recipients consecutively; the leave-one-out loop
*    further down relies on ids running from 1 to N
gegen group = group(rec_id) if !missing(rec_id)
replace rec_id=group

* Drop the helper variables created above.
drop touse Ti aggnetoda_gdp group

* Keep only the variables used by the regressions and the figure.
keep recdon_id rec_id reccode don_id doncode year conflict_pb netoda_gdp ln_gdp ln_pop gfrac frac_ukc prob_recddon

* Declare the bilateral panel and store the base file for the jackknife.
xtset recdon_id year
save ./data/jackknife.dta, replace

*** now compute the leave-one-out estimates in parallel

* start timing
timer clear
timer on 1

* number of recipients in the sample; rec_id was renumbered consecutively
* before jackknife.dta was saved, so the ids run from 1 to N
levelsof rec_id, local(allids)
local N: word count `allids'
di `N'

* Scratch directory for the leave-one-out files.
* -capture- lets the script be re-run when tmp_dump is left over from an
* earlier, interrupted run (plain -mkdir- errors out on an existing directory).
* If the directory truly cannot be created, the first -save- below fails loudly.
capture mkdir tmp_dump

* One file per recipient: the full data minus that recipient.
* -replace- overwrites stale files from an earlier run instead of aborting.
forvalues i = 1/`N' {
	use ./data/jackknife, clear
	drop if rec_id==`i'
	save ./tmp_dump/`i'_jack, replace
}

* first_stage: estimation routine executed on every leave-one-out data set.
* Expects one of the tmp_dump files in memory and leaves a one-row data set
* with the variables coef and se (first-stage coefficient on predicted aid and
* its standard error), ready to be appended across files.
capture program drop first_stage
program define first_stage

	* allow the run to be stopped if things go wrong
	parallel break

	* bilateral regression of aid on the interacted instruments, then the
	* fitted values including the panel fixed effect
	xtreg netoda_gdp c.gfrac##c.prob_recddon c.frac_ukc##c.prob_recddon i.year, fe
	predict netoda_frac_ukc, xbu

	* recipient-year totals of actual and predicted aid
	bysort rec_id year: gegen aggnetoda_gdp = total(netoda_gdp), missing
	by rec_id year: gegen agghatnetoda_gdp = total(netoda_frac_ukc), missing

	* reduce to one row per recipient-year by keeping the USA rows (works like
	* a collapse) and discard observations whose recipient id was blanked out
	keep if doncode == "USA" & rec_id != .
	drop don_id doncode
	xtset rec_id year

	* first year observed for each recipient
	by rec_id (year): gegen inityear = min(year)

	* first stage: actual on predicted aggregate aid with controls and year
	* effects, excluding each recipient's initial year
	xtreg aggnetoda_gdp agghatnetoda_gdp ln_gdp ln_pop i.year if year > inityear, fe vce(cluster rec_id)

	* store coefficient and standard error in a 1 x 2 matrix
	matrix R = (_b[agghatnetoda_gdp], _se[agghatnetoda_gdp])
	matrix colnames R = coef se

	* replace the data in memory by that single row
	clear
	svmat R, names(col)
end

* number of worker processes - set according to your number of cores
parallel setclusters 8

* run first_stage on every leave-one-out file and append the one-row results
parallel append, do(first_stage) prog(first_stage) e("./tmp_dump/%g_jack.dta, 1/`N'")

* delete the scratch files, then the emptied scratch directory
forvalues i = 1/`N' {
	erase ./tmp_dump/`i'_jack.dta
}
rmdir tmp_dump

* stop the clock and report the elapsed time
timer off 1
timer list 1

* Recover the excluded recipient's id from the source file name.
* NOTE(review): dta_source is presumably created by -parallel append- and holds
* the path of the file each row came from as a value label - confirm.
* decode turns the label into a string such as ./tmp_dump/17_jack.dta
decode  dta_source, gen(rec_id)
* splitting on the directory prefix and the file suffix leaves the number in
* the second piece (the first piece is what precedes the prefix)
split rec_id, p("./tmp_dump/" "_jack.dta")
destring rec_id2, replace
drop rec_id rec_id1
ren rec_id2 rec_id
save ./data/coefresults_rec, replace

* Get the original data and reduce it to one row per recipient id.
use ./data/jackknife.dta, clear
collapse (first) reccode , by(rec_id)
*** Attach the country codes to the leave-one-out estimates; only matched
*** recipients are kept.
merge 1:1 rec_id using ./data/coefresults_rec
keep if _merge==3

*** Generate upper and lower 95% CI bounds using the normal approximation:
*** coef +/- se * |invnorm(0.025)|, i.e. roughly 1.96 standard errors.
gen CI_u = coef + se*abs(invnorm(0.025))
gen CI_l = coef - se*abs(invnorm(0.025))

* Rebuild rec_id from the country code so that it carries the codes as value
* labels, which the x axis of the figure displays.
drop rec_id
encode reccode, gen(rec_id)

* Draw the figure in four panels of consecutive recipient ids:
* 1-31, 32-63, 64-95 and 96-125. Every panel shows the leave-one-out
* coefficient with its 95% CI; the invisible scatters of the CI bounds only
* serve to span the axes.
* NOTE(review): the red reference line at 1.232559 is presumably the
* full-sample estimate - confirm against the main results.
local starts 1 32 64 96
local stops 31 63 95 125

forvalues p = 1/4 {
	local lo : word `p' of `starts'
	local hi : word `p' of `stops'

	twoway ///
		(scatter CI_l rec_id if inrange(rec_id, `lo', `hi'), msymbol(none)) ///
		(scatter CI_u rec_id if inrange(rec_id, `lo', `hi'), msymbol(none)) ///
		(rcap CI_u CI_l rec_id if inrange(rec_id, `lo', `hi'), lwidth(small) msize(small) color(gs8)) ///
		(scatter coef rec_id if inrange(rec_id, `lo', `hi'), msymbol(o) color(black)), ///
		yscale(range(0.8(0.4)1.6)) ylabel(0.8 "0.8" 1 "1" 1.2 "1.2" 1.4 "1.4" 1.6 "1.6") ///
		xlabel(`lo'(1)`hi', valuelabel alternate labsize(tiny)) ///
		graphregion(color(white)) ytitle(Coefficient and 95% CI) xtitle(Excluded Recipients) ///
		legend(off) name(rec`p', replace) ///
		yline(1.232559, lc(red))
}

*** combine the four panels and export the figure
graph combine rec1 rec2 rec3 rec4, graphregion(color(white))
graph export ./figures/Figure_C2.pdf, replace
