#delimit;

* ---------------- Session setup ----------------;
* Every command from here on ends with a semicolon, including comment lines;

* Close a log that an earlier run may have left open. capture suppresses the error when none is open;
capture log close;

* Start from an empty workspace: no data in memory and no stored graphs;
clear;
graph drop _all;

* Memory and matrix-size settings, and no pausing of long output;
set mem 600m;
set matsize 2000;
set more off;

* Log everything that follows, overwriting any previous log of the same name, then load the data;
log using "results_fortables", replace;
use "chaudhuri_guilkey_data.dta", clear;


***************************************************;
*               PRELIMINARY STUFF;
***************************************************;


* Attach descriptive labels to the variables of the original data;
* The labels are shown by describe and by any output that prints variable labels;
* Spelling corrected in two labels: Senior (was Serior) and Family (was Familiy);

* Identifiers and survey timing;
label variable mother_id "Unique ID for Mother";
label variable child_id "Unique ID for Child";
label variable com_id "Community ID where Mother lives: Match with IFLS";
label variable prov_id "Province ID where Mother lives";
label variable year "Year of Interview";

* Child characteristics;
label variable c_border "Child's birth order";
label variable c_male "Child is Male";
label variable c_age "Child's Age";
label variable c_edu "Child's Year of Schooling";

* Mother and husband characteristics;
label variable m_kids "Number of Kids Mother has";
label variable m_edu1 "Mother's highest education middle school";
label variable m_edu2 "Mother's highest education high school";
label variable m_edu3 "Mother's highest education college";
label variable m_read "Mother can read";
label variable m_muslim "Mother is Muslim";
label variable m_hedu1 "Mother's husband's education middle school";
label variable m_hedu2 "Mother's husband's education high school";
label variable m_hedu3 "Mother's husband's education college";

* Community characteristics: location and schools;
label variable urban "Community is in urban area";
label variable number_elementary "Number of Elementary Schools";
label variable number_junior "Number of Junior Schools";
label variable number_senior "Number of Senior Schools";

* Community characteristics: health and family planning facilities;
label variable med_post "Community has a medicinal post";
label variable del_post "Community has a village policlinic";
label variable midwife "Community has a midwife";
label variable polindes "Community has a health care facility";
label variable number_posyandu "Number of Posyandu in the community";
label variable number_fp_post "Number of Family Planning Post in the community";



**************************************************; 
* SAMPLE DEFINITION: RELEVANT PART FOR ANALYSIS ;
**************************************************; 

* Analysis sample: children aged 11 to 20;
* A missing c_age fails the c_age < 21 test, so those rows are removed here as well;
keep if (c_age > 10) & (c_age < 21);

* Top-code the mother's number of children at 5;
* Stata orders missing values above every number, so a bare m_kids > 5 is also true for missing m_kids;
* Without the m_kids < . guard those rows would be recoded to 5 and would escape the n_sib missing-value drop below;
replace m_kids = 5 if (m_kids > 5) & (m_kids < .);

* Siblings = mother's children other than the child itself. Rows with a negative count are discarded;
gen n_sib = m_kids - 1;
label variable n_sib "Number of sibling the child has";
drop if n_sib < 0;

* Listwise deletion: every variable used as outcome, regressor or always-observed instrument must be non-missing;
* polindes and number_fp_post are deliberately not in this list, their missingness is what the analysis models;
foreach v of varlist c_edu n_sib c_age c_male
                     m_edu1 m_edu2 m_edu3 m_read m_muslim m_hedu2 m_hedu3
                     urban number_elementary number_junior number_senior
                     med_post del_post midwife number_posyandu {;
    drop if `v' == .;
};

* Show the structure of the resulting data set in the log;
desc;



**************************************************;
*                  KEY VARIABLES; 
**************************************************;

* Exogenous covariates X of the schooling equation. i.c_age enters child age as a set of indicator variables;
local X_var i.c_age c_male m_edu1 m_edu2 m_edu3 m_hedu2 m_hedu3 m_read m_muslim urban number_elementary number_junior number_senior;

* Instruments for n_sib that are non-missing for every row kept by the sample definition;
local other_instruments number_posyandu med_post del_post midwife;

* Conditioning set W used in the missingness and imputation models: X plus number_posyandu, med_post and the outcome c_edu;
local W_var `X_var' number_posyandu  med_post c_edu;

* Observation indicators for the two instruments that can be missing: 1 when the value is observed, 0 when it is missing;
gen D_1 = polindes != .;
gen D_2 = number_fp_post != .;



********************************************************;
*   MISSING IV: STATISTICAL ANALYSIS (IPW and AIPW);
********************************************************;

* Step 1. Model the probability that each instrument is observed: logit of D_j on W, then fitted probabilities;
logit D_1 `W_var';
predict p_1;
logit D_2 `W_var';
predict p_2;

* Step 2. Inverse-probability weights D_j / p_j. They are zero for rows where instrument j is missing;
gen ipw_1 = D_1/p_1;
gen ipw_2 = D_2/p_2;

* Step 3. Imputation model for polindes given W, del_post and midwife, fitted on the rows where polindes is observed;
* poli_hat is available for every row, poli_resid only where polindes is observed;
xi: reg polindes `W_var' del_post midwife;
predict poli_hat;
predict poli_resid, r;

* IPW instrument: weighted observed value;
gen ipw_poli = ipw_1*polindes;

* AIPW instrument: fitted value plus the weighted residual, that is D/p times (Z - E[Z|W]) plus E[Z|W];
* For rows with D_1 = 0 the correction term is zero and the instrument equals the fitted value;
* The previous form ipw_1 times poli_resid evaluated to zero times missing, which Stata treats as missing, and the;
* missing-to-zero replacement below then wiped out the fitted value for exactly those rows;
* NOTE(review): this changes the AIPW estimates relative to earlier runs. The bootstrap program ipw_aipw further;
* down repeats this construction and should be kept in sync;
gen aipw_poli = poli_hat + cond(D_1 == 1, ipw_1*poli_resid, 0);

* Step 4. Same construction for number_fp_post;
xi: reg number_fp_post `W_var' del_post midwife;
predict famplan_hat;
predict famplan_resid, r;

gen ipw_famplan = ipw_2*number_fp_post;
gen aipw_famplan = famplan_hat + cond(D_2 == 1, ipw_2*famplan_resid, 0);

* Make all observations usable by the IV commands;
* For the IPW instruments zero is the correct value when D_j = 0: the weight is zero but the product is missing;
* For the AIPW instruments this is only a safeguard and should not change any row, since the fitted values;
* exist whenever the regressors are non-missing;
replace ipw_poli = 0 if ipw_poli == .;
replace aipw_poli = 0 if aipw_poli == .;
replace ipw_famplan = 0 if ipw_famplan == .;
replace aipw_famplan = 0 if aipw_famplan == .;

* Constructed instruments that stand in for polindes and number_fp_post;
local ipw_inst ipw_poli ipw_famplan;
local aipw_inst aipw_poli aipw_famplan;

* Proposed IPW and AIPW estimators: two-step GMM with heteroskedasticity-robust weighting;
* The reported IPW standard errors are not valid here: they treat P(D_j=1|W) as known. See the bootstrap section;
xi: ivreg2 c_edu `X_var' (n_sib = `ipw_inst' `other_instruments'), gmm2s robust;
xi: ivreg2 c_edu `X_var' (n_sib = `aipw_inst' `other_instruments'), gmm2s robust;



******************************************************************************;
*   IV (SMALLER IV SET BUT NONE MISSING) & IV (LARGER IV SET BUT WITH MISSING)  ;
******************************************************************************;

* Benchmark 1: larger instrument set that includes the two partially observed instruments in raw form;
* Rows where polindes or number_fp_post is missing cannot enter this estimation;
local larger_iv_set polindes number_fp_post `other_instruments';
xi: ivreg2 c_edu `X_var' (n_sib = `larger_iv_set'), gmm2s robust;

* Benchmark 2: smaller instrument set in which no instrument is missing;
local smaller_iv_set `other_instruments';
xi: ivreg2 c_edu `X_var' (n_sib = `smaller_iv_set'), gmm2s robust;



************************************************************;
*             OLS RESULTS FOR VARIOUS SUBSAMPLES;
************************************************************;

* The same OLS specification is run on the full sample and on subsamples defined by which instruments are observed;
* The common part of the command is stored once and the subsample condition is appended to it;
local ols_cmd "xi: reg c_edu n_sib `X_var'";

* Full sample;
`ols_cmd';

* The four joint missingness patterns of polindes (D_1) and number_fp_post (D_2);
`ols_cmd' if (D_1==0) & (D_2==0);
`ols_cmd' if (D_1==0) & (D_2==1);
`ols_cmd' if (D_1==1) & (D_2==0);
`ols_cmd' if (D_1==1) & (D_2==1);

* Split by polindes only;
`ols_cmd' if (D_1==0);
`ols_cmd' if (D_1==1);

* Split by number_fp_post only;
`ols_cmd' if (D_2==0);
`ols_cmd' if (D_2==1);


****************************************************;
*    SUMMARY STATISTICS FOR VARIOUS SUBSAMPLES;
****************************************************;

* Variables summarised in every table: outcome, endogenous regressor, covariates and all instruments;
local sum_vars c_edu n_sib c_age c_male m_edu1 m_edu2 m_edu3 m_hedu2 m_hedu3 m_read m_muslim urban number_elementary number_junior number_senior number_posyandu med_post del_post midwife polindes number_fp_post;

* Full sample;
sum `sum_vars';

* The four joint missingness patterns of polindes (D_1) and number_fp_post (D_2);
sum `sum_vars' if (D_1==0) & (D_2==0);
sum `sum_vars' if (D_1==0) & (D_2==1);
sum `sum_vars' if (D_1==1) & (D_2==0);
sum `sum_vars' if (D_1==1) & (D_2==1);

* Split by polindes only;
sum `sum_vars' if (D_1==0);
sum `sum_vars' if (D_1==1);

* Split by number_fp_post only;
sum `sum_vars' if (D_2==0);
sum `sum_vars' if (D_2==1);
	
	
	
	
****************************************************;
*            BOOTSTRAP STANDARD ERRORS;
****************************************************;

* Go back to the variables of the original data only;
* Everything generated earlier in this file is dropped here and rebuilt below or inside the bootstrap program;
keep mother_id child_id com_id prov_id year
     c_border c_male c_age c_edu
     m_kids m_edu1 m_edu2 m_edu3 m_read m_muslim m_hedu1 m_hedu2 m_hedu3
     urban number_elementary number_junior number_senior
     med_post del_post midwife polindes number_posyandu number_fp_post;


* Same labels and same sample as before;
* Spelling corrected in two labels: Senior (was Serior) and Family (was Familiy);

* Identifiers and survey timing;
label variable mother_id "Unique ID for Mother";
label variable child_id "Unique ID for Child";
label variable com_id "Community ID where Mother lives: Match with IFLS";
label variable prov_id "Province ID where Mother lives";
label variable year "Year of Interview";

* Child characteristics;
label variable c_border "Child's birth order";
label variable c_male "Child is Male";
label variable c_age "Child's Age";
label variable c_edu "Child's Year of Schooling";

* Mother and husband characteristics;
label variable m_kids "Number of Kids Mother has";
label variable m_edu1 "Mother's highest education middle school";
label variable m_edu2 "Mother's highest education high school";
label variable m_edu3 "Mother's highest education college";
label variable m_read "Mother can read";
label variable m_muslim "Mother is Muslim";
label variable m_hedu1 "Mother's husband's education middle school";
label variable m_hedu2 "Mother's husband's education high school";
label variable m_hedu3 "Mother's husband's education college";

* Community characteristics: location and schools;
label variable urban "Community is in urban area";
label variable number_elementary "Number of Elementary Schools";
label variable number_junior "Number of Junior Schools";
label variable number_senior "Number of Senior Schools";

* Community characteristics: health and family planning facilities;
label variable med_post "Community has a medicinal post";
label variable del_post "Community has a village policlinic";
label variable midwife "Community has a midwife";
label variable polindes "Community has a health care facility";
label variable number_posyandu "Number of Posyandu in the community";
label variable number_fp_post "Number of Family Planning Post in the community";

* Re-apply the sample definition used for the point estimates and rebuild n_sib;
* The data in memory have already been through these restrictions once, so the rows should not change here;

* Children aged 11 to 20. A missing c_age fails the c_age < 21 test and is removed as well;
keep if (c_age > 10) & (c_age < 21);

* Top-code the mother's number of children at 5;
* Stata orders missing values above every number, so a bare m_kids > 5 is also true for missing m_kids;
* The m_kids < . guard keeps a missing count missing so that the n_sib missing-value drop below can remove the row;
replace m_kids = 5 if (m_kids > 5) & (m_kids < .);

* Siblings = mother's children other than the child itself. Rows with a negative count are discarded;
gen n_sib = m_kids - 1;
label variable n_sib "Number of sibling the child has";
drop if n_sib < 0;

* Listwise deletion on the outcome, the regressors and the always-observed instruments;
* polindes and number_fp_post are deliberately left out of this list;
foreach v of varlist c_edu n_sib c_age c_male
                     m_edu1 m_edu2 m_edu3 m_read m_muslim m_hedu2 m_hedu3
                     urban number_elementary number_junior number_senior
                     med_post del_post midwife number_posyandu {;
    drop if `v' == .;
};


* Global copies of the variable lists used for the point estimates;
* NOTE(review): nothing in the visible remainder of this file references these globals;

* Exogenous covariates X of the schooling equation;
global X_var i.c_age c_male m_edu1 m_edu2 m_edu3 m_hedu2 m_hedu3 m_read m_muslim urban number_elementary number_junior number_senior;

* Instruments for n_sib that are never missing in the analysis sample;
global other_instruments number_posyandu med_post del_post midwife;

* Conditioning set W: X plus number_posyandu, med_post and the outcome c_edu;
global W_var $X_var number_posyandu  med_post c_edu;

* Observation indicators for the two partially observed instruments: 1 when observed, 0 when missing;
gen D_1 = polindes != .;
gen D_2 = number_fp_post != .;



********************************************************;
*       MISSING IV: BOOTSTRAP SE (IPW and AIPW);
********************************************************;


* Remove a definition left over from an earlier run so that the do-file can be re-run in the same session;
* Without this, program define stops with an already-defined error on the second run;
capture program drop ipw_aipw;

program define ipw_aipw;

    * One bootstrap replication of the four estimators of the schooling equation;
    * Takes no arguments. Expects the analysis sample in memory, including n_sib, D_1 and D_2;
    * Draws a bootstrap sample, redoes every estimation step on it and leaves the coefficient vectors in the;
    * matrices ipw_coeff, aipw_coeff, cc_coeff and full_coeff. The data in memory are restored before exit;

    * Variable lists, defined once and reused by every command below;
    local X_var i.c_age c_male m_edu1 m_edu2 m_edu3 m_hedu2 m_hedu3 m_read m_muslim urban number_elementary number_junior number_senior;
    local W_var `X_var' number_posyandu med_post c_edu;
    local other_instruments number_posyandu med_post del_post midwife;

    * Work on a resampled copy of the data. restore at the end brings back the original rows;
    preserve;
    bsample;

    * Probability that each instrument is observed given W, re-estimated on the bootstrap sample;
    logit D_1 `W_var';
    predict p_1;
    logit D_2 `W_var';
    predict p_2;

    * Inverse-probability weights D_j / p_j;
    gen ipw_1 = D_1/p_1;
    gen ipw_2 = D_2/p_2;

    * Imputation model and constructed instruments for polindes;
    xi: reg polindes `W_var' del_post midwife;
    predict poli_hat;
    predict poli_resid, r;

    gen ipw_poli = ipw_1*polindes;

    * AIPW instrument: fitted value plus the weighted residual. For rows with D_1 = 0 it equals the fitted value;
    * The previous form ipw_1 times poli_resid was zero times missing, which is missing, so the missing-to-zero;
    * replacement below discarded the fitted value for those rows;
    * NOTE(review): the point-estimate section earlier in this file builds the same instruments and should be kept in sync;
    gen aipw_poli = poli_hat + cond(D_1 == 1, ipw_1*poli_resid, 0);

    * Imputation model and constructed instruments for number_fp_post;
    xi: reg number_fp_post `W_var' del_post midwife;
    predict famplan_hat;
    predict famplan_resid, r;

    gen ipw_famplan = ipw_2*number_fp_post;
    gen aipw_famplan = famplan_hat + cond(D_2 == 1, ipw_2*famplan_resid, 0);

    * Make all observations usable. Zero is the correct IPW value when D_j = 0;
    * For the AIPW instruments this is only a safeguard and should not change any row;
    replace ipw_poli = 0 if ipw_poli == .;
    replace aipw_poli = 0 if aipw_poli == .;
    replace ipw_famplan = 0 if ipw_famplan == .;
    replace aipw_famplan = 0 if aipw_famplan == .;

    * Proposed IPW estimator;
    xi: ivreg2 c_edu `X_var' (n_sib = ipw_poli ipw_famplan `other_instruments'), gmm2s robust;
    matrix ipw_coeff = e(b);

    * Proposed AIPW estimator;
    xi: ivreg2 c_edu `X_var' (n_sib = aipw_poli aipw_famplan `other_instruments'), gmm2s robust;
    matrix aipw_coeff = e(b);

    * IV with the larger instrument set, which contains the partially observed instruments in raw form;
    xi: ivreg2 c_edu `X_var' (n_sib = polindes number_fp_post `other_instruments'), gmm2s robust;
    matrix cc_coeff = e(b);

    * IV with the smaller instrument set in which nothing is missing;
    xi: ivreg2 c_edu `X_var' (n_sib = `other_instruments'), gmm2s robust;
    matrix full_coeff = e(b);

    restore;

end;


* Build the list of expressions collected from each replication: elements 1 to 23 of the first row of each;
* coefficient matrix, in the order ipw, aipw, cc, full. simulate names the results _sim_1 to _sim_92 in that order;
local exp_list "";
foreach est in ipw aipw cc full {;
    forvalues j = 1/23 {;
        local exp_list `exp_list' `est'_coeff[1,`j'];
    };
};

* 1000 bootstrap replications of ipw_aipw with a fixed seed. The data in memory are replaced by the results;
simulate `exp_list', reps(1000) seed(12345): ipw_aipw;

* Bootstrap results: the four standard deviations reported by each sum command are the bootstrap standard errors of the
* four estimators in Table 8 of the paper. Each sum command corresponds to one row (from top to bottom) of Table 8;
 
* One summary table per coefficient position k. The numlist fixes the row order, with positions 17 and 18;
* placed before 15 and 16;
* Within a table the columns are the same position taken from full_coeff (k + 69), cc_coeff (k + 46),;
* ipw_coeff (k) and aipw_coeff (k + 23);
foreach k of numlist 1/14 17 18 15 16 19/23 {;
    local full_col = `k' + 69;
    local cc_col = `k' + 46;
    local aipw_col = `k' + 23;
    sum _sim_`full_col' _sim_`cc_col' _sim_`k' _sim_`aipw_col';
};


* End of the run: close the log opened at the top of the file;
log close;
