#delimit ;
* Session setup - every command below is terminated by a semicolon;
* Start from an empty workspace and close any log left open by an earlier run;
clear all;
capture log close;
set more off;
set mem 200m;

* Path macros - hhdata is a local, the other three are globals;
* Only the results global is referenced in the code that follows in this section;
local hhdata "I:/Schooling/hh_data";
global c_data "C:\Users\wb245036\Dropbox\Schooling\created_data";
global results "C:\users\wb245036\documents\indonesia_see wd drive\schooling\programs\ols_iv\results";
global progdir "C:\users\wb245036\documents\indonesia_see wd drive\Schooling\programs";


* myiv - run IV, OLS, first-stage and reduced-form regressions for one outcome;
* and append the four models to a worksheet with xml_tab;
*   varlist    outcome variable, used as the dependent variable;
*   if         optional condition, applied to the ivreg2 call only - the other;
*              models are restricted to the ivreg2 estimation sample;
*   iv()       excluded instrument or instruments for the schooling variable;
*   educ()     endogenous schooling variable;
*   specname() list of control variables added to every model;
*   sheet()    worksheet name passed to xml_tab;
*   filename() workbook name, saved under the results global;
* Side effects - stores estimates iv ols fstage reducedf, recreates the variable;
* ivsamp as 1 on the IV estimation sample, sets the global scalar F to the;
* first-stage F statistic and creates the matrix ftest followed by the educ name;
* All models cluster standard errors on commid00;
cap program drop myiv;
program define myiv;

syntax varlist [if], iv(string) educ(string) specname(string) sheet(string) filename(string);
local yvar `varlist';

* IV estimate - its estimation sample fixes the sample of every later model;
ivreg2 `yvar' (`educ' = `iv') `specname' `if', cl(commid00);
est store iv;
tempvar fixsamp;
* ivsamp is a permanent copy of the sample flag that stays in the data for later use;
cap drop ivsamp; 
gen ivsamp = 1 if e(sample);
gen `fixsamp' = 1 if e(sample);

* OLS of the outcome on the schooling variable, same sample;
xi: reg `yvar' `educ' `specname' if `fixsamp' ==1, cluster(commid00);
est store ols;

* First stage, followed by the joint test of the excluded instruments;
xi: reg `educ' `iv' `specname' if `fixsamp' ==1 , cluster(commid00);
est store fstage;
testparm `iv';
scalar F = r(F);

* scalar(F) forces the scalar to be read - a bare F would be resolved to a;
* variable first if any variable name in the data is abbreviated by F;
mat ftest`educ' = (1,scalar(F));
*scalar pvalue = r(p);

* Reduced form - outcome regressed directly on the instrument;
xi: reg `yvar' `iv' `specname' if `fixsamp' ==1, cluster(commid00);
est store reducedf;

xml_tab ols fstage reducedf iv, rblank(rural "Locality dummies suppressed" S2130)  sheet(`sheet') style(s1)  save("$results/`filename'")   newappend;

end;

* Drop every matrix currently in memory - capture suppresses any error;
capture matrix drop _all;

* Load the analysis file and keep observations whose worktype00 is 4 or 5;
use ind_jaerr2.dta, clear;
keep if worktype00 == 4 | worktype00 == 5;

* Province indicator variables, one per province code;
* Each is 1 when province equals the code and 0 otherwise, missing included;
generate prov_NSUM  = province == 12;
generate prov_WSUM  = province == 13;
generate prov_SSUM  = province == 16;
generate prov_LAMP  = province == 18;
generate prov_JAKA  = province == 31;
* No indicator is created for code 32 - presumably the omitted base category, confirm;
*gen prov_WJAV = (province == 32 & province !=.) ;
generate prov_CJAV  = province == 33;
generate prov_YOGI  = province == 34;
generate prov_EJAV  = province == 35;
generate prov_BALI  = province == 51;
generate prov_WNUSA = province == 52;
generate prov_SKALI = province == 63;
generate prov_SSUL  = province == 73;

** IN KILOMETERS;
* yearwas12 is labelled in the original as the year the individual was 12 (for RR);
* NOTE(review): the added 100000 puts the value far above any calendar year, and;
* the only filter that would use it is commented out below - confirm this is intended;
generate yearwas12 = 2000 - age + 100000;

** THE COMM FAC VARIABLES ARE km2KD AND SCHOOLAGE;
* Copy each of the 15 km2kd variables to a _12 version and collect the new names;
* The disabled filter on the copy was - if schoolageX is nonmissing and below yearwas12;
local kmlist;
forvalues k = 1/15 {;
    generate km2kd`k'_12 = km2kd`k';
    local kmlist `kmlist' km2kd`k'_12;
};

* Instrument - row minimum across the 15 copied variables;
egen iv = rmin(`kmlist');

* Remove any previous results workbook so the tables below start a fresh file;
capture erase "$results/ols_iv.xml";
*replace kmsd =.  if kmsd >10;

* Propagate the parental education flags downward - ter sets hsec, then hsec sets jsec;
* The order matters because the jsec lines read the updated hsec values;
replace hsec_f = 1 if ter_f == 1;
replace hsec_m = 1 if ter_m == 1;
replace jsec_f = 1 if hsec_f == 1;
replace jsec_m = 1 if hsec_m == 1;

* Control variables shared by the regressions that follow;
global X "age age2 r_* elem_f jsec_f  edumiss_f elem_m jsec_m edumiss_m  rur prov_* ";

** TABLE 2: OLS and IV results for learnhr00, once with educ00 and once with dschool;
* Both calls use the instrument iv and append a sheet to the ols_iv workbook;
myiv learnhr00,
    iv(iv) educ(educ00) specname($X)
    sheet(iv1 discreet) filename(ols_iv);

myiv learnhr00,
    iv(iv) educ(dschool) specname($X)
    sheet(iv1 binary) filename(ols_iv);


** TABLE 3: Correlates of dschool and observable characteristics;
* Five dprobit models of dschool, all clustered on commid00;

* Baseline - shared controls only;
xi: dprobit dschool $X, cluster(commid00);
estimates store dschool;

* Adds failed;
xi: dprobit dschool failed $X, cluster(commid00);
estimates store failed;

* Adds work_sch;
xi: dprobit dschool work_sch $X, cluster(commid00);
estimates store work;

* Adds the four st_ school-type variables;
xi: dprobit dschool st_pur st_prnr st_prisl st_prchrist $X, cluster(commid00);
estimates store school_type;

* Adds repeated_n with its own explicit control list instead of the shared one;
* NOTE(review): the pattern st_*ls only matches names that start with st_ and end;
* in ls - none of the st_ variables named elsewhere in this file do, confirm it is not a typo;
xi: dprobit dschool repeated_n age elem_f jsec_f hsec_f ter_f edumiss_f elem_m jsec_m hsec_m ter_m edumiss_m town_12 city_12 moved rural st_*ls, cluster(commid00);
estimates store school_repeats;

* Export the five models to the sheet named dschool corr;
* NOTE(review): drop() uses the _I prefix that xi gives to generated dummies, while;
* the province dummies in the shared controls are named prov_ - confirm the patterns;
* still suppress what they are meant to;
xml_tab dschool failed work school_type school_repeats,
    drop(_Iprov* _Ilk*)
    title("Correlates of observables and participation in secondary schooling")
    sheet(dschool corr) style(s1) save("$results/ols_iv") newappend;

// TABLE 4a,b: correlation between the distance measure and pre-treatment (lower secondary school) variables
// such as working during school, repeated grades and the type of school attended.

** Correlates of distance (kmsmp) and primary school variables;
// Academic achievement in elementary school

* The names failed and school_type were already used for stored estimates above;
* storing them again here replaces those earlier results;
xi: regress failed kmsmp $X, cluster(commid00);
estimates store failed;

xi: regress repeated_n kmsmp $X, cluster(commid00);
estimates store repeats;

xi: regress work_sch kmsmp $X, cluster(commid00);
estimates store worked_during_school;

* NOTE(review): in this model kmsmp is the dependent variable, whereas the three;
* models above have it as a regressor - confirm this asymmetry is intended;
xi: regress kmsmp st_pur st_prnr st_prisl st_prchrist $X, cluster(commid00);
estimates store school_type;

* Export the four models to the sheet named elementary, dropping prov and lk coefficients;
xml_tab failed repeats worked_during_school school_type,
    drop(prov* lk*)
    title(Correlates between distance and elementary school variables )
    rblank(rural "Province and subdistrict dummies suppressed" S2130)
    sheet(elementary) style(s1) save("$results/ols_iv") newappend;



* Table of means by dschool on the sample flagged by ivsamp;
* The data are restored afterwards, so the sample restriction and r_muslim are temporary;
preserve;
keep if ivsamp == 1;

* Indicator for ar15 equal to 1 - it is 0 for every other value, missing included;
generate r_muslim = ar15 == 1;

* One two-sample t test per covariate - each adds a row to the matrix sd;
* r(mu_2) and r(mu_1) are the means of the second and first dschool groups;
foreach cov of varlist learnhr00 educ00 kmsmp kmsd age r_* unsch_f elem_f jsec_f edumiss_f unsch_m elem_m jsec_m edumiss_m vill_12 town_12 city_12 moved rural {;
    quietly ttest `cov', by(dschool);
    matrix v = (r(mu_2), r(mu_1), r(se), r(t), r(p));
    matrix rownames v = `cov';
    matrix sd = nullmat(sd) \ v;
};

* Column labels are set once after all rows have been stacked;
matrix colnames sd = Mean1 Mean0 sediff tstat pvalue;

matrix list sd, format(%9.2g);
tabulate dschool;
xml_tab sd, sheet(means) save("$results/ols_iv") newappend;
restore;
