#delimit ;
** Session setup. From here on every command ends with a semicolon;
** Start with no data in memory, then apply the session settings;
clear;
set more off;
set mem 200m;
** Close a log if one is open, and carry on without error if none is;
capture log close;
** F8 types the do command for bk3_iv.do, presumably this file (TODO confirm);
global F8 "do I:\Schooling\programs\data_work\bk3_iv.do";

******************************************************************************************;
** Uses book III (B3A_DL3) to create a dataset with the distance travelled (in minutes)  *;
** to school. It then creates an IV based on the average distance travelled within each  *;
** kecamatan (Kec, subdistrict).                                                         *;
**                                                                                       *;
******************************************************************************************;

** Root folder of the household data files;
local hhdata "I:/Schooling/hh_data";

** Globals bk_k, bk_3a and bk_tk each point at the subfolder of that name under the root;
foreach book in k 3a tk {;
    global bk_`book' "`hhdata'/bk_`book'";
};

** Folders for programs, estimation results and created datasets;
global progdir "I:\Schooling\programs";
global results "I:\Schooling\programs\ols_iv\results";
global c_data "I:\Schooling\created_data";


** Load b3a_dl3 and keep the identifiers plus the dl16 items used below;
use $bk_3a/b3a_dl3, clear;
keep hhid00 pid00 pidlink
     dl3type dl2name
     dl16fnmx dl16fadx dl16flcx dl16fax
     dl16fb dl16fbx dl16fc dl16fcx dl16fcf dl16fd dl16fdx
     dl16g dl16i dl16ix dl16j dl16jx;

** Area identifier is the first three characters of the household id;
generate eaid00 = substr(hhid00, 1, 3);

** Keep only records with dl16flcx equal to 1, that is, the school location is known;
drop if dl16flcx != 1;

** Multiply dl16j by 60 where its unit flag dl16jx is 2, so that all values are in minutes
** (presumably the flag value 2 marks hours, TODO confirm against the codebook);
replace dl16j = 60*dl16j if dl16jx == 2;

** For each flag, tabulate it with missings and drop records coded 8 or 9.
** Original note on dl16fbx: have codes if it equals 1 or 3;
foreach flag of varlist dl16fbx dl16fcx dl16fdx {;
    tabulate `flag', missing;
    drop if `flag' == 8 | `flag' == 9;
};

preserve;  //by subdistrict, SMP
** Builds the tempfile dist_smp with one row per eaid00 holding dist_smp_kec, the mean
** of dl16j over the records with dl3type equal to 2. Variable names are written out in
** full so the block does not depend on variable-name abbreviation being switched on;
tempfile dist_smp;
keep if dl3type == 2;
bysort eaid00: egen dist_smp_kec = mean(dl16j); //by kecamatan
keep eaid00 dist_smp_kec;
sort eaid00;
** Diagnostic only. Shows how many records share each eaid00 before collapsing;
duplicates report eaid00;
** The mean is constant within eaid00, so this just reduces the data to one row per area;
collapse dist_smp_kec, by(eaid00);
** Drop areas whose mean exceeds 100. A missing value compares greater than any number,
** so areas where every dl16j is missing are dropped by this condition as well;
drop if dist_smp_kec > 100;
** Explicit sort on the merge key before saving, as the SMU and university blocks do;
sort eaid00;
save `dist_smp', replace;
restore;
 
preserve;  //by subdistrict, SMU
** Builds the tempfile dist_smu with one row per eaid00 holding dist_smu_kec, the mean
** of dl16j over the records with dl3type equal to 3. Variable names are written out in
** full so the block does not depend on variable-name abbreviation being switched on;
tempfile dist_smu;
keep if dl3type == 3;
bysort eaid00: egen dist_smu_kec = mean(dl16j); //by kecamatan
keep eaid00 dist_smu_kec;
sort eaid00;
** Diagnostic only. Shows how many records share each eaid00 before collapsing;
duplicates report eaid00;
** The mean is constant within eaid00, so this just reduces the data to one row per area;
collapse dist_smu_kec, by(eaid00);
** Drop areas whose mean exceeds 300. A missing value compares greater than any number,
** so areas where every dl16j is missing are dropped by this condition as well;
drop if dist_smu_kec > 300;
** The old-style merge further down needs the using file sorted on the key;
sort eaid00;
save `dist_smu', replace;
restore;

preserve;  //by subdistrict University
** Builds the tempfile dist_u with one row per eaid00 holding dist_u_kec, the mean
** of dl16j over the records with dl3type equal to 4. Variable names are written out in
** full so the block does not depend on variable-name abbreviation being switched on;
tempfile dist_u;
keep if dl3type == 4;
bysort eaid00: egen dist_u_kec = mean(dl16j); //by kecamatan
keep eaid00 dist_u_kec;
sort eaid00;
** Diagnostic only. Shows how many records share each eaid00 before collapsing;
duplicates report eaid00;
** The mean is constant within eaid00, so this just reduces the data to one row per area;
collapse dist_u_kec, by(eaid00);
** Drop areas whose mean exceeds 300. A missing value compares greater than any number,
** so areas where every dl16j is missing are dropped by this condition as well;
drop if dist_u_kec > 300;
** The old-style merge further down needs the using file sorted on the key;
sort eaid00;
save `dist_u', replace;
restore;


** Combine the three area-level tempfiles into one dataset keyed on eaid00;
use `dist_smp', clear;

** Attach the SMU file and then the university file. Each pass sorts the master on the
** key, merges, shows the match breakdown and removes the merge indicator;
foreach part in dist_smu dist_u {;
    sort eaid00;
    merge eaid00 using ``part'';
    tabulate _merge;
    drop _merge;
};

** Output names for the SMP and SMU means.
** NOTE(review): dist_u_kec keeps its original name, confirm this is what later code expects;
rename dist_smu_kec min2smu;
rename dist_smp_kec min2smp;

sort eaid00;

save $c_data\dist_kec_ea, replace;

** next try averaging in EA; 