// Session setup: commands end with a semicolon from here on, memory is
// emptied, output is not paused, and any log left open by an earlier run is
// closed (capture keeps the script going when no log is open).
// The memory setting only matters on older Stata releases; newer releases
// manage memory automatically and ignore it.
#delimit ;
clear;
set more off;
set mem 200m;
capture log close;

******************************************************************************************;
** Creates a dataset that includes variables on household characteristics to check if    *;
** distance to nearest school in 2000 is correlated with these measures                  *;
**                                                                                       *;
**                               							 *;
******************************************************************************************;

// Project directory layout.
// Every location is derived from a single root so the project can be moved by
// editing one line. Previously bk_b3b spelled out its full path on its own, so
// it would have kept pointing at the old folder if hhdata had been changed.
// Forward slashes are used throughout; Stata accepts them on Windows and they
// avoid the backslash-before-macro escaping pitfall.
local root "I:/Schooling";
local hhdata "`root'/hh_data";

// raw household survey books
global bk_k "`hhdata'/bk_k";
global bk_3a "`hhdata'/bk_3a";
global bk_b3b "`hhdata'/bk_b3b";
global bk_tk "`hhdata'/bk_tk";

// derived data, result output and program folders
global c_data "`root'/created_data";
global results "`root'/programs/ols_iv/results";
global progdir "`root'/programs";


// Non-resident parents' education: keep the ba07 items and the two ba08
// items together with pidlink, sorted and stored in a temporary file for the
// merge below.
tempfile parent_educ;
use $bk_b3b/b3b_ba0, clear;
keep pidlink ba07* ba08p ba08m;
sort pidlink;
save `parent_educ', replace;

// Individual data created by Kathleen is the master file. Both merges keep
// every observation, matched or not; _merge is tabulated to the log and then
// dropped.
use $c_data/ifls_individ.dta, clear;
sort pidlink;
merge pidlink using `parent_educ';
tabulate _merge;
drop _merge;

sort pidlink;
merge pidlink using $c_data/ifls_labor;
tabulate _merge;
// restriction to matched records is left disabled, as before
*keep if _m ==3;
drop _merge;


// Community id for linking community data: take hhid00 and commid00 from
// htrack into a temporary file without disturbing the data in memory.
preserve;
tempfile hh_comm;
use `hhdata'/htrack, clear;
keep hhid00 commid00;
sort hhid00;
save `hh_comm', replace;
restore;

// only households found in both files are kept
sort hhid00;
merge hhid00 using `hh_comm';
tabulate _merge;
keep if _merge == 3;
drop _merge;

// Community facility data, matched on commid00. All observations are kept:
// note that no distance is available if moved to non-ifls areas.
order commid00 hhid00 pidlink;
sort commid00;
merge commid00 using $c_data/com_fac;
tabulate _merge;
// restriction to matched records is left disabled, as before
*keep if  _merge == 3;
drop _merge;


// Weights: bring in pwt00xa from ptrack, matched on pidlink, and keep only
// individuals present in both files.
preserve;
tempfile person_wt;
use `hhdata'/ptrack, clear;
keep pidlink pwt00xa;
sort pidlink;
save `person_wt', replace;
restore;

sort pidlink;
merge pidlink using `person_wt';
tabulate _merge;
keep if _merge == 3;
drop _merge;

// intermediate dataset, written before the migration and wellbeing merges
save $c_data/ifls00, replace;

// Migration variables from b3a_mg1, built in a side copy of the data and
// merged back on pidlink.
preserve;
use $bk_3a/b3a_mg1.dta, clear;

// birthplace type: codes 8 and 9 become missing
rename mg04 birthpl;
recode birthpl (8 9 = .);
label var birthpl "type of birthplace village town city";

// lived at 12 where born: 9 becomes missing, 3 becomes 0
rename mg04a samebpl;
recode samebpl (9 = .) (3 = 0);
label var samebpl "at 12 lived same as birth place";

// ever moved since 12: 9 becomes missing, 3 becomes 0; values that are still
// system missing are then filled from mg20c (1 to 17 gives 1, 0 gives 0)
rename mg19b moved12;
recode moved12 (9 = .) (3 = 0);
recode moved12 (. = 1) if inrange(mg20c, 1, 17);
recode moved12 (. = 0) if mg20c == 0;
label var moved12 "ever moved since 12";

keep pidlink birthpl samebpl moved12;
sort pidlink;
tempfile migr;
save `migr', replace;
restore;

// all observations are kept, matched or not
sort pidlink;
merge pidlink using `migr';
tabulate _merge;
drop _merge;

// Subjective wellbeing questions (b3a_sw), matched on pidlink.
// NOTE(review): unlike every other merge in this file, _merge is not dropped
// after this one, so it stays in the data from here on unless label.do
// removes it. Confirm whether later programs rely on it before changing this.
sort pidlink;
merge pidlink using  $bk_3a/b3a_sw;
tabulate _merge;

// Sample restriction (this section is headed IV in the original): keep ages
// 25 to 60 inclusive. Observations with missing age_00 are dropped as well.
keep if inrange(age_00, 25, 60);

// Construct and modify the basic analysis variables.

// dschool: 1 when educ00 is above 9, 0 otherwise, missing when educ00 is
// missing. Stata sorts every missing code above all numbers, so a plain
// comparison against 9 is true for missing values. The previous version only
// reset the system missing value afterwards, which left extended missing
// codes (.a to .z) coded as 1. missing() covers all of them.
gen dschool = (educ00 > 9) if !missing(educ00);

// log of hourly earnings; ln() returns missing when earnhr00 is missing or
// not positive
gen learnhr00 = ln(earnhr00);

rename age_00 age;

// Father's education category (feduc).
// Parental education is missing for parents who are no longer in the
// household. Those cases first get a placeholder code (99), which is then
// filled from the non-resident parent items where they give an answer.

codebook feducstat00; // no missing

codebook feduc00 meduc00;

// categories taken from feduc00
gen feduc = 0 if feducstat00 == 2;
replace feduc = 1 if inrange(feduc00, 1, 6);      // elementary
replace feduc = 2 if inrange(feduc00, 7, 9);      // junior high
replace feduc = 3 if inrange(feduc00, 10, 12);    // senior high
replace feduc = 4 if inrange(feduc00, 13, 22);    // tertiary
replace feduc = 99 if feducstat00 == 3;           // non-resident parents

// Non-resident parents: fill the placeholder from ba07ap and ba08p. Each rule
// only touches observations still coded 99, so an earlier match is never
// overwritten by a later one. Observations matching no rule stay at 99.
replace feduc = 0 if feduc == 99 & ba07ap == 3;
replace feduc = 1 if feduc == 99 & ba08p == 2;
replace feduc = 2 if feduc == 99 & inlist(ba08p, 3, 4);
replace feduc = 3 if feduc == 99 & inlist(ba08p, 5, 6);
replace feduc = 4 if feduc == 99 & inlist(ba08p, 60, 61, 62, 63, 13);
replace feduc = 98 if feduc == 99 & inlist(ba08p, 11, 12, 14, 70, 72, 73, 74, 90);

// Mother's education category (meduc), built the same way as the father's:
// categories from meduc00 first, then a placeholder (99) for non-resident
// mothers that is filled from ba07am and ba08m where possible.

gen meduc = 0 if meducstat00 == 2;
replace meduc = 1 if inrange(meduc00, 1, 6);
replace meduc = 2 if inrange(meduc00, 7, 9);
replace meduc = 3 if inrange(meduc00, 10, 12);
replace meduc = 4 if inrange(meduc00, 13, 22);
replace meduc = 99 if meducstat00 == 3;

// Each rule only touches observations still coded 99, so an earlier match is
// never overwritten by a later one. Observations matching no rule stay at 99.
replace meduc = 0 if meduc == 99 & ba07am == 3;
replace meduc = 1 if meduc == 99 & ba08m == 2;
replace meduc = 2 if meduc == 99 & inlist(ba08m, 3, 4);
replace meduc = 3 if meduc == 99 & inlist(ba08m, 5, 6);
replace meduc = 4 if meduc == 99 & inlist(ba08m, 60, 61, 62, 63, 13);
replace meduc = 98 if meduc == 99 & inlist(ba08m, 11, 12, 14, 70, 72, 73, 74, 90);

// Community variables: lk03 codes 210, 220, 230 and 260 are all set to 999.
recode lk03 (210 220 230 260 = 999);

// run the separate label.do program (its contents are not shown here;
// presumably it attaches variable and value labels)
do $progdir\label.do;

// Summary statistics table, currently disabled.

/*
tabstat learnhr00 age  female00 feduc meduc if inlist(worktype00, 1, 2), statistics(mean sd count) columns(statistics) format(%9.3f);
tabstat learnhr00 age  female00 feduc meduc rural  vsize_hect popsize  if inlist(worktype00, 4, 5), statistics(mean sd count) columns(statistics) format(%9.3f);
tabstat learnhr00 age  female00 feduc  meduc rural  vsize_hect popsize  if inlist(worktype00, 1, 2) & dschool ==1, statistics(mean sd count) columns(statistics) format(%9.3f); //upper secondary
tabstat learnhr00 age  female00 feduc meduc rural  vsize_hect popsize  if inlist(worktype00, 1, 2) & dschool ==0, statistics(mean sd count) columns(statistics) format(%9.3f); //less than upper secondary
*/

// final analysis dataset, sorted by person id
sort pidlink;
save $c_data/data00, replace;
