#delim;
/* Session setup: close any log left open, empty memory, then set the
   memory allocation and turn off output paging. */
cap log close;
clear;
set mem 200m;
set more off;

/* Directory macros used below. hhdata is a local macro; the rest are
   globals. The path strings are kept exactly as originally written. */
global ifls93 "I:\Schooling\alldata\hh1993"; 
global community93 "I:\Schooling\com_data\1993"; 
global c_data "I:/Schooling\created_data";
global results "I:/Schooling\programs\ols_iv\results";
global progdir "I:/Schooling\programs\data_work";
local hhdata "I:/Schooling/hh_data";

/*ba_chkid.dta*/

/* Build the tempfile distance: one row per commid93 holding the community
   minimum of bi6, bi8a and bi8b, computed separately for bukii01 rows with
   bi2 == 5 (original variable names) and rows with bi2 == 3 (names suffixed
   _junior).
   Values above 95 are set to missing before taking the minimum; presumably
   they are special codes rather than real measurements - TODO confirm
   against the codebook. */

use $community93\bukii01.dta, clear; 
preserve; 
keep if bi2 == 5; 
for var bi6 bi8a bi8b: replace X = . if X > 95; 
collapse (min) bi6 bi8a bi8b, by(commid93); 
tempfile distance; 
sort commid93; 
save `distance', replace; 
restore; 
keep if bi2 == 3; 
for var bi6 bi8a bi8b: replace X = . if X > 95; 
/* Collapse first and rename afterwards. Renaming first would leave collapse
   pointing at bi6, bi8a and bi8b, which no longer exist, and it would stop
   with a "variable not found" error. */
collapse (min) bi6 bi8a bi8b, by(commid93); 
for var bi6 bi8a bi8b: rename  X X_junior; 
sort commid93; 
/* Add the bi2 == 5 minima saved above; both files are sorted by commid93. */
merge commid93 using `distance'; tab _merge;  drop _merge;
sort commid93; 
save `distance', replace; 

/* Wage sample: start from BUK2PH1.dta, attach bkii_cf.dta by commid93, the
   bukkar2.dta file by (case, ar001a), and the community distance tempfile
   by commid93. The old-style merge expects the using files to be sorted on
   the merge keys already. */
use $ifls93/BUK2PH1.dta, clear;
summarize ph04 ph07r1;

sort commid93;
merge commid93 using $community93\bkii_cf.dta;
tabulate _merge;
drop _merge;

/* ph00 is renamed so that it matches the key name used in bukkar2.dta. */
rename ph00 ar001a;
sort case ar001a;
merge case ar001a using $ifls93/bukkar2.dta;
tabulate _merge;
drop _merge;

sort commid93;
merge commid93 using `distance';
drop _merge;

summarize ph04 ph07r1 ar09yr ar07 ar16;

/* Sample restrictions, applied one at a time so the log reports each count. */
drop if !inrange(ar09yr, 20, 60);
drop if ar07 != 1;
keep if ph04 == 4 | ph04 == 5;
keep if ar16 <= 9;          /* also drops missing ar16, as "drop if ar16 > 9" did */

/* Blank out ph07r1 values above 20,000,000 before taking logs. */
replace ph07r1 = . if ph07r1 > 20000000;
generate lwage = ln(ph07r1);

/* IV regression of log wage on ar16, instrumented with bi6. */
xi: ivreg2 lwage (ar16 = bi6) ar09yr i.cidprop i.cidkab;

/* NOTE(review): "s" is not a built-in Stata command. Unless a user-written
   s.ado is installed, the do-file stops here with an error, so nothing below
   is executed. Presumably a deliberate halt - confirm before removing. */
s;



/* Individual-level file: attach the three nonres_pareduc tempfiles in the
   order 00, 97, 93, then the labor file, always matching on pidlink.
   NOTE(review): the tempfiles nonres_pareduc00, nonres_pareduc97 and
   nonres_pareduc93 are not created anywhere in the visible script - confirm
   they are defined before this point is reached. */
use $c_data/ifls_individ.dta, clear;

foreach wave in 00 97 93 {;
    sort pidlink;
    merge pidlink using `nonres_pareduc`wave'';
    tab _merge;
    drop _merge;
};
sort pidlink;

merge pidlink using $c_data/ifls_labor;
tab _merge;
*keep if _m ==3; 
drop _merge;

/* hhid00 -> commid00 crosswalk taken from htrack; it provides the 2000 id
   used for linking community variables. */
tempfile commid;
preserve;
use `hhdata'/htrack, clear;
keep hhid00 commid00;
sort hhid00;
save `commid', replace;
restore;

** distance from community SAR; 
sort hhid00;
merge hhid00 using `commid';
tab _merge;
*keep if _m ==3; 
drop _m;

order commid00 hhid00 pidlink;
sort commid00;
merge commid00 using $c_data/com_fac;
tab _merge;
/* This excludes SAR information if households have moved to non-IFLS areas,
   so this sample selection needs to be pointed out. A possible imputation can
   be done: use commid97 to replace commid00 wherever the household has moved,
   so that community information for those households comes from 1997. Simply
   replace commid00 with commid97 wherever commid00 contains letters. The merge
   with the com_fac ... (the original note breaks off here). */

*keep if  _merge == 3; 
drop _merge;

/* Pull pwt00xa from ptrack into a tempfile keyed on pidlink, attach it to
   the data in memory, then attach the eduhist file. */
tempfile weights;
preserve;
use `hhdata'/ptrack, clear;
keep pidlink pwt00xa;
sort pidlink;
save `weights', replace;
restore;

sort pidlink;
merge pidlink using `weights';
tab _merge;
*keep if _merge == 3;  
drop _merge;

sort pidlink;
merge pidlink using $c_data\eduhist;
tab _merge;
drop _merge;

** ROSTER: Religion; 

/* The global ifls00 is not defined at the top of this script, unlike ifls93
   and community93. An undefined global expands to nothing, which would turn
   the path below into "/bk_ar1.dta". Stop with a clear message instead; this
   check has no effect when the global has been set elsewhere. */
if "$ifls00" == "" {;
    display as error "global ifls00 is not defined: set it to the directory that holds bk_ar1.dta";
    exit 198;
};

/* Keep only pidlink and ar15 from bk_ar1 and attach them by pidlink. */
preserve; 
use $ifls00/bk_ar1.dta, clear; 
keep pidlink ar15; 
sort pidlink; 
tempfile relig; 
save `relig', replace; 
restore; 

sort pidlink; 
merge pidlink using `relig'; tab _merge; drop _m;

** location at 12, evermoved since 12?;
sort pidlink; 
merge pidlink using $c_data\migration.dta; tab _merge; drop _merge; 

/* Limit the sample to ages 25-60 (age_97) with female97 == 0.
   NOTE(review): the original comment read "salaried working age males", but
   no restriction on salaried work is applied in these lines. */
keep if inrange(age_97, 25, 60);
drop if female97 != 0;

*generate basic variables;

/* dschool = 1 when educ97 > 9, 0 otherwise, and missing when educ97 is the
   system missing value. */
generate dschool = (educ97 > 9) if educ97 != .;
generate learnhr97 = log(earnhr97);
rename age_97 age;

** Parental education: is missing for those who are no longer in the household;
** Insert a dummy if not in the household;

codebook feducstat00;   /* original author's note: no missing */

codebook feduc00 meduc00;

/* Father's education category feduc. It starts from the 2000 in-household
   variables; each later section only fills observations where feduc is still
   system missing, so the order of the sections sets the priority. */
generate feduc = 0 if feducstat00 == 2;
replace feduc = 1 if feduc00 >= 1  & feduc00 <= 6;     /* elementary  */
replace feduc = 2 if feduc00 >= 7  & feduc00 <= 9;     /* junior high */
replace feduc = 3 if feduc00 >= 10 & feduc00 <= 12;    /* senior high */
replace feduc = 4 if feduc00 >= 13 & feduc00 <= 22;    /* tertiary    */
/* disabled in the original: replace feduc = 99 if inlist(feducstat00,3)
   (non-resident parents) */

** non-resident parents from 2000; 
replace feduc = 0  if feduc == . & ba07ap == 3;
replace feduc = 1  if feduc == . & ba08p == 2;
replace feduc = 2  if feduc == . & inlist(ba08p, 3,4);
replace feduc = 3  if feduc == . & inlist(ba08p, 5,6);
replace feduc = 4  if feduc == . & inlist(ba08p, 60,61,62,63,13);
replace feduc = 98 if feduc == . & inlist(ba08p, 11,12,14,70,72,73,74,90);

/* from 1997 */
replace feduc = 0  if feduc == . & ba08p_97 == 1;
replace feduc = 1  if feduc == . & ba08p_97 == 2;
replace feduc = 2  if feduc == . & inlist(ba08p_97, 3,4);
replace feduc = 3  if feduc == . & inlist(ba08p_97, 5,6);
replace feduc = 4  if feduc == . & inlist(ba08p_97, 7,8,9);
replace feduc = 98 if feduc == . & inlist(ba08p_97, 10,11,12,13,14,17,70,90);

/* from 1993 */
replace feduc = 0  if feduc == . & ba08_f_93 == 1;
replace feduc = 1  if feduc == . & ba08_f_93 == 2;
replace feduc = 2  if feduc == . & inlist(ba08_f_93, 3,4);
replace feduc = 3  if feduc == . & inlist(ba08_f_93, 5,6);
replace feduc = 4  if feduc == . & inlist(ba08_f_93, 7,8,9);
replace feduc = 98 if feduc == . & ba08_f_93 == 10;

/* from feduc97 in household parents */
replace feduc = 0 if feduc == . & feduc97 == 0;
replace feduc = 1 if feduc == . & inrange(feduc97, 1, 6);
replace feduc = 2 if feduc == . & inrange(feduc97, 7, 9);
replace feduc = 3 if feduc == . & inrange(feduc97, 10, 12);
replace feduc = 4 if feduc == . & inrange(feduc97, 13, 22);

** maternal education; 

/* Mother's education category meduc, built in the same way as the recodes
   in this section show: start from the 2000 in-household variables, then
   fill observations that are still system missing from the 2000, 1997 and
   1993 non-resident-parent variables, and finally from meduc97. Code 98 is
   assigned to the remaining listed source codes; its substantive meaning is
   not documented here - TODO confirm against the questionnaire. */
gen meduc = 0 if meducstat00 == 2;  
replace meduc = 1 if inrange(meduc00, 1,6);
replace meduc = 2 if inrange(meduc00, 7, 9);
replace meduc = 3 if inrange(meduc00, 10, 12);
replace meduc = 4 if inrange(meduc00, 13, 22);
*replace meduc = 99 if inlist(meducstat00, 3);

/* from 2000 */
recode meduc . = 0 if ba07am == 3; 
recode meduc . = 1 if inlist(ba08m, 2); 
recode meduc . = 2 if inlist(ba08m, 3,4);
recode meduc . = 3 if inlist(ba08m, 5,6); 
recode meduc . = 4 if inlist(ba08m, 60,61,62,63,13); 
recode meduc . = 98 if inlist(ba08m, 11,12,14,70,72,73,74,90);

/* from 1997 */
recode meduc . = 0 if ba08m_97 == 1; 
recode meduc . = 1 if inlist(ba08m_97, 2); 
recode meduc . = 2 if inlist(ba08m_97, 3,4);
recode meduc . = 3 if inlist(ba08m_97, 5,6); 
recode meduc . = 4 if inlist(ba08m_97, 7,8,9); 
recode meduc . = 98 if inlist(ba08m_97, 10,11,12,13,14,17,70,90);

/* maternal education: from 1993 */
recode meduc . = 0 if ba08_m_93 == 1; 
recode meduc . = 1 if inlist(ba08_m_93, 2); 
recode meduc . = 2 if inlist(ba08_m_93, 3,4);
recode meduc . = 3 if inlist(ba08_m_93, 5,6); 
recode meduc . = 4 if inlist(ba08_m_93, 7,8,9); 
recode meduc . = 98 if inlist(ba08_m_93, 10);

/* from meduc97 in household parents.
   The original lines here recoded feduc from feduc97 a second time, a
   copy-paste of the paternal section that left meduc without this fallback.
   NOTE(review): assumes meduc97 exists in the merged data and uses the same
   0-22 coding as feduc97 - confirm. */
recode meduc . = 0 if meduc97 == 0;  
recode meduc .= 1 if inrange(meduc97, 1,6);    
recode meduc .= 2 if inrange(meduc97, 7, 9) ;  
recode meduc .= 3 if inrange(meduc97, 10, 12); 
recode meduc .= 4 if inrange(meduc97, 13, 22); 

/* Anything still system missing in either parent variable becomes 999. */
for var feduc meduc : recode X . = 999; 
/* Original author's note: there are still many missing observations in
   parental education. Use meduc97 as well since the variable was created by
   Kathleen. */

/* Indicator variables from the parental education categories:
     unsch_*   category 0
     elem_*    category 1
     jsec_*    categories 2, 3 or 4
     edumiss_* category 999
   The original jsec lines used "(feduc ==2,3,4)", which is not a valid Stata
   expression; inlist() tests membership in the list instead.
   The final replace in each group names the four new dummies explicitly, so
   that unrelated variables whose names happen to end in _f or _m are never
   touched. */

gen unsch_f = (feduc ==0); 
gen elem_f = (feduc==1); 
gen jsec_f = inlist(feduc, 2,3,4); 
gen edumiss_f = (feduc == 999); 
for var unsch_f elem_f jsec_f edumiss_f: replace X =. if feduc ==.; 


gen unsch_m = (meduc ==0); 
gen elem_m = (meduc==1); 
gen jsec_m = inlist(meduc, 2,3,4); 
gen edumiss_m = (meduc == 999); 
for var unsch_m elem_m jsec_m edumiss_m: replace X = . if meduc ==.;  

** religion; 
/* Indicators from ar15. The ar15 == 1 indicator (r_muslim) is left commented
   out, as in the original; presumably it is the omitted category. */
*gen r_muslim = (ar15 ==1); 
generate r_protest = inlist(ar15, 2);
generate r_cathol = inlist(ar15, 3);
generate r_other = (ar15 == 4 | ar15 == 5 | ar15 == 6);
/* Every variable matching r_* is set to missing where ar15 is missing. */
for var r_*: replace X = . if ar15 ==.; 

** modify community variables;

/* lk03 codes 210, 220, 230 and 260 are pooled into 999. */
recode lk03 (210 220 230 260 = 999);

** binary treatment variable for upper secondary; 
/* Rebuild dschool from scratch: 1 when educ97 > 9, 0 otherwise, and missing
   when educ97 is the system missing value. */
drop dschool;
generate dschool = (educ97 > 9) if educ97 != .;

** school type and other vars; 
generate age2 = age^2;
/* stype takes st_punr, falling back to st_pur where st_punr equals 0. */
generate stype = cond(st_punr == 0, st_pur, st_punr);

/* Run label.do from the program directory. */
do $progdir\label.do;

sort pidlink;
save $c_data/ind_temp, replace;
