#delimit ;

* Session setup. Commands end with a semicolon from here on;
clear;
set more off;
set memory 200m;
capture log close;

* Household-book folders that share one parent folder;
local hhdata "I:/Schooling/hh_data";
global bk_k  "`hhdata'/bk_k";
global bk_3a "`hhdata'/bk_3a";
global bk_tk "`hhdata'/bk_tk";
global bk_b3b "I:\Schooling\hh_data\bk_b3b";

* Earlier-wave household data. The names suggest the 1993 and 1997 waves;
global ifls93 "I:\Schooling\alldata\hh1993";
global ifls97 "I:\Schooling\alldata\hh1997";

* Project folders for created data, programs and results;
global c_data "I:/Schooling\created_data";
global progdir "I:/Schooling\programs";
global results "I:/Schooling\programs\ols_iv\results";


* Lookup file panelch with pidlink and dl07e from b3a_dl1, sorted on pidlink
* so that the old-style merges further down can use it;
use pidlink dl07e using $bk_3a/b3a_dl1, clear;
sort pidlink;
tempfile panelch;
save `panelch', replace;

**********EDUC HISTORY FOR LOWER SECONDARY SCHOOL**********;
** Educ history from 1993 wave;
* Rows with dl08 equal to 2 only. dl08 is renamed dl2type to match the
* name used in the later waves, and each kept item gets the suffix _yr93;

use $ifls93\buk3dl2, clear;
rename dl08 dl2type;
keep if dl2type == 2;

local items93 "dl2type dl10 dl11 dl13 dl14a dl14b dl14c dl14d dl14e dl14f dl15";
keep pidlink `items93';
foreach v of varlist `items93' {;
    rename `v' `v'_yr93;
};

sort pidlink;
tempfile eduhist93;
save `eduhist93', replace;

** add variables from section B3A_DL3 (repeated grades, type of school,
** score, distance) using 2000 data;

* Step 1 - dl15 for dl3type 2 from b3a_dl3, stored in tempfile dl15.
* NOTE(review) the preserve and restore pair puts back the data that was in
* memory before this step, which the next use command then discards - confirm
* whether the pair is still needed;
preserve;
    use $bk_3a/b3a_dl3, clear;
    keep if dl3type == 2;
    keep pidlink dl15;
    sort pidlink;
    tempfile dl15;
    save `dl15', replace;
restore;

* Step 2 - dl2type 2 rows of b3a_dl2, items dl15x through dl15a in file
* order, with dl15 from step 1 added by pidlink;
use $bk_3a/b3a_dl2, clear;
keep if dl2type == 2;
keep pidlink dl15x-dl15a;
summarize;
sort pidlink;
merge pidlink using `dl15';
tabulate _merge;
drop _merge;

* Tag every 2000 item with the suffix _yr00;
foreach v of varlist dl15x-dl15a dl15 {;
    rename `v' `v'_yr00;
};

* Step 3 - save a copy as tempfile eduhist00, then add every pidlink found
* in panelch. NOTE(review) eduhist00 is not read again in the visible code;
sort pidlink;
tempfile eduhist00;
save `eduhist00', replace;
merge pidlink using `panelch';
tabulate _merge;
drop _merge dl07e;

** data from 1997;
* The 2000-based data in memory is set aside while the 1997 extract is built;
preserve;
    use $ifls97/b3a_dl2, clear;
    keep if dl2type == 2;
    keep pidlink dl2type-dl15b;
    foreach v of varlist dl2type-dl15b {;
        rename `v' `v'_yr97;
    };
    sort pidlink;
    merge pidlink using `panelch';
    tabulate _merge;

    * check is set only when dl07e equals 1 - value 1 if the person is in both
    * the 1997 extract and panelch, value 2 if the person is in panelch only.
    * Original note - are panel missings in 2000 not found in 1997 or never
    * attended school;
    generate check = 1 if _merge == 3 & dl07e == 1;
    replace check = 2 if check == . & _merge == 2 & dl07e == 1;

    drop _merge dl07e;
    tempfile eduhist97;
    sort pidlink;
    summarize;
    save `eduhist97', replace;
restore;

* Add the 1997 items and then the 1993 items to the restored data by pidlink;
sort pidlink;
merge pidlink using `eduhist97';
tabulate _merge;
drop _merge;

sort pidlink;
merge pidlink using `eduhist93';
tabulate _merge;
drop _merge;

** create pre-treatment variables from IFLS1, 2 and 3;

// type of school attended
// Take the 2000 answer, fall back to 1997 and then 1993 when still missing.
// Codes above 8 are set to missing.
generate stype_ls = dl11_yr00;
foreach wave in 97 93 {;
    replace stype_ls = dl11_yr`wave' if stype_ls == .;
};
replace stype_ls = . if stype_ls > 8;

// One indicator per school-type code: punr is code 1, pur 2, prnr 3, prisl 4,
// and prchrist covers codes 5 to 8.
local code 1;
foreach kind in punr pur prnr prisl {;
    generate st_`kind'_ls = (stype_ls == `code');
    local code = `code' + 1;
};
generate st_prchrist_ls = inlist(stype_ls, 5, 6, 7, 8);

// The indicators are missing, not zero, when the school type is unknown.
foreach v of varlist st_*_ls {;
    replace `v' = . if stype_ls == .;
};

// everfailed
// Same wave priority as above. Codes above 3 become missing and code 3
// is stored as 0.
generate failed = dl13_yr00;
foreach wave in 97 93 {;
    replace failed = dl13_yr`wave' if failed == .;
};
replace failed = . if failed > 3;
replace failed = 0 if failed == 3;

// repeated grades
// Per wave: row sum of the six dl14 items. A zero sum counts as missing
// unless failed is 0, in which case it is kept as a true zero.
foreach wave in 00 97 93 {;
    egen repeated_n`wave' = rsum(dl14a_yr`wave' dl14b_yr`wave' dl14c_yr`wave'
                                 dl14d_yr`wave' dl14e_yr`wave' dl14f_yr`wave');
    replace repeated_n`wave' = . if repeated_n`wave' == 0;
    replace repeated_n`wave' = 0 if repeated_n`wave' == . & failed == 0;
};

// Combined count: 2000 first, then 1997, then 1993.
generate repeated_n = repeated_n00;
foreach wave in 97 93 {;
    replace repeated_n = repeated_n`wave' if repeated_n == .;
};
replace repeated_n = 0 if failed == 0 & repeated_n == .;

// Counts above 4 are dropped to missing and a count of 4 is stored as 3.
replace repeated_n = . if repeated_n > 4;
replace repeated_n = 3 if repeated_n == 4;

// Indicator: 0 when failed is 0, otherwise 1 when a positive count is known.
generate repeated = 0 if failed == 0;
replace repeated = 1 if repeated == . & repeated_n > 0 & repeated_n != .;

// worked during school
// The 2000 answer, then 1997, then 1993. Codes 6 and 9 become missing and
// code 3 is stored as 0.
generate work_sch = dl15_yr00;
foreach wave in 97 93 {;
    replace work_sch = dl15_yr`wave' if work_sch == .;
};
replace work_sch = . if inlist(work_sch, 6, 9);
replace work_sch = 0 if work_sch == 3;

// Give the derived variables the _ls suffix so they stay distinct from the
// unsuffixed variables created in the elementary section below.
foreach stem in work_sch repeated repeated_n failed {;
    rename `stem' `stem'_ls;
};

// The pattern st*ls keeps stype_ls as well as the st_ indicator variables.
keep pidlink st*ls work_sch_ls failed_ls repeated_n_ls repeated_ls;

sort pidlink;
tempfile educhist_ls;
save `educhist_ls', replace;
**********END EDUC HISTORY FOR LOWER SECONDARY SCHOOL**********;

**********REDO EDUC HISTORY FOR ELEMENTARY SCHOOL**********;

** Educ history from 1993 wave;
* Rows with dl08 equal to 1 only. dl08 is renamed dl2type to match the
* name used in the later waves, and each kept item gets the suffix _yr93;

use $ifls93\buk3dl2, clear;
rename dl08 dl2type;
keep if dl2type == 1;

local items93 "dl2type dl10 dl11 dl13 dl14a dl14b dl14c dl14d dl14e dl14f dl15";
keep pidlink `items93';
foreach v of varlist `items93' {;
    rename `v' `v'_yr93;
};

sort pidlink;
tempfile eduhist93;
save `eduhist93', replace;

** add variables from section B3A_DL3 (repeated grades, type of school,
** score, distance) using 2000 data;

* Step 1 - dl15 for dl3type 1 from b3a_dl3, stored in tempfile dl15.
* NOTE(review) the preserve and restore pair puts back the data that was in
* memory before this step, which the next use command then discards - confirm
* whether the pair is still needed;
preserve;
    use $bk_3a/b3a_dl3, clear;
    keep if dl3type == 1;
    keep pidlink dl15;
    sort pidlink;
    tempfile dl15;
    save `dl15', replace;
restore;

* Step 2 - dl2type 1 rows of b3a_dl2, items dl15x through dl15a in file
* order, with dl15 from step 1 added by pidlink;
use $bk_3a/b3a_dl2, clear;
keep if dl2type == 1;
keep pidlink dl15x-dl15a;
summarize;
sort pidlink;
merge pidlink using `dl15';
tabulate _merge;
drop _merge;

* Tag every 2000 item with the suffix _yr00;
foreach v of varlist dl15x-dl15a dl15 {;
    rename `v' `v'_yr00;
};

* Step 3 - save a copy as tempfile eduhist00, then add every pidlink found
* in panelch. NOTE(review) eduhist00 is not read again in the visible code;
sort pidlink;
tempfile eduhist00;
save `eduhist00', replace;
merge pidlink using `panelch';
tabulate _merge;
drop _merge dl07e;

** data from 1997;
* The 2000-based data in memory is set aside while the 1997 extract is built;
preserve;
    use $ifls97/b3a_dl2, clear;
    keep if dl2type == 1;
    keep pidlink dl2type-dl15b;
    foreach v of varlist dl2type-dl15b {;
        rename `v' `v'_yr97;
    };
    sort pidlink;
    merge pidlink using `panelch';
    tabulate _merge;

    * check is set only when dl07e equals 1 - value 1 if the person is in both
    * the 1997 extract and panelch, value 2 if the person is in panelch only.
    * Original note - are panel missings in 2000 not found in 1997 or never
    * attended school;
    generate check = 1 if _merge == 3 & dl07e == 1;
    replace check = 2 if check == . & _merge == 2 & dl07e == 1;

    drop _merge dl07e;
    tempfile eduhist97;
    sort pidlink;
    summarize;
    save `eduhist97', replace;
restore;

* Add the 1997 items and then the 1993 items to the restored data by pidlink;
sort pidlink;
merge pidlink using `eduhist97';
tabulate _merge;
drop _merge;

sort pidlink;
merge pidlink using `eduhist93';
tabulate _merge;
drop _merge;

** create pre-treatment variables from IFLS1, 2 and 3;

// type of school attended
// Take the 2000 answer, fall back to 1997 and then 1993 when still missing.
// Codes above 8 are set to missing.
generate stype = dl11_yr00;
foreach wave in 97 93 {;
    replace stype = dl11_yr`wave' if stype == .;
};
replace stype = . if stype > 8;

// One indicator per school-type code: punr is code 1, pur 2, prnr 3, prisl 4,
// and prchrist covers codes 5 to 8.
local code 1;
foreach kind in punr pur prnr prisl {;
    generate st_`kind' = (stype == `code');
    local code = `code' + 1;
};
generate st_prchrist = inlist(stype, 5, 6, 7, 8);

// The indicators are missing, not zero, when the school type is unknown.
foreach v of varlist st_* {;
    replace `v' = . if stype == .;
};

// everfailed
// Same wave priority as above. Codes above 3 become missing and code 3
// is stored as 0.
generate failed = dl13_yr00;
foreach wave in 97 93 {;
    replace failed = dl13_yr`wave' if failed == .;
};
replace failed = . if failed > 3;
replace failed = 0 if failed == 3;

// repeated grades
// Per wave: row sum of the six dl14 items. A zero sum counts as missing
// unless failed is 0, in which case it is kept as a true zero.
foreach wave in 00 97 93 {;
    egen repeated_n`wave' = rsum(dl14a_yr`wave' dl14b_yr`wave' dl14c_yr`wave'
                                 dl14d_yr`wave' dl14e_yr`wave' dl14f_yr`wave');
    replace repeated_n`wave' = . if repeated_n`wave' == 0;
    replace repeated_n`wave' = 0 if repeated_n`wave' == . & failed == 0;
};

// Combined count: 2000 first, then 1997, then 1993.
generate repeated_n = repeated_n00;
foreach wave in 97 93 {;
    replace repeated_n = repeated_n`wave' if repeated_n == .;
};
replace repeated_n = 0 if failed == 0 & repeated_n == .;

// Counts above 4 are dropped to missing and a count of 4 is stored as 3.
replace repeated_n = . if repeated_n > 4;
replace repeated_n = 3 if repeated_n == 4;

// Indicator: 0 when failed is 0, otherwise 1 when a positive count is known.
generate repeated = 0 if failed == 0;
replace repeated = 1 if repeated == . & repeated_n > 0 & repeated_n != .;

// worked during school
// The 2000 answer, then 1997, then 1993. Codes 6 and 9 become missing and
// code 3 is stored as 0.
generate work_sch = dl15_yr00;
foreach wave in 97 93 {;
    replace work_sch = dl15_yr`wave' if work_sch == .;
};
replace work_sch = . if inlist(work_sch, 6, 9);
replace work_sch = 0 if work_sch == 3;

// NOTE(review): stype itself is not kept here, while the lower secondary
// keep list retains stype_ls through its st*ls pattern. Confirm which is intended.
keep pidlink st_* work_sch failed repeated_n repeated;

// Attach the lower secondary variables by pidlink and write the final file.
sort pidlink;
merge pidlink using `educhist_ls';
tabulate _merge;
drop _merge;

sort pidlink;
save $c_data\eduhist, replace;
