
#delimit;

/* 

Program created by Liana Jacobi, Department of Economics, University of Melbourne 

Last updated: June 2015

Creates Data for "Bayesian Fuzzy Regression Discontinuity Analysis and Returns to Schooling"
by Siddhartha Chib and Liana Jacobi, accepted by Journal of Applied Econometrics May 2015

Part 1:
- creates the large cohort data set from the UK General Household Surveys
- defines key earnings variables etc
- code based on do file by Devereux and Hart (Economic Journal paper, 2010)
- parts related to NES data omitted

Part 2:
- defines variables for the RDD design, such as discrete education and the forcing variable
- creates short cohort samples based on three- and four-year windows around the policy change
- creates auxiliary samples for prior formulation for the 3- and 4-year window analyses

Data access is restricted but available from the UK government on the website of http://data.gov.uk/ 
Access is obtained to data via online application for data use at http://data.gov.uk/data-request.
See also http://data.gov.uk/dataset/general_household_survey  for info on survey years available.


*/

clear;

/*
   Path placeholders.

   The original file contained the following free text at this point:

       Define
       path1 path1\Stata\Devereux\
       path2 path1

   These lines are not Stata commands. Under "#delimit" with a semicolon
   delimiter they ran together with the log command below into one invalid
   statement, so they are kept here as a comment only.

   The token path1 at the start of every file path in this do-file is a
   placeholder. Replace it with the root directory that holds the Data and
   Stata folders before running.
*/

/* Close any log left open by an earlier run so that log using cannot fail
   with a "log file already open" error. */
capture log close;
log using "path1\Stata\Devereux\paper\temp\read_ukghs7998_chibjacobi2015_fuzzyrdd.log", replace;


/* Memory and matrix-size settings.
   NOTE(review): set mem is a legacy setting. Newer Stata releases are
   expected to ignore it - confirm for the version in use. */
set mem 550000;
set matsize 400;

********************************************************************************************************;
************************   Part 1   ****************************************************************;
********************************************************************************************************;


********************************************************************************************************;
************************READ IN GHS DATA ****************************************************************;


**************;
** 79 ***;
**************;

/* GHS 1979 (single file): keep the analysis variables, harmonise their names,
   restrict to ages 16-65, derive birth-year variables and save ghhs79.dta.
   NOTE(review): the input file name ends in .por although it is opened with
   -use-. Confirm that the file really is a Stata-format dataset. */
use "path1\Data\UKGHS79\UKDA-1831-stata6\stata6\ghs79.por", clear;

keep
    houseno perno
    sex age marital whborn arriveuk
    agelfts agelftc hedqual
    employed selfemp workhrs
    payweek paygross incself
    month year;
summarize;

/* Harmonise identifiers and date variables with the later survey years. */
rename houseno hserno;
rename perno   persno;
rename whborn  cob;
rename month   dmonth;
rename year    hyear;

generate agelfted = agelfts;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 79;
generate dobirthy = hyear - age;

/* Alternative birth year: survey year minus age, minus one more year when the
   interview month is June or earlier. Stata treats a missing dmonth as larger
   than 7, so such rows take the first rule. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* Rescale gross pay by a factor of 100. */
replace paygross = paygross * 100;

save "path1\Stata\Devereux\paper\temp\ghhs79.dta", replace;

summarize;


**************;
** 80 ***;
**************;

/* GHS 1980 (single file): keep the analysis variables, harmonise their names,
   restrict to ages 16-65, derive birth-year variables and save ghhs80.dta. */
use "path1\Data\UKGHS80\UKDA-1897-stata8\stata8\ghs80.dta", clear;

keep
    houseno perno
    sex age marital whborn arriveuk
    agelfts agelftc hedqual
    employed selfemp workhrs
    payweek paygross incself
    month year;
summarize;

/* Harmonise identifiers and date variables with the later survey years. */
rename houseno hserno;
rename perno   persno;
rename whborn  cob;
rename month   dmonth;
rename year    hyear;

generate agelfted = agelfts;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 80;
generate dobirthy = hyear - age;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* Rescale gross pay by a factor of 100. */
replace paygross = paygross * 100;

save "path1\Stata\Devereux\paper\temp\ghhs80.dta", replace;

summarize;


**************;
** 81 ***;
**************;

/* GHS 1981 (single file): keep the analysis variables, harmonise their names,
   restrict to ages 16-65, derive birth-year variables and save ghhs81.dta. */
use "path1\Data\UKGHS81\UKDA-2080-stata8\stata8\ghs81.dta", clear;

keep
    houseno perno
    sex age marital whborn arriveuk
    agelfts agelftc hedqual
    employed selfemp workhrs
    payweek paygross incself
    month year;
summarize;

/* Harmonise identifiers and date variables with the later survey years. */
rename houseno hserno;
rename perno   persno;
rename whborn  cob;
rename month   dmonth;
rename year    hyear;

generate agelfted = agelfts;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 81;
generate dobirthy = hyear - age;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* Rescale gross pay by a factor of 100. */
replace paygross = paygross * 100;

save "path1\Stata\Devereux\paper\temp\ghhs81.dta", replace;


summarize;


**************;
** 82 ***;
**************;

/* GHS 1982 (single file): keep the analysis variables, harmonise their names,
   restrict to ages 16-65, derive birth-year variables and save ghhs82.dta. */
use "path1\Data\UKGHS82\UKDA-2127-stata6\stata6\ghs82.dta", clear;

keep
    houseno perno
    sex age marital whborn arriveuk
    agelfts agelftc hedqual
    employed selfemp workhrs
    payweek paygross incself
    month year;
summarize;

/* Harmonise identifiers and date variables with the later survey years. */
rename houseno hserno;
rename perno   persno;
rename whborn  cob;
rename month   dmonth;
rename year    hyear;

generate agelfted = agelfts;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 82;
generate dobirthy = hyear - age;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* Rescale gross pay by a factor of 100. */
replace paygross = paygross * 100;

save "path1\Stata\Devereux\paper\temp\ghhs82.dta", replace;

summarize;


**************;
** 83 ***;
**************;

/* GHS 1983 comes as several files. Each step below loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset. */

/* person file -> temp11 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\person.dta", clear;
keep hserno persno
     sex age marstat cob arruk origin schedtyp;
sort hserno persno;
save temp11, replace;

/* indivdl file, first pass -> temp1 (this is the master for the merges) */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     cne cnemnjb cni edlev selfemp econsta tea uge ugi;
sort hserno persno;
save temp1, replace;

/* educatn file -> temp2 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\educatn.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals degree;
sort hserno persno;
save temp2, replace;

/* income file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\income.dta", clear;
keep hserno persno
     suppen pygrsusl pyprdusl payusual payperd paynet paygross pynetusl
     secjob
     grsprft regprft proftout noprofit prftshr prftprd
     grsbonus netbonus paybonus
     sjgrspay sjprfgrs sjprd sjprfprd;
sort hserno persno;
save temp3, replace;

/* indivdl file, second pass -> temp4 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     worklwk suppben;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only, interview date renamed) */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\househld.dta", clear;
keep hserno
     day month year
     region hhld hohspugi tothhcni hohcni gb;
rename day   hdayno;
rename month dmonth;
rename year  hyear;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* leisure file -> temp8.
   Each activity variable is recoded to 1 when it was non-missing and 0 when it
   was missing. holiday is kept unchanged.
   The original wrote "replace book = ..." and "(drin~=.)", which only worked
   through variable-name abbreviation. Full names are used here so the step
   also runs with varabbrev switched off. */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\leisure.dta", clear;
keep hserno persno holiday tv radio records books visit food drink garden;
foreach act of varlist tv radio records books visit food drink garden {;
    replace `act' = (`act' != .);
};
sort hserno persno;
save temp8, replace;

/* ---- added by Paul: birth date (faminf1) and hours worked (employed) ---- */

/* faminf1 file -> temp9 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\faminf1.dta", clear;
keep hserno persno
     yearborn monborn;
sort hserno persno;
save temp9, replace;

/* employed file -> temp10 */
use "path1\Data\UKGHS83\UKDA-2099-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;


/* Assemble the 1983 person-level file: start from temp1 and merge each
   person-level extract on hserno persno (old-style merge syntax, which needs
   sorted data). The match result is tabulated and _merge dropped each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp8 temp9 temp10 temp11 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

keep
    hserno persno sex age hyear marstat region cob origin worklwk
    agelftsc furthred lastsch agelftft quals degree
    genhlth illness limitact
    hohcni tothhcni
    edlev tea uge ugi
    paygross payperd
    holiday tv radio records books visit food drink garden
    econsta suppen selfemp yearborn dmonth monborn workhrs pygrsusl pyprdusl
    schedtyp secjob payusual
    grsprft regprft proftout noprofit prftshr prftprd
    grsbonus netbonus paybonus gb
    sjgrspay sjprfgrs sjprd sjprfprd;


/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 83;
generate dobirthy = hyear - age;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

save "path1\Stata\Devereux\paper\temp\ghhs83.dta", replace;

summarize;


**************;
** 84 ***;
**************;

/* GHS 1984 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset.
   The extracts are then merged, restricted to ages 16-65 and saved. */

/* person file -> temp11 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\person.dta", clear;
keep hserno persno
     sex age marstat cob arruk origin schedtyp;
sort hserno persno;
save temp11, replace;

/* indivdl file, first pass -> temp1 (master for the merges) */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     cne cnemnjb cni edlev selfemp econsta tea uge ugi;
sort hserno persno;
save temp1, replace;

/* educatn file -> temp2 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\educatn.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals degree;
sort hserno persno;
save temp2, replace;

/* income file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\income.dta", clear;
keep hserno persno
     suppen pygrsusl pyprdusl payusual payperd paynet paygross pynetusl
     secjob
     grsprft regprft proftout noprofit prftshr prftprd
     grsbonus netbonus paybonus
     sjgrspay sjprfgrs sjprd sjprfprd;
sort hserno persno;
save temp3, replace;

/* indivdl file, second pass -> temp4 (supbnrec renamed to suppben) */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     worklwk supbnrec;
rename supbnrec suppben;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only, interview date renamed) */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\househld.dta", clear;
keep hserno
     day month year
     region hhld hohspugi tothhcni hohcni gb;
rename day   hdayno;
rename month dmonth;
rename year  hyear;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* ---- added by Paul: birth date (faminf1) and hours worked (employed) ---- */

/* faminf1 file -> temp9 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\faminf1.dta", clear;
keep hserno persno
     yearborn monborn;
sort hserno persno;
save temp9, replace;

/* employed file -> temp10 */
use "path1\Data\UKGHS84\UKDA-2154-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the person-level file: merge each person-level extract onto temp1
   by hserno persno, tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp9 temp10 temp11 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

keep
    hserno persno sex age hyear marstat region cob origin worklwk
    agelftsc furthred lastsch agelftft quals degree
    genhlth illness limitact
    hohcni tothhcni
    edlev tea uge ugi
    paygross payperd pygrsusl pyprdusl
    econsta suppen selfemp dmonth monborn yearborn workhrs
    schedtyp secjob payusual
    grsprft regprft proftout noprofit prftshr prftprd
    grsbonus netbonus paybonus gb
    sjgrspay sjprfgrs sjprd sjprfprd;


/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 84;
generate dobirthy = hyear - age;
/* Fall back to 84 as the survey year where hyear - age is missing. */
replace  dobirthy = 84 - age if dobirthy == .;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* ** added by me ** : birth year and month as recorded in faminf1 */
generate dobyear  = yearborn;
generate dobmonth = monborn;


save "path1\Stata\Devereux\paper\temp\ghhs84.dta", replace;



**************;
** 85 ***;
**************;

/* GHS 1985 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset.
   The extracts are then merged, restricted to ages 16-65 and saved. */

/* person file -> temp11 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\person.dta", clear;
keep hserno persno
     sex age marstat cob arruk origin;
sort hserno persno;
save temp11, replace;

/* housing1 file -> temp1 (master for the merges).
   NOTE(review): the file is named housing1.dta but is read here for
   person-keyed variables - confirm this is the intended source file. */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\housing1.dta", clear;
keep hserno persno
     cne cnemnjb cni edlev selfempa econsta lasted tea uge ugi;
sort hserno persno;
save temp1, replace;

/* educatn file -> temp2 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\educatn.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals degree;
sort hserno persno;
save temp2, replace;

/* income file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\income.dta", clear;
keep hserno persno
     suppen pygrsusl pyprdusl payusual payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* indivdl file -> temp4 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     worklwk empstat selfemp;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only, interview date renamed) */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\househld.dta", clear;
keep hserno
     day month year
     region hhld hohspugi tothhcni hohcni;
rename day   hdayno;
rename month dmonth;
rename year  hyear;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* ---- added by Paul: birth date (faminf1) and hours worked (employed) ---- */

/* faminf1 file -> temp9 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\faminf1.dta", clear;
keep hserno persno
     yearborn monborn;
sort hserno persno;
save temp9, replace;

/* employed file -> temp10 */
use "path1\Data\UKGHS85\UKDA-2349-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;


/* Assemble the person-level file: merge each person-level extract onto temp1
   by hserno persno, tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp9 temp10 temp11 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

keep
    hserno persno sex age hyear marstat region cob origin worklwk
    agelftsc furthred lastsch agelftft quals degree
    genhlth illness limitact
    hohcni tothhcni
    edlev tea uge ugi
    paygross payperd econsta suppen selfemp
    dmonth monborn yearborn workhrs;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

generate datyear  = 85;
generate dobirthy = hyear - age;
/* Fall back to 85 as the survey year where hyear - age is missing. */
replace  dobirthy = 85 - age if dobirthy == .;

/* Alternative birth year: subtract one extra year for interviews held in
   June or earlier. */
generate dobirthy_paul = hyear - age     if dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if dmonth <= 6;

/* ** added by me ** : birth year and month as recorded in faminf1 */
generate dobyear  = yearborn;
generate dobmonth = monborn;


save "path1\Stata\Devereux\paper\temp\ghhs85.dta", replace;
summarize;


**************;
** 86 ***;
**************;

/* GHS 1986 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset. */

/* person file -> temp11 (includes recorded birth year and month) */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\person.dta", clear;
keep hserno persno
     sex age marstat cob arruk origin dobirthy dobirthm;
sort hserno persno;
save temp11, replace;

/* housing1 file -> temp1 (master for the merges).
   NOTE(review): the file is named housing1.dta but is read here for
   person-keyed variables - confirm this is the intended source file. */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\housing1.dta", clear;
keep hserno persno
     cne cnemnjb cni edlev selfempa econsta lasted tea uge ugi;
sort hserno persno;
save temp1, replace;

/* educatn file -> temp2 */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\educatn.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals degree;
sort hserno persno;
save temp2, replace;

/* income file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\income.dta", clear;
keep hserno persno
     suppen pygrsusl pyprdusl payusual payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* indivdl file -> temp4 */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     worklwk empstat selfemp;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only, interview date renamed).
   The day variable is called dayno in this year. */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\househld.dta", clear;
keep hserno
     dayno month year
     region hhld hohspugi tothhcni hohcni;
rename dayno hdayno;
rename month dmonth;
rename year  hyear;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS86\UKDA-2569-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the 1986 person-level file: merge each person-level extract onto
   temp1 by hserno persno (old-style merge syntax, which needs sorted data),
   tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp10 temp11 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

keep hserno persno sex age hyear marstat region cob origin worklwk
agelftsc furthred lastsch agelftft quals degree
genhlth illness limitact
hohcni tothhcni
edlev tea uge ugi
paygross payperd econsta suppen selfemp dobirthy dobirthm dmonth workhrs;

/* Working-age sample: ages 16 to 65 inclusive. */
keep if age>=16 & age<=65;
gen datyear =86;

/* Paul's birth year: the recorded dobirthy, replaced by a value derived from
   survey year and age where dobirthy is missing or negative. */
gen dobirthy_paul=dobirthy;
replace dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

/* ** added by me ** : recorded birth year and month.
   FIX: these are now taken BEFORE dobirthy is overwritten below. Previously
   dobyear was generated after the overwrite, so in this survey year it held
   hyear - age instead of the recorded birth year, unlike the other survey
   years where dobyear holds the recorded value.
   NOTE(review): this changes dobyear in ghhs86.dta - confirm against the
   intended definition before relying on replication output. */
gen dobyear = dobirthy;
gen dobmonth = dobirthm;

/*CHANGE -- note Phil's code replaces already existing dobirthy*/
replace dobirthy = hyear-age;
replace dobirthy=86-age if dobirthy==.;


save "path1\Stata\Devereux\paper\temp\ghhs86.dta", replace;

tab dobirthy;
su;

**************;
** 87 ***;
**************;


/* GHS 1987 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset.
   The extracts are then merged, restricted to ages 16-65 and saved. */

/* person file -> temp1 (master for the merges) */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\person.dta", clear;
keep hserno persno
     sex age marstat cob arruk origin
     dobirthd dobirthm dobirthy
     age1marr cne cnemnjb cni edlev selfempa econsta
     lasted longill lsill tea
     uge ugi;
sort hserno persno;
save temp1, replace;

/* educatn file -> temp2 */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\educatn.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals degree;
sort hserno persno;
save temp2, replace;

/* income file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\income.dta", clear;
keep hserno persno
     suppen pygrsusl pyprdusl payusual payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* indivdl file -> temp4 */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\indivdl.dta", clear;
keep hserno persno
     worklwk empstat selfemp unemwtjb;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only) */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\househld.dta", clear;
keep hserno
     hdayno hmonth hyear
     region hhld hohspugi tothhcni hohcni;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS87\UKDA-2679-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the person-level file: merge each person-level extract onto temp1
   by hserno persno, tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

/* Interview month takes the name used in the other survey years. */
rename hmonth dmonth;

keep
    hserno persno sex age hyear marstat region cob origin worklwk dobirthy dobirthm
    agelftsc furthred lastsch agelftft quals degree
    genhlth illness limitact
    hohcni tothhcni hohspugi
    edlev tea uge ugi
    selfempa econsta suppen selfemp empstat dmonth workhrs;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);
generate datyear = 87;

/* Paul's birth year is only filled where dobirthy is missing or negative.
   Rows with a usable dobirthy are left missing here, unlike the 1986 section,
   which starts from dobirthy.
   NOTE(review): confirm that later code fills those rows. */
generate dobirthy_paul = hyear - age     if (dobirthy == . | dobirthy < 0) & dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if (dobirthy == . | dobirthy < 0) & dmonth <= 6;

/* Phil's birth year: survey year minus age, only where dobirthy is missing. */
generate dobirthy_phil = hyear - age if dobirthy == .;

/* ** added by me ** : recorded birth year and month */
generate dobyear  = dobirthy;
generate dobmonth = dobirthm;


save "path1\Stata\Devereux\paper\temp\ghhs87.dta", replace;


**************;
** 88 - 89 ***;
**************;


/* GHS 1988-89 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset.
   The extracts are then merged, restricted to ages 16-65 and saved.
   File names noted as "formerly" are the commented-out alternatives that
   appeared in the original code. */

/* person file -> temp1 (master for the merges) */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\person.dta", clear;
keep hserno persno
     sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr cne cnemnjb cni edlev selfempa econsta
     lasted longill lsill mainjbcn mainjbug tea
     uge ugi;
sort hserno persno;
save temp1, replace;

/* educmast file -> temp2 (formerly ghhs8889\stata\g888918.dta) */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\educmast.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals;
sort hserno persno;
save temp2, replace;

/* empleinc file -> temp3 (formerly ghhs8889\stata\g888934.dta).
   Names repeated in the original keep list are listed once. */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\empleinc.dta", clear;
keep hserno persno
     pygrsusl pyprdusl payusual incchka payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* empmast file -> temp4 (formerly ghhs8889\stata\g88898.dta) */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\empmast.dta", clear;
keep hserno persno
     worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 (formerly ghhs8889\stata\g888920.dta) */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (formerly ghhs8889\stata\g88891.dta).
   Keyed on hserno only. hohpugi is renamed to hohspugi. */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\househld.dta", clear;
keep hserno
     hdayno hmonth hyear
     region hhld hohpugi tothhcni hohcni;
rename hohpugi hohspugi;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 (formerly ghhs8889\stata\g888930.dta) */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* Disabled in the original (no temp8 is built for this year):
use "ghhs8889\stata\g888919.dta", clear
keep hserno persno levcode
sort hserno persno
save temp8, replace
*/

/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS8889\UKDA-2724-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the person-level file: merge each person-level extract onto temp1
   by hserno persno, tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

/* Interview month takes the name used in the other survey years. */
rename hmonth dmonth;

keep
    hserno persno sex age hyear marstat region cob origin worklwk dobirthy dobirthm
    agelftsc furthred lastsch agelftft quals
    genhlth illness limitact
    hohcni tothhcni hohspugi
    claimben edlev tea uge ugi
    selfempa econsta selfemp empstat dmonth workhrs;
generate datyear = 88;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

/* Paul's birth year is only filled where dobirthy is missing or negative. */
generate dobirthy_paul = hyear - age     if (dobirthy == . | dobirthy < 0) & dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if (dobirthy == . | dobirthy < 0) & dmonth <= 6;

/* Phil's birth year: survey year minus age, only where dobirthy is missing. */
generate dobirthy_phil = hyear - age if dobirthy == .;

/* ** added by me ** : recorded birth year and month */
generate dobyear  = dobirthy;
generate dobmonth = dobirthm;


save "path1\Stata\Devereux\paper\temp\ghhs88.dta", replace;




**************;
** 89 - 90 ***;
**************;


/* GHS 1989-90 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset.
   The extracts are then merged, restricted to ages 16-65 and saved.
   File names noted as "formerly" are the commented-out alternatives that
   appeared in the original code. */

/* person file -> temp1 (master for the merges).
   selfempe and ecstaa are renamed to the names used in earlier years. */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\person.dta", clear;
keep hserno persno
     sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr cne cnemnjb cni edlev1 edlev2 edlev7 edlev selfempe ecstaa
     lasted longill lsill mainjbcn mainjbug tea
     uge ugi sege;
rename selfempe selfempa;
rename ecstaa   econsta;
sort hserno persno;
save temp1, replace;

/* educmast file -> temp2 (formerly ghhs8990\stata\ghs8918.dta) */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\educmast.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals;
sort hserno persno;
save temp2, replace;

/* empleinc file -> temp3 (formerly ghhs8990\stata\ghs8934.dta).
   Names repeated in the original keep list are listed once. */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\empleinc.dta", clear;
keep hserno persno
     pygrsusl pyprdusl payusual incchka payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* empmast file -> temp4 (formerly ghhs8990\stata\ghs898.dta) */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\empmast.dta", clear;
keep hserno persno
     worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 (formerly ghhs8990\stata\ghs8920.dta) */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (formerly ghhs8990\stata\ghs891.dta).
   Keyed on hserno only. hohpugi is renamed to hohspugi. */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\househld.dta", clear;
keep hserno
     hdayno hmonth hyear
     region hhld hohpugi tothhcni hohcni;
rename hohpugi hohspugi;
sort hserno;
save temp6, replace;

/* unemplyd file -> temp7 (formerly ghhs8990\stata\ghs8930.dta) */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\unemplyd.dta", clear;
keep hserno persno
     claimben;
sort hserno persno;
save temp7, replace;

/* Disabled in the original (no temp8 is built for this year):
use "ghhs8990\stata\ghs8919.dta", clear
keep hserno persno levcode
sort hserno persno
save temp8, replace
*/

/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS8990\UKDA-2832-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the person-level file: merge each person-level extract onto temp1
   by hserno persno, tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

/* Interview month takes the name used in the other survey years. */
rename hmonth dmonth;

keep
    hserno persno sex age hyear marstat region cob origin worklwk dobirthy dobirthm
    agelftsc furthred lastsch agelftft quals
    genhlth illness limitact
    hohcni tothhcni hohspugi
    claimben edlev2 edlev tea uge ugi
    selfempa empstat econsta dmonth selfemp workhrs;
generate datyear = 89;

/* Working-age sample: ages 16 to 65 inclusive. */
drop if !inrange(age, 16, 65);

/* Paul's birth year is only filled where dobirthy is missing or negative. */
generate dobirthy_paul = hyear - age     if (dobirthy == . | dobirthy < 0) & dmonth >= 7;
replace  dobirthy_paul = hyear - age - 1 if (dobirthy == . | dobirthy < 0) & dmonth <= 6;

/* Phil's birth year: survey year minus age, only where dobirthy is missing. */
generate dobirthy_phil = hyear - age if dobirthy == .;

/* ** added by me ** : recorded birth year and month */
generate dobyear  = dobirthy;
generate dobmonth = dobirthm;


save "path1\Stata\Devereux\paper\temp\ghhs89.dta", replace;



**************;
** 90 - 91 ***;
**************;

/* GHS 1990-91 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset. */

/* person file -> temp1 (master for the merges).
   selfempe and ecstaa are renamed to the names used in earlier years. */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\person.dta", clear;
keep hserno persno
     sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr cne cnemnj89 cnemnj90 cnemnjb cni89 cni90 cni
     dadlftsc edhusb edlev1 edlev2 edlev7 edlev
     edwife lasted longill lsill mainjbcn mainjbug mumlftsc tea
     uge89 uge90 uge ugemnj89 ugemnj90 ugi89 ugi90 ugi
     sege selfempe ecstaa;
rename selfempe selfempa;
rename ecstaa   econsta;
sort hserno persno;
save temp1, replace;

/* educmast file -> temp2 */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\educmast.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals;
sort hserno persno;
save temp2, replace;

/* empleinc file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\empleinc.dta", clear;
keep hserno persno
     pygrsusl pyprdusl payusual incchka payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* empmast file -> temp4 */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\empmast.dta", clear;
keep hserno persno
     worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only).
   insuphs is renamed to suppen and hohpugi to hohspugi. */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\househld.dta", clear;
keep hserno
     hdayno hmonth hyear insuphs
     region hhld reg17 reg2
     hohpgi89 hohpgi90 hohpugi
     thhcni89 thhcni90 thhugi89 thhugi90 tothhcni hohcni;
rename insuphs suppen;
rename hohpugi hohspugi;
sort hserno;
save temp6, replace;

/* incmast file -> temp7 */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\incmast.dta", clear;
keep hserno persno
     unembn;
sort hserno persno;
save temp7, replace;

/* sports file -> temp8.
   Each listed activity variable is recoded to 1 when its value equals 1 and to
   0 otherwise (missing included). walk2ml, sport and othsport are kept
   unchanged.
   The original wrote "replace book = ...", which only worked through
   variable-name abbreviation. The full name books is used here so the step
   also runs with varabbrev switched off. The original extra test for
   non-missing is dropped because a value equal to 1 is never missing. */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\sports.dta", clear;
keep  hserno persno walk2ml sport othsport tv radio records books visit garden;
foreach act of varlist tv radio books visit records garden {;
    replace `act' = (`act' == 1);
};
sort hserno persno;
save temp8, replace;
/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS9091\UKDA-2937-stata8\stata8\employed.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

/* Assemble the 1990-91 person-level file: merge each person-level extract onto
   temp1 by hserno persno (old-style merge syntax, which needs sorted data),
   tabulating and dropping _merge each time. */
use temp1, clear;
foreach part in temp2 temp3 temp4 temp5 temp7 temp8 temp10 {;
    merge hserno persno using `part';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};

/* Household-level variables are attached on hserno alone. */
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

/* Interview month takes the name used in the other survey years. */
rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk dobirthy dobirthm
agelftsc furthred lastsch agelftft quals
genhlth illness limitact
hohcni tothhcni
unembn edlev2 edlev tea uge  ugi
walk2ml sport othsport tv radio records books visit garden
hohspugi
selfemp selfempa empstat econsta suppen dmonth workhrs;
gen datyear =90;

/* Working-age sample: ages 16 to 65 inclusive. */
keep if age>=16 & age<=65;

/* FIX: fill a missing survey year BEFORE it is used. In the original this
   replace came after dobirthy_paul had been computed from hyear, so rows with
   a missing hyear got a missing dobirthy_paul even though the fill exists to
   give them a survey year. */
replace hyear=90 if hyear==.;

/* Paul's birth year is only filled where dobirthy is missing or negative.
   One extra year is subtracted for interviews held in June or earlier. */
gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

/* ** added by me ** : recorded birth year and month */
gen dobyear = dobirthy;
gen dobmonth = dobirthm;


save "path1\Stata\Devereux\paper\temp\ghhs90.dta", replace;

tab dobirthy;
su;


**************;
** 91 - 92 ***;
**************;

/* GHS 1991-92 comes as several files. Each step loads one file, keeps the
   variables needed later, sorts on the merge key and saves a temp dataset. */

/* person file -> temp1 (master for the merges).
   selfempe and ecstaa are renamed to the names used in earlier years. */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\person.dta", clear;
keep hserno persno
     sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr cne cnemnj89 cnemnj90 cnemnjb cni89 cni90 cni
     dadlftsc edhusb edlev1 edlev2 edlev7 edlev
     edwife lasted longill lsill mainjbcn mainjbug mumlftsc tea91 tea
     uge89 uge90 uge ugemnj89 ugemnj90 ugi89 ugi90 ugi
     sege selfemp selfempe ecstaa;
rename selfempe selfempa;
rename ecstaa   econsta;
sort hserno persno;
save temp1, replace;

/* educmast file -> temp2 */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\educmast.dta", clear;
keep hserno persno
     agelftsc furthred lastsch agelftft quals;
sort hserno persno;
save temp2, replace;

/* empleinc file -> temp3 (names repeated in the original keep list are listed once) */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\empleinc.dta", clear;
keep hserno persno
     pygrsusl pyprdusl payusual incchka payperd paynet paygross pynetusl;
sort hserno persno;
save temp3, replace;

/* empmast file -> temp4 */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\empmast.dta", clear;
keep hserno persno
     lookwork worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

/* health file -> temp5 */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\health.dta", clear;
keep hserno persno
     genhlth illness limitact;
sort hserno persno;
save temp5, replace;

/* househld file -> temp6 (keyed on hserno only).
   insuphs is renamed to suppen and hohpugi to hohspugi. */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\househld.dta", clear;
keep hserno
     hdayno hmonth hyear insuphs
     region hhld reg17 reg2
     hohpgi89 hohpgi90 hohpugi
     thhcni89 thhcni90 thhugi89 thhugi90 tothhcni hohcni;
rename insuphs suppen;
rename hohpugi hohspugi;
sort hserno;
save temp6, replace;

/* incmast file -> temp7 */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\incmast.dta", clear;
keep hserno persno
     unembn;
sort hserno persno;
save temp7, replace;

/* ---- added by Paul: hours worked (employed) -> temp10 ---- */
use "path1\Data\UKGHS9192\UKDA-2986-stata8\stata8\employed.dta", clear;
keep hserno persno
     workhrs;
sort hserno persno;
save temp10, replace;


use temp1, clear;
merge hserno persno using temp2; tabulate _merge; drop _merge; sort hserno persno;
merge hserno persno using temp3; tabulate _merge; drop _merge; sort hserno persno;
merge hserno persno using temp4; tabulate _merge; drop _merge; sort hserno persno;
merge hserno persno using temp5; tabulate _merge; drop _merge; sort hserno persno;
merge hserno persno using temp7; tabulate _merge; drop _merge; sort hserno persno;
merge hserno persno using temp10; tabulate _merge; drop _merge; sort hserno persno;

merge hserno using temp6; tabulate _merge; drop _merge; sort hserno persno;

rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk dobirthy dobirthm
lookwork agelftsc furthred lastsch agelftft quals
genhlth illness limitact 
hohcni tothhcni
unembn edlev2 edlev tea uge  ugi
selfemp selfempa empstat econsta suppen dmonth workhrs;
gen datyear =91;
keep if age>=16 & age<=65;

gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

replace hyear=91 if hyear==.;

** added by me **;
gen dobyear = dobirthy;
gen dobmonth = dobirthm;

save "path1\Stata\Devereux\paper\temp\ghhs91.dta", replace;



**************;
** 92 - 93 ***;
**************;

* Builds ghhs92.dta from the 1992-93 files. Each source file is trimmed and
  saved as a sorted temp file, person-level pieces are merged on hserno persno
  and the household piece on hserno, then the sample is cut to ages 16-65;
* The final keep list now names qualsb and cigever in full. The previous
  spellings (quals, cigeve) matched those variables only through variable-name
  abbreviation and break under -set varabbrev off-;

* Person file;
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\person.dta", clear;
keep hserno persno sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr geind gincind grosspay grprofit takhmpay dadlftsc edhusb
     edlev1 edlev2 edlev7 edlev edwife lasted longill lsill mumlftsc tea
     sege cigsaday cigsmkng cgsmwday cgsmwend selfempe ecstaa;
rename selfempe selfempa;
rename ecstaa econsta;
sort hserno persno;
save temp1, replace;

* Education file;
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\educmast.dta", clear;
keep hserno persno agelftsc furthred lastsch agelftft qualsb;
sort hserno persno;
save temp2, replace;

* Income file (statbnm1 and statbnm2 are used below to build unembn);
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\income.dta", clear;
keep statbnm1 statbnm2 hserno persno pyperiod takehome grossam;
sort hserno persno;
save temp3, replace;

* Employment file;
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\empmast.dta", clear;
keep hserno persno worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

* Health file;
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\health.dta", clear;
keep hserno persno genhlth illness limitact;
sort hserno persno;
save temp5, replace;

* Household file (keyed on hserno only);
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\househld.dta", clear;
keep insup92 hdayno hmonth hyear hserno region hhld ninchhld ninchoh
     ginchhld ginchoh reg17 reg2;
rename insup92 suppen;
sort hserno;
save temp6, replace;

* Smoking file;
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\smoking.dta", clear;
keep hserno persno smokever cignow qtywkend qtywkday cigever;
sort hserno persno;
save temp7, replace;

* workhrs from the employed file (step added by Paul);
use "path1\Data\UKGHS9293\UKDA-3166-stata8\stata8\employed.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

* Person-level merges in the original order, then the household merge;
use temp1, clear;
foreach piece in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `piece';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

* unembn is 1 when either statbnm1 or statbnm2 equals 4, otherwise 2;
* NOTE(review): unembn is not in the keep list below, so it is dropped before
  saving - confirm whether it was meant to be kept (the 1991-92 extract keeps it);
gen unembn = 1 if statbnm1==4 | statbnm2==4;
replace unembn = 2 if unembn~=1;
rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk
     dobirthy dobirthm
     agelftsc furthred lastsch agelftft qualsb
     genhlth illness limitact
     geind gincind ninchhld ninchoh ginchhld ginchoh
     smokever cignow qtywkend qtywkday cigever
     edlev2 edlev tea
     selfempa selfemp econsta empstat suppen dmonth workhrs;
gen datyear = 92;
keep if age>=16 & age<=65;

* Birth year implied by age and interview date, only where dobirthy is
  missing or negative. Interviews with dmonth of 6 or less subtract one more year;
gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

** added by me **;
gen dobyear  = dobirthy;
gen dobmonth = dobirthm;

save "path1\Stata\Devereux\paper\temp\ghhs92.dta", replace;


**************;
** 93 - 94 ***;
**************;

* Builds ghhs93.dta from the 1993-94 files. Each source file is trimmed and
  saved as a sorted temp file, person-level pieces are merged on hserno persno
  and the household piece on hserno, then the sample is cut to ages 16-65;
* Fixes: qualsb and books are now named in full (quals and book matched them
  only through variable-name abbreviation, which breaks under
  -set varabbrev off-), and a stray doubled delimiter after the household sort
  is removed;

* Person file;
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\person.dta", clear;
keep hserno persno sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr geind gincind grosspay grprofit takhmpay dadlftsc edhusb
     edlev1 edlev2 edlev7 edlev edwife lasted longill lsill mumlftsc tea
     selfempe ecstaa sege smkeith smokesta;
rename selfempe selfempa;
rename ecstaa econsta;
sort hserno persno;
save temp1, replace;

* Education file;
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\educmast.dta", clear;
keep hserno persno agelftsc furthred lastsch agelftft qualsb;
sort hserno persno;
save temp2, replace;

* Income file (statbnm1 and statbnm2 are used below to build unembn);
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\income.dta", clear;
keep statbnm1 statbnm2 hserno persno pyperiod takehome grossam;
sort hserno persno;
save temp3, replace;

* Employment file;
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\empmast.dta", clear;
keep hserno persno worklwk unemwtjb selfemp empstat;
sort hserno persno;
save temp4, replace;

* Health file;
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\health.dta", clear;
keep hserno persno genhlth illness limitact;
sort hserno persno;
save temp5, replace;

* Household file (keyed on hserno only);
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\househld.dta", clear;
keep insup92 hdayno hmonth hyear hserno region hhld ninchhld ninchoh
     ginchhld ginchoh reg17 reg2;
rename insup92 suppen;
sort hserno;
save temp6, replace;

* Sports and leisure file. Each activity item is recoded to 1 when the
  original value is 1 and to 0 otherwise (missing included);
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\sports.dta", clear;
keep hserno persno walk2ml sport tv radio records books visit garden;
foreach act of varlist tv radio books visit records garden {;
    replace `act' = (`act'==1);
};
sort hserno persno;
save temp7, replace;

* workhrs from the employed file (step added by Paul);
use "path1\Data\UKGHS9394\UKDA-3170-stata8\stata8\employed.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

* Person-level merges in the original order, then the household merge;
use temp1, clear;
foreach piece in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `piece';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

* unembn is 1 when either statbnm1 or statbnm2 equals 4, otherwise 2;
* NOTE(review): unembn is not in the keep list below, so it is dropped before
  saving - confirm whether it was meant to be kept;
gen unembn = 1 if statbnm1==4 | statbnm2==4;
replace unembn = 2 if unembn~=1;
rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk
     dobirthy dobirthm
     agelftsc furthred lastsch agelftft qualsb
     genhlth illness limitact
     geind gincind ninchhld ninchoh ginchhld ginchoh
     smkeith smokesta
     edlev2 edlev tea
     walk2ml sport tv radio records books visit garden
     selfempa selfemp econsta empstat suppen dmonth workhrs;
gen datyear = 93;
keep if age>=16 & age<=65;

* Birth year implied by age and interview date, only where dobirthy is
  missing or negative. Interviews with dmonth of 6 or less subtract one more year;
gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

** added by me **;
gen dobyear  = dobirthy;
gen dobmonth = dobirthm;

save "path1\Stata\Devereux\paper\temp\ghhs93.dta", replace;



**************;
** 94 - 95 ***;
**************;

* Builds ghhs94.dta from the 1994-95 files. Each source file is trimmed and
  saved as a sorted temp file, person-level pieces are merged on hserno persno
  and the household piece on hserno, then the sample is cut to ages 16-65;
* The final keep list now names worklwk1, qualsb and cigever in full. The
  previous spellings (worklwk, quals, cigeve) matched them only through
  variable-name abbreviation and break under -set varabbrev off-;

* Person file;
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\person.dta", clear;
keep hserno persno sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr geind gincind grosspay grprofit takhmpay dadlftsc edhusb
     edlev1 edlev2 edlev7 edlev edwife lasted longill lsill mumlftsc tea
     selfempe ecstaa sege smkeith smokesta;
rename selfempe selfempa;
rename ecstaa econsta;
sort hserno persno;
save temp1, replace;

* Education file;
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\educmast.dta", clear;
keep hserno persno agelftsc furthred lastsch agelftft qualsb;
sort hserno persno;
save temp2, replace;

* Income file (statbnm1 and statbnm2 are used below to build unembn);
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\income.dta", clear;
keep statbnm1 statbnm2 hserno persno pyperiod takehome grossam;
sort hserno persno;
save temp3, replace;

* Employment file (this wave carries worklwk1 and unemwtj1);
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\empmast.dta", clear;
keep hserno persno worklwk1 unemwtj1 selfemp empstat;
sort hserno persno;
save temp4, replace;

* Health file;
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\health.dta", clear;
keep hserno persno genhlth illness limitact;
sort hserno persno;
save temp5, replace;

* Household file (keyed on hserno only);
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\househld.dta", clear;
keep insup92 hdayno hmonth hyear hserno region hhld ninchhld ninchoh
     ginchhld ginchoh reg17 reg2;
rename insup92 suppen;
sort hserno;
save temp6, replace;

* Smoking file;
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\smoking.dta", clear;
keep hserno persno smokever cignow qtywkend qtywkday cigever;
sort hserno persno;
save temp7, replace;

* workhrs, read from empmast.dta in this wave (step added by Paul);
use "path1\Data\UKGHS9495\UKDA-3538-stata8\stata8\empmast.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

* Person-level merges in the original order, then the household merge;
use temp1, clear;
foreach piece in temp2 temp3 temp4 temp5 temp7 temp10 {;
    merge hserno persno using `piece';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

* unembn is 1 when either statbnm1 or statbnm2 equals 4, otherwise 2;
* NOTE(review): unembn is not in the keep list below, so it is dropped before
  saving - confirm whether it was meant to be kept;
gen unembn = 1 if statbnm1==4 | statbnm2==4;
replace unembn = 2 if unembn~=1;
rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk1
     dobirthy dobirthm
     agelftsc furthred lastsch agelftft qualsb
     genhlth illness limitact
     geind gincind ninchhld ninchoh ginchhld ginchoh
     smkeith smokesta smokever cignow qtywkend qtywkday cigever
     edlev2 edlev tea
     selfempa selfemp econsta empstat suppen dmonth workhrs;

gen datyear = 94;
keep if age>=16 & age<=65;

* Birth year implied by age and interview date, only where dobirthy is
  missing or negative. Interviews with dmonth of 6 or less subtract one more year;
gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

** added by me **;
gen dobyear  = dobirthy;
gen dobmonth = dobirthm;

save "path1\Stata\Devereux\paper\temp\ghhs94.dta", replace;
su;


**************;
** 95 - 96 ***;
**************;

* Builds ghhs95.dta from the 1995-96 files. Each source file is trimmed and
  saved as a sorted temp file, the pieces are merged, and the sample is cut
  to ages 16-65;
* The final keep list now names worklwk1 and qualsb in full. The previous
  spellings (worklwk, quals) matched them only through variable-name
  abbreviation and break under -set varabbrev off-;

* Person file;
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\person.dta", clear;
keep hserno persno sex dobirthd dobirthm dobirthy age marstat cob arruk origin
     age1marr geind gincind grosspay grprofit takhmpay dadlftsc edhusb
     edlev1 edlev2 edlev7 edlev edwife lasted longill lsill mumlftsc tea
     selfempe ecstaa sege smkeith smokesta;
rename selfempe selfempa;
rename ecstaa econsta;
sort hserno persno;
save temp1, replace;

* Education file;
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\educmast.dta", clear;
keep hserno persno agelftsc furthred lastsch agelftft qualsb;
sort hserno persno;
save temp2, replace;

* Income file (statbnm1 and statbnm2 are used below to build unembn);
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\income.dta", clear;
keep statbnm1 statbnm2 hserno persno pyperiod takehome grossam;
sort hserno persno;
save temp3, replace;

* Employment file (this wave carries worklwk1 and unemwtj1);
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\empmast.dta", clear;
keep hserno persno worklwk1 unemwtj1 selfemp empstat;
sort hserno persno;
save temp4, replace;

* Health file;
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\health.dta", clear;
keep hserno persno genhlth illness limitact;
sort hserno persno;
save temp5, replace;

* Household file (keyed on hserno only);
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\househld.dta", clear;
keep insup92 hdayno hmonth hyear hserno region hhld ninchhld ninchoh
     ginchhld ginchoh reg17 reg2;
rename insup92 suppen;
sort hserno;
save temp6, replace;

* workhrs, read from empmast.dta in this wave (step added by Paul);
use "path1\Data\UKGHS9596\UKDA-3690-stata8\stata8\empmast.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

* Merge order is kept as in the original: person-level pieces temp2-temp5,
  then the household piece, then workhrs;
use temp1, clear;
foreach piece in temp2 temp3 temp4 temp5 {;
    merge hserno persno using `piece';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;
merge hserno persno using temp10;
tabulate _merge;
drop _merge;
sort hserno persno;

* unembn is 1 when either statbnm1 or statbnm2 equals 4, otherwise 2;
* NOTE(review): unembn is not in the keep list below, so it is dropped before
  saving - confirm whether it was meant to be kept;
gen unembn = 1 if statbnm1==4 | statbnm2==4;
replace unembn = 2 if unembn~=1;
rename hmonth dmonth;

keep hserno persno sex age hyear marstat region cob origin worklwk1
     dobirthy dobirthm
     agelftsc furthred lastsch agelftft qualsb
     genhlth illness limitact
     geind gincind ninchhld ninchoh ginchhld ginchoh
     edlev2 edlev tea
     selfempa selfemp econsta empstat suppen dmonth workhrs;

gen datyear = 95;
keep if age>=16 & age<=65;

* Birth year implied by age and interview date, only where dobirthy is
  missing or negative. Interviews with dmonth of 6 or less subtract one more year;
gen dobirthy_paul = hyear-age if (dobirthy==. | dobirthy<0) & dmonth>=7;
replace dobirthy_paul = hyear-age-1 if (dobirthy==. | dobirthy<0) & dmonth<=6;

** added by me **;
gen dobyear  = dobirthy;
gen dobmonth = dobirthm;

save "path1\Stata\Devereux\paper\temp\ghhs95.dta", replace;


**************;
** 96 - 97 ***;
**************;

* Builds ghhs96.dta from the 1996-97 files. This wave has dob instead of
  dobirthy/dobirthm and the household file has qtrint and intdate instead of
  hmonth/hyear, so hyear and the birth-year variables are derived from qtrint;
* Fixes: worklwk1, qualsb, cigever and books are now named in full (the
  shortened spellings matched them only through variable-name abbreviation
  and break under -set varabbrev off-). The statement
  -replace hyear = 96 if hyear==.- is dropped because hyear has just been set
  to 96 on every row and can no longer be missing;

* Person file;
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\person.dta", clear;
keep hserno persno sex dob age marstat cob arruk origin
     age1marr geind gincind grosspay grprofit takhmpay dadlftsc edhusb
     edlev1 edlev2 edlev7 edlev edwife lasted longill lsill mumlftsc tea
     selfempe ecstaa segeadg smkeith smokesta hmonth hyear;
rename selfempe selfempa;
rename ecstaa econsta;
sort hserno persno;
save temp1, replace;

* Education file;
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\eduction.dta", clear;
keep hserno persno agelftsc furthred lastsch agelftft qualsb;
sort hserno persno;
save temp2, replace;

* Income file (statbnm1 and statbnm2 are used below to build unembn);
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\income.dta", clear;
keep statbnm1 statbnm2 hserno persno pyperiod takehome grossam;
sort hserno persno;
save temp3, replace;

* Employment file. stat and iempst are renamed to the names used for the
  other waves;
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\empmast.dta", clear;
keep hserno persno worklwk1 unemwtj1 stat iempst;
rename stat selfemp;
rename iempst empstat;
sort hserno persno;
save temp4, replace;

* Health file;
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\health.dta", clear;
keep hserno persno genhlth illness limitact;
sort hserno persno;
save temp5, replace;

* Household file (keyed on hserno only);
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\househld.dta", clear;
keep qtrint insup92 intdate hserno region hhld ninchhld ninchoh
     ginchhld ginchoh reg17 reg2;
rename insup92 suppen;
sort hserno;
save temp6, replace;

* Smoking file;
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\smoking.dta", clear;
keep hserno persno smokever cignow qtywkend qtywkday cigever;
sort hserno persno;
save temp7, replace;

* Sports and leisure file. Each activity item is recoded to 1 when the
  original value is 1 and to 0 otherwise (missing included);
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\sports.dta", clear;
keep hserno persno anywalks tv radio records books visit garden;
foreach act of varlist tv radio books visit records garden {;
    replace `act' = (`act'==1);
};
sort hserno persno;
save temp8, replace;

* workhrs, read from empmast.dta in this wave (step added by Paul);
use "path1\Data\UKGHS9697\UKDA-3804-stata8\stata8\empmast.dta", clear;
keep hserno persno workhrs;
sort hserno persno;
save temp10, replace;

* Person-level merges in the original order, then the household merge;
use temp1, clear;
foreach piece in temp2 temp3 temp4 temp5 temp7 temp8 temp10 {;
    merge hserno persno using `piece';
    tabulate _merge;
    drop _merge;
    sort hserno persno;
};
merge hserno using temp6;
tabulate _merge;
drop _merge;
sort hserno persno;

* unembn is 1 when either statbnm1 or statbnm2 equals 4, otherwise 2;
* NOTE(review): unembn is not in the keep list below, so it is dropped before
  saving - confirm whether it was meant to be kept;
gen unembn = 1 if statbnm1==4 | statbnm2==4;
replace unembn = 2 if unembn~=1;

keep hserno persno sex age intdate marstat region cob origin worklwk1 dob
     agelftsc furthred lastsch agelftft qualsb
     genhlth illness limitact
     geind gincind ninchhld ninchoh ginchhld ginchoh
     smkeith smokesta smokever cignow qtywkend qtywkday cigever
     edlev2 edlev tea
     selfempa selfemp econsta empstat suppen qtrint workhrs hyear hmonth;
su;

gen datyear = 96;

* hyear is overwritten for every row: 96, or 97 when qtrint is 4;
replace hyear = 96;
replace hyear = 97 if qtrint == 4;

* Birth year from interview year and age. The _paul version subtracts one
  more year for qtrint 1 and 4 and is left missing when qtrint is none of 1-4;
gen dobirthy = hyear-age;

gen dobirthy_paul = hyear-age if qtrint==2 | qtrint==3;
replace dobirthy_paul = hyear-age-1 if qtrint==1 | qtrint==4;

keep if age>=16 & age<=65;

save "path1\Stata\Devereux\paper\temp\ghhs96.dta", replace;

**************;
** 98 - 99 ***;
**************;

* Builds ghhs98.dta from the 1998-99 household and individual files, which
  are linked on hserial;
* Fixes: cigever is now named in full in the final keep list (cigeve matched
  it only through variable-name abbreviation and breaks under
  -set varabbrev off-). Names that were listed twice in the keep lists
  (grihoh, edlev, birth) are listed once, which keeps the same variables;

* Household file;
use "path1\Data\UKGHS9899\UKDA-4134-stata8\stata8\ghs98hh.dta", clear;
su;

keep hserial grhhold grihoh sampmth;
sort hserial;
save temp1, replace;

* Individual file;
use "path1\Data\UKGHS9899\UKDA-4134-stata8\stata8\ghs98ind2.dta", clear;

keep marstat hserial age sex persno grearn grind grhhold grihoh ntind groth
     ntihoh nthhold birth
     startdat sampyear arruk ethnic grind1 ntind1 genhlth illness lmatnum
     icd1 limitact
     country1 hohseg3 edlev edlev2 tea smokever cignow qtywkend qtywkday cigever
     looked wrking seg agelftsc ednow agelftft furthred lastsch yrmar dobirthy
     cob1 qualsb qualsc qualsd edlev1 edlev7 pyperiod takehome grossam grosspay
     selfempe stat workhrs;

rename selfempe selfempa;
* selfemp is created as a copy of selfempa for this wave;
gen selfemp = selfempa;
sort hserial;
save temp2, replace;

* The individual data stay in memory as the master for the household merge;
merge hserial using temp1;
tabulate _merge;
drop _merge;

keep hserial persno sex age sampyear marstat wrking dobirthy
     agelftsc furthred lastsch agelftft edlev2 country1
     genhlth illness limitact
     grhhold grihoh grosspay grearn
     smokever cignow qtywkend qtywkday cigever
     edlev tea
     selfempa selfemp birth cob1 sampmth workhrs grind;

gen datyear = 98;

gen hyear = 98;

rename sampmth dmonth;

* Month and year of birth taken from the date variable birth;
gen dobirthm  = month(birth);
gen dobirthyb = year(birth);

** added by me **;
gen dobyear  = dobirthyb;
gen dobmonth = dobirthm;

keep if age>=16 & age<=65;

save "path1\Stata\Devereux\paper\temp\ghhs98.dta", replace;
tab dobmonth;


************************************************************;
******MERGING THE FILES*************************************;
************************************************************;

* Stack the yearly extracts. ghhs83 is the base file and the other years are
  appended in the same order as before (84-96, 98, then 79-82);
use path2/Stata/Devereux/paper/temp/ghhs83, clear;
foreach wave in 84 85 86 87 88 89 90 91 92 93 94 95 96 98 79 80 81 82 {;
    append using path2/Stata/Devereux/paper/temp/ghhs`wave';
};

* Four-digit birth years are brought onto the two-digit scale;
replace dobirthy = dobirthy - 1900 if dobirthy > 1000;

rename dobirthy yob;
rename hyear year;

* 1996 has no dmonth but has qtrint. qtrint 1, 2, 3, 4 is mapped to
  dmonth 5, 8, 11, 2 respectively;
local qtr = 0;
foreach mth in 5 8 11 2 {;
    local ++qtr;
    replace dmonth = `mth' if qtrint == `qtr';
};

* Sample restrictions: yob missing, negative or within 15-55, survey years
  79-98, and a valid interview month;
keep if (yob==. | yob<0 | inrange(yob, 15, 55));
keep if inrange(datyear, 79, 98);
keep if inrange(dmonth, 1, 12);

* whether British Born: cob in 1-3, or cob1 equal to 1;
gen brit = ((cob>=1 & cob<=3) | (cob1==1));

* Age window. It was 28-64 originally and was widened by me to 19-64 to
  include younger cohorts for graphs;
keep if inrange(age, 19, 64);

* Whether have educational qualification: hedqual in 1-11 or edlev in 1-12;
gen qualification = ((hedqual>=1 & hedqual<=11) | (edlev>=1 & edlev<=12));




****************************************************************************;
************************PAUL'S EARNINGS MEASURE*****************************;
****************************************************************************;

* Source variable for weekly earnings by survey year
*   79-82   payweek
*   84-91   uge
*   92-97   geind
*   98+     grearn
;

* Zero and negative amounts are treated as missing;
foreach amt of varlist uge geind payweek grearn {;
    replace `amt' = . if `amt' <= 0;
};

* payweek excludes self employment income, so incself is added here. Where
  payweek is missing, incself alone is used;
replace incself = . if incself < 0;
replace payweek = payweek + incself if incself ~= .;
replace payweek = incself if payweek == .;

* weekearn takes the measure for each year group. payweek is multiplied by 100;
gen weekearn = .;
replace weekearn = payweek*100 if inrange(datyear, 79, 82);
replace weekearn = uge         if inrange(datyear, 84, 91);
replace weekearn = geind       if inrange(datyear, 92, 97);
replace weekearn = grearn      if datyear>=98;

*************CREATING UGE FOR 1983*****************************************;
* Builds wkpay from pay amounts and pay-period codes, bonuses, self-employment
  profit and second-job pay, then uses it as weekearn for datyear 83. Later
  replace statements override earlier ones, so statement order matters;

* Negative values of the pay amounts and period codes are set to missing;
replace paygross=. if paygross<0;
replace payperd=. if payperd<0;
replace pygrsusl=. if pygrsusl<0;
replace pyprdusl=. if pyprdusl<0;

su year paygross payperd pygrsusl pyprdusl uge;
tab payperd;
tab pyprdusl;

***Employed;
* payusual==2: pygrsusl is scaled by a divisor chosen from pyprdusl;
* NOTE(review): the divisors suggest period codes 1-6 mean weekly, 2-weekly,
  3-weekly, 4-weekly, calendar month and quarter - confirm against the codebook;
gen pay=pygrsusl if payusual==2;
gen wkpay=pygrsusl if pyprdusl==1 & payusual==2;
replace wkpay=pygrsusl/2 if pyprdusl==2 & payusual==2;
replace wkpay=pygrsusl/3 if pyprdusl==3 & payusual==2;
replace wkpay=pygrsusl/4 if pyprdusl==4 & payusual==2;
replace wkpay=(pygrsusl*12)/52 if pyprdusl==5 & payusual==2;
replace wkpay=pygrsusl/13 if pyprdusl==6 & payusual==2;

* payusual 1 or 3: the same conversion is applied to paygross using payperd;
replace pay=paygross if payusual==1 | payusual==3;
replace wkpay=paygross if payperd==1 & (payusual==1 | payusual==3);
replace wkpay=paygross/2 if payperd==2 & (payusual==1 | payusual==3);
replace wkpay=paygross/3 if payperd==3 & (payusual==1 | payusual==3);
replace wkpay=paygross/4 if payperd==4 & (payusual==1 | payusual==3);
replace wkpay=(paygross*12)/52 if payperd==5 & (payusual==1 | payusual==3);
replace wkpay=paygross/13 if payperd==6 & (payusual==1 | payusual==3);

***Bonuses;
* Negative bonus amounts are zeroed. grsbonus is scaled by 100/52 and added to
  a non-missing, non-negative wkpay when paybonus==1;
* NOTE(review): a missing grsbonus with paybonus==1 turns wkpay into missing -
  confirm this is intended;
replace grsbonus=0 if grsbonus<0;
replace netbonus=0 if netbonus<0;
replace wkpay=wkpay+ (grsbonus*100)/52 if paybonus==1 & wkpay>=0 & wkpay~=.;

***self employed;
* selfemp==2: wkpay is replaced by grsprft scaled by 100/52;
* NOTE(review): missing values compare greater than any number in Stata, so
  grsprft>=0 is also true when grsprft is missing and wkpay is then set to
  missing - confirm this is intended;
replace wkpay=(grsprft*100)/52 if selfemp==2 & grsprft>=0;
replace prftshr=0 if prftshr<0;
* noprofit==3 and regprft==1: wkpay is proftout scaled by a divisor chosen
  from prftprd, plus prftshr/52;
replace wkpay=proftout +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==1;
replace wkpay=proftout/2 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==2;
replace wkpay=proftout/3 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==3;
replace wkpay=proftout/4 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==4;
replace wkpay=(proftout*12/52) +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==5;
replace wkpay=proftout/13 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==6;
replace wkpay=proftout/26 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==7;
replace wkpay=proftout/52 +(prftshr/52) if noprofit==3 & regprft==1 & prftprd==8;

***Second Jobs;
* Negative second-job amounts are set to missing. secpay scales sjgrspay by a
  divisor chosen from sjprd, for rows with secjob==1;
replace sjgrspay=. if sjgrspay<0;
replace sjprfgrs=. if sjprfgrs<0;
gen secpay=sjgrspay if secjob==1 & sjprd==1;
replace secpay=sjgrspay/2 if secjob==1 & sjprd==2;
replace secpay=sjgrspay/3 if secjob==1 & sjprd==3;
replace secpay=sjgrspay/4 if secjob==1 & sjprd==4;
replace secpay=sjgrspay*12/52 if secjob==1 & sjprd==5;
replace secpay=sjgrspay/13 if secjob==1 & sjprd==6;
replace secpay=sjgrspay/26 if secjob==1 & sjprd==7;
replace secpay=sjgrspay/52 if secjob==1 & sjprd==8;

* secpay2 scales sjprfgrs by 7, 20, 33, 46 or 52 according to sjprfprd;
* NOTE(review): these divisors look like numbers of weeks for banded periods -
  confirm against the codebook;
gen secpay2=sjprfgrs/7 if secjob==1 & sjprfprd==1;
replace secpay2=sjprfgrs/20 if secjob==1 & sjprfprd==2;
replace secpay2=sjprfgrs/33 if secjob==1 & sjprfprd==3;
replace secpay2=sjprfgrs/46 if secjob==1 & sjprfprd==4;
replace secpay2=sjprfgrs/52 if secjob==1 & sjprfprd==5;

* The larger of the two second-job measures is used;
gen secpayall=max(secpay,secpay2);
su secpay secpay2 secpayall;

* Second-job pay is added only when it is non-missing and below 100000;
replace wkpay=wkpay+secpayall if secpayall~=. & secpayall<100000;

* The constructed weekly pay becomes the earnings measure for 1983;
replace weekearn=wkpay if datyear==83;

*****************************************************************************;

* Negative codes of selfemp and workhrs are set to missing;
replace selfemp=. if selfemp<0;
replace workhrs=. if workhrs<0;

sort year dmonth;

* The pooled data are saved to a temporary file named by the local macro ghs;
tempfile ghs;
save `ghs', replace;


/*

**********************************************************;
******MERGE WITH RPI DATA*********************************;
**********************************************************;
use d:\hart\education\rpi_monthly;
su;
rename jan rpi1;
rename feb rpi2;
rename mar rpi3;
rename apr rpi4;
rename may rpi5;
rename jun rpi6;
rename jul rpi7;
rename aug rpi8;
rename sep rpi9;
rename oct rpi10;
rename nov rpi11;
rename dec rpi12;
su;
reshape long rpi, i(year) j(dmonth);
keep if year>=1979 & year<=2006;
replace year=year-1900;
replace rpi=(rpi*100)/173.4;***renorm to December 2001 pounds;
sort year dmonth;

merge year dmonth using `ghs';
tab _merge;
keep if _merge==3 | _merge==2;
drop _merge;

gen lnearn=log(weekearn/rpi);replace lnearn=. if weekearn/rpi<1;***set to missing if less than ?1 in December 2001 terms;
gen lnwage=log((weekearn/workhrs)/rpi) if workhrs>=1;replace lnwage=. if (weekearn/workhrs)/rpi<1;***set to missing if less than ?1 in December 2001 terms;
gen wage2001=(weekearn/workhrs)/rpi;
gen earn2001=weekearn/rpi;

replace lnearn=. if wage2001>150 & wage2001~=.;
replace lnearn=. if wage2001<1;
replace lnwage=. if wage2001>150 & wage2001~=.;
replace lnwage=. if wage2001<1;

*/


*** The monthly RPI file used in the commented-out merge above is not provided *** ;
*** Instead, deflate the earnings measures with one RPI value per survey year *** ;

*** Rescale weekly earnings by 1/100 (original note: convert from cent to Pounds) ****;
replace weekearn = weekearn/100;

/* Real weekly earnings (rweekearn) and real hourly wage (wage), base = 1998.
   Each survey year datyear has one RPI value; 162.9 is the value for 1998, so
   amounts are multiplied by 162.9 and divided by the RPI of the survey year.
   The RPI values are held as text in local macros, so the expressions below
   expand to the same literals as a year-by-year list of statements. */
local defl79 67.3698;
local defl80 75.5889;
local defl81 82.01;
local defl82 86.3;
local defl83 86.27;
local defl84 90.16;
local defl85 94.84;
local defl86 98.26;
local defl87 101.9;
local defl88 106.9;
local defl89 115.2;
local defl90 126.1;
local defl91 133.5;
local defl92 138.5;
local defl93 140.7;
local defl94 144.1;
local defl95 149.1;
local defl96 152.7;
local defl97 157.5;
local defl98 162.9;

* Create both variables on the 1979 rows, then fill the remaining survey years;
gen rweekearn = weekearn*162.9/`defl79' if datyear==79;
gen wage = (weekearn/workhrs)*162.9/`defl79' if datyear==79;
forvalues yr = 80/98 {;
    replace rweekearn = weekearn*162.9/`defl`yr'' if datyear==`yr';
    replace wage = (weekearn/workhrs)*162.9/`defl`yr'' if datyear==`yr';
};


** Continue with Paul's earnings measure creation, now on the deflated series ** ;

/* Log real weekly earnings. rweekearn is expressed in 1998 prices (it is built
   with the 1998 RPI value as numerator), so the thresholds below are in 1998
   pounds. lnearn is set to missing when
     - real weekly earnings are below 1, or
     - real earnings per hour worked are below 1, or
     - real earnings per hour worked are above 150 (and not missing). */
gen lnearn=log(rweekearn);
replace lnearn=. if rweekearn<1
    | (rweekearn/workhrs)<1
    | ((rweekearn/workhrs)>150 & (rweekearn/workhrs)!=.);


/* Log real hourly wage, defined only when at least one hour is worked, and set
   to missing when the wage or the earnings-per-hour ratio is below 1 or the
   ratio exceeds 150. */
gen lnwage=log(wage) if workhrs>=1;
replace lnwage=. if wage<1
    | workhrs<1
    | (rweekearn/workhrs)<1
    | ((rweekearn/workhrs)>150 & rweekearn!=.);




*********************PAYE THRESHOLD******************;
/* payet: annual PAYE threshold by survey year, converted to a weekly amount at
   the end. Note that the table is keyed on year (not datyear) and also lists
   years 75-78 and 99-101. Rows whose year is not listed keep payet missing. */
gen payet=675 if year==75;
replace payet=735 if year==76;
replace payet=945 if year==77;
replace payet=985 if year==78;
replace payet=1165 if year==79;
replace payet=1375 if year==80;
replace payet=1375 if year==81;
replace payet=1565 if year==82;
replace payet=1785 if year==83;
replace payet=2005 if year==84;
replace payet=2205 if year==85;
replace payet=2335 if year==86;
replace payet=2425 if year==87;
replace payet=2605 if year==88;
replace payet=2785 if year==89;
replace payet=3005 if year==90;
replace payet=3295 if year==91;
replace payet=3445 if year==92;
replace payet=3445 if year==93;
replace payet=3445 if year==94;
replace payet=3525 if year==95;
replace payet=3765 if year==96;
replace payet=4045 if year==97;
replace payet=4195 if year==98;
replace payet=4335 if year==99;
replace payet=4385 if year==100;
replace payet=4535 if year==101;

replace payet=payet/52;**weekly threshold;

/* belowpaye = 1 when weekly earnings fall below the weekly threshold.
   weekearn has already been divided by 100 earlier in this file (the step that
   converts the earnings measures to pounds), so it is compared with payet
   directly. The former test weekearn/100<payet scaled earnings down a second
   time and flagged almost every earner as below the threshold.
   A missing weekearn never satisfies the comparison, so belowpaye stays 0. */
gen belowpaye=0;replace belowpaye=1 if weekearn<payet;

*****************************************************************************;
*****************END OF (PAUL'S) EARNINGS CREATION************************************;
*****************************************************************************;

*PHIL'S EARNINGS STUFF;
/* uge is filled in from a sequence of fallbacks. The order matters because each
   step only fills rows that are still missing, except the unconditional 1998
   override. */
replace uge=. if uge<=0;*this one added to deal with 1983;*not in Phil's program;
* surveys up to 1983: use paygross when positive;
replace uge = paygross if paygross>0 & uge==. & datyear<=83;
* surveys from 1998: use grind, then grearn, when positive;
replace uge = grind if grind>0 & uge==. & datyear>=98;
replace uge = grearn if grearn>0 & uge==. & datyear>=98;
replace uge = grearn if datyear==98;***in Oreopoulos (2006), 1998 data got dropped. In his new programs, he does this;
* last resort for any survey year: geind;
replace uge=geind if uge==.;

* Annualised earnings: uge/100 times 52, kept only for 0 < uge < 1000000;
gen earn = (uge/100)*52 if uge>0 & uge<1000000;
gen learn = log(earn);

/* Phil's deflator: real earnings with base = 1998, using one RPI value per
   survey year (162.9 is the 1998 value). The table extends to datyear 106.
   Values are stored as text in locals so that each expression expands to the
   same literal as a year-by-year list of statements. */
local pdefl79 67.3698;
local pdefl80 75.5889;
local pdefl81 82.01;
local pdefl82 86.3;
local pdefl83 86.27;
local pdefl84 90.16;
local pdefl85 94.84;
local pdefl86 98.26;
local pdefl87 101.9;
local pdefl88 106.9;
local pdefl89 115.2;
local pdefl90 126.1;
local pdefl91 133.5;
local pdefl92 138.5;
local pdefl93 140.7;
local pdefl94 144.1;
local pdefl95 149.1;
local pdefl96 152.7;
local pdefl97 157.5;
local pdefl98 162.9;
local pdefl99 165.4;
local pdefl100 170.3;
local pdefl101 173.3;
local pdefl102 176.2;
local pdefl103 181.3;
local pdefl104 186.7;
local pdefl105 192.0;
local pdefl106 198.1;

gen rearn = exp(learn)*162.9/`pdefl79' if datyear==79;
forvalues yr = 80/106 {;
    replace rearn = exp(learn)*162.9/`pdefl`yr'' if datyear==`yr';
};

gen lrearn = log(rearn);




*****************************************************************************;
*****************AGE LEFT SCHOOL*********************************************;
*****************************************************************************;

/* Phil's age-left-education variable (agelfted): take tea when it lies in
   10..35, otherwise agelftft in 11..35, otherwise agelftsc in 9..26, each step
   filling only rows that are still missing. Values below 10 are discarded. */
replace agelfted = tea if inrange(tea,10,35);
replace agelfted = agelftft if inrange(agelftft,11,35) & agelfted==.;
replace agelfted = agelftsc if inrange(agelftsc,9,26) & agelfted==.;
replace agelfted=. if agelfted<10;

/* Paul's age-left-school variable (age_school): agelfts for surveys up to 1982
   and agelftsc from 1983 on. A value is kept only if it is above 9, below 25
   and not larger than the respondent's current age. */
gen age_school=agelfts if datyear<=82;
replace age_school=agelftsc if datyear>=83;
replace age_school=. if !(age_school>9 & age_school<25 & age_school<=age);
tab age_school;

* female is 1 when sex==2 and 0 otherwise, including when sex is missing;
gen female=(sex==2);

su;
tab datyear;

****************************************************************;
*******************YOB VARIABLES********************************;
****************************************************************;
*Phil's;
* yob_O: start from yob, fill missing values in the 1987-89 surveys from
  dobirthy_phil, and fall back on survey year minus age for anything still missing;
gen yob_O=yob;
replace yob_O=dobirthy_phil if datyear>=87 & datyear<=89 & yob==.;
replace yob_O=datyear-age if yob_O==.;
*Paul's;
* yob_D is a working copy of yob that is overridden survey by survey and then
  written back into yob. The statements are order dependent, because later
  lines overwrite or fill what earlier lines left;
gen yob_D=yob;
* negative codes are treated as missing;
replace yob_D=. if yob_D<0;
* 1986: use dobirthy_paul when it is available;
replace yob_D=dobirthy_paul if dobirthy_paul~=. & datyear==86;
* before 1986: always use dobirthy_paul, even when it is missing;
replace yob_D=dobirthy_paul if datyear<86;
* 1983-85: yearborn takes precedence when it is available;
replace yob_D=yearborn if yearborn~=. & datyear>=83 & datyear<=85;
* 1998: dobirthyb is reduced by 1900 to match the two-digit years used elsewhere;
replace yob_D=dobirthyb-1900 if datyear==98;
* 1987-95: fill remaining gaps from dobirthy_paul;
replace yob_D=dobirthy_paul if datyear>=87 & datyear<=95 & yob_D==.;
* 1996: always use dobirthy_paul;
replace yob_D=dobirthy_paul if datyear==96;
* last resort for every survey: survey year minus age;
replace yob_D=datyear-age if yob_D==.;
replace yob=yob_D;drop yob_D;

* Birth month: fill from monborn where missing, then discard values outside 1..12;
replace dobirthm=monborn if dobirthm==.;
replace dobirthm=. if dobirthm<1 | dobirthm>12;

****************************************************************;

****************************************************************;
*******************EARNINGS VARIABLES***************************;
****************************************************************;
*Phil's earnings measure, copied under the _O suffix;
gen lnearn_O=learn;
****************************************************************;

****************************************************************;
*******************EDUCATION VARIABLES**************************;
****************************************************************;
*Phil's schooling measure, copied under the _O suffix;
gen school_O=agelfted;
****************************************************************;

****************************************************************;
*******************SAMPLE RESTRICTIONS**************************;
****************************************************************;
/* Phil's exclusion flags, each coded 0/1:
     drop_O   - any of the three conditions below holds
     drop_s_O - schooling (school_O) is missing
     drop_e_O - log earnings (lnearn_O) is missing
     drop_y_O - year of birth yob_O is below 21
   The upper cohort bound (yob_O>51) of the original restriction was removed in
   order to generate earnings-age graphs for wider age groups. */
*former rule: replace drop_O=1 if lnearn_O==. | school_O==. | (yob_O<21 | yob_O>51);
gen drop_O=(lnearn_O==. | school_O==. | yob_O<21);
gen drop_s_O=(school_O==.);
gen drop_e_O=(lnearn_O==.);
*former rule: replace drop_y_O=1 if (yob_O<21 | yob_O>51);
gen drop_y_O=(yob_O<21);


****************************************************************;


******************************************************************************;
***********THE COMPULSORY SCHOOLING LAW***************************************;
******************************************************************************;
*Phil's law indicator: 1 for yob_O of 33 and above, 0 below.
 A missing yob_O compares as larger than 33 and is therefore coded 1;
gen law_O=(yob_O>=33);
*Paul's law indicator: 0 below 33, .75 for 33 itself, 1 above 33
 (a missing yob is also coded 1);
gen law=cond(yob<33, 0, cond(yob==33, .75, 1));
****************************************************************;

* Scaled powers (2nd to 4th) of year of birth and of age;
foreach v in yob age {;
    gen `v'2=(`v'*`v')/100;
    gen `v'3=(`v'*`v'*`v')/1000;
    gen `v'4=(`v'*`v'*`v'*`v')/10000;
};

* drop14 and drop15: left school at or before age 14 / 15, missing when age_school is missing;
gen drop14=(age_school<=14) if age_school!=.;
gen drop15=(age_school<=15) if age_school!=.;

/*
keep datyear year age female dmonth yob dobirthm qtrint selfemp age_school payet weekearn lnwage lnearn workhrs drop14 drop15 
law belowpaye brit yob2 yob3 yob4 age2 age3 age4
lnearn_O school_O yob_O drop_O drop_s_O drop_e_O drop_y_O law_O qualification;
*/



***********************************************************************;
*** Define More Schooling Intake Related Variables (added by Liana Jacobi) ***;
***********************************************************************;


* Cohort variable: two-digit year in which the subject turns 14;
gen yearat14 = yob + 14;


* Policy indicator: faces the new school leaving age of 15 if aged 14 in 1947 or later.
  A missing yearat14 compares as larger than 47 and is therefore coded 1;
gen z = (yearat14 >=47);


* Categorical school leaving indicators (for graphs and type analysis),
  based on Devereux's schooling variable age_school.
  NOTE(review): a missing age_school compares as larger than 15, so
  leaveafter15 is 1 for respondents with unknown school leaving age;
gen leave14 = (age_school == 14);
gen leave15 = (age_school == 15);
gen leaveafter15 = (age_school > 15);


* Intake variable x: 0 for leavers at 14, 1 for leavers at 15, missing otherwise;
gen x = leave15 if leave14 == 1 | leave15 == 1;


** Demographic indicators for the earnings and type model.
   male is 1 unless sex==2 (so a missing sex is coded male),
   and married is 1 only when marstat==1;
gen male = (sex!=2);
gen married = (marstat==1);



*************************************************************************;
*** Define qualification/education related variables (added by me) ***;
*************************************************************************;

/* Qualification indicators. Surveys before 1983 (year<83) are coded from
   hedqual. Later surveys use quals (1983-91), qualsb (from 1992) or lastsch.
   Within each group the pattern is: set 1, set 0, then reset to missing for
   codes that carry no information (negative codes, and for hedqual also 16 and
   above, which includes a missing hedqual because missing compares as larger
   than any number). */

* noqual: 1 = no qualification of any kind (school certificate, professional certificates etc);
gen noqual = 1 if hedqual == 15 & year<83;
replace noqual = 0 if hedqual < 15 & year<83;
replace noqual = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

replace noqual = 1 if quals ==2 & year>82 & year < 92;
replace noqual = 0 if quals ==1 & year>82 & year < 92;
* restricted to 1983-91 like the two lines above, so that a negative quals
  code cannot wipe a value that was derived from hedqual for an earlier survey;
replace noqual = .  if quals<0 & year>82 & year < 92;

replace noqual = 1 if qualsb ==2 & year > 91;
replace noqual = 0 if qualsb ==1 & year > 91;
replace noqual = .  if qualsb <0 & year > 91;


* highqual: 1 = any higher qualification (beyond school qualification - attended technical college etc);

gen highqual = 1 if (hedqual == 3 | hedqual == 4| hedqual == 5) & year<83;
replace highqual = 0 if (hedqual<3 | hedqual > 5) & year<83;
replace highqual = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

replace highqual = 1 if (lastsch==3 | lastsch==4 | lastsch==5)  & year>82;
replace highqual = 0 if (lastsch==1 | lastsch==2) & year>82;
replace highqual = . if lastsch < 0 & year>82;


* uni: 1 = university degree (attendance);

gen uni = 1 if (hedqual == 1 | hedqual == 2) & year<83;
replace uni = 0 if hedqual>2 & year<83;
replace uni = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

replace uni = 1 if lastsch == 2 & year>82;
* lastsch<. keeps a missing lastsch from being counted as no university
  (a missing value satisfies lastsch>2), in line with highqual above;
replace uni = 0 if (lastsch == 1 | (lastsch>2 & lastsch<.)) & year>82;
replace uni = . if lastsch < 0 & year>82;


* schoolonly: highest school type attended is school only, defined for 1983 and later;
gen schoolonly = 1 if lastsch==1 & year>82;
* lastsch<. again excludes a missing lastsch, which would otherwise satisfy lastsch>1;
replace schoolonly = 0 if lastsch > 1 & lastsch<. & year>82;
replace schoolonly = . if lastsch < 0 & year>82;
replace schoolonly = . if year<83;

* Highest qualification indicators for surveys up to 1982:
  school certificate or O level (hedqual 7-9), A level (6), apprenticeship (12);
gen Olevelhighest = 1 if (hedqual == 7 | hedqual == 8| hedqual == 9) & year<83;
replace Olevelhighest = 0 if (hedqual<7 | hedqual > 9) & year<83;
replace Olevelhighest = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

gen Alevelhighest = 1 if (hedqual == 6 ) & year<83;
replace Alevelhighest = 0 if (hedqual<6 | hedqual > 6) & year<83;
replace Alevelhighest = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

gen apprenthighest = 1 if (hedqual == 12 ) & year<83;
replace apprenthighest = 0 if (hedqual<12 | hedqual > 12) & year<83;
replace apprenthighest = . if (hedqual == 16 | hedqual<0 | hedqual > 16) & year<83;

* Save the full cohort data set built in Part 1.
  NOTE(review): this writes to the revise folder, while the use statements of
  Part 2 read a file of the same name from the revisionJAE folder - confirm
  which directory is intended;
save "path1\Stata\Devereux\paper\revise\ghhs7998_Devereux.dta", replace;

* Summary of every variable in the saved data set;
su;

********************************************************************************************************;
************************   Part 2   ****************************************************************;
********************************************************************************************************;


#delimit cr
* The file starts in semicolon-delimited mode, but the commands of Part 2 are
* written one per line without a terminator. Without the switch above, the
* comment banner below would run on to the next semicolon in the file and
* swallow the whole sample construction.

***********************************************************************************************************************
***********************************************************************************************************************
** 3 Year MALE SAMPLE : all males turning age 14 between October 1945 and September 1948, from 1986 to 1995 surveys
**                    + all males age 14 in 1946 or age 14 in 1948 in 1979 to 1985 surveys and the 1996 survey

** 3 year window around policy change with new forcing variable
** final sample now based only on subset of survey years with information on birth date (1986 to 1995 and 1998)
*****************************************************************************************************************
***********************************************************************************************************************

* Load the Part 1 data set, keeping rows with brit==1 only.
* NOTE(review): Part 1 saves this file under the revise folder, not revisionJAE - confirm the path.
use "path1\Stata\Devereux\paper\revisionJAE\ghhs7998_Devereux.dta" if brit==1, clear

* Sample of subjects born between October 1931 and September 1934, i.e. a
* 1.5 year window of subjects turning 14 on either side of the policy change in April 1947.
* Restrictions on sex and school leaving age are applied further below.

* Surveys after 1985 other than 1996: use exact birth year and month information
* dobyear is reduced by 1900 for the 1998 survey to match the two-digit years of the other surveys
replace dobyear = dobyear - 1900 if year == 98
sum dobyear dobmonth
* invalid birth dates (negative codes, month above 12; a missing month also satisfies > 12)
drop if dobyear < 0 & (year > 85 & year ~= 96)
drop if dobmonth < 0 & (year > 85 & year ~= 96)
drop if dobmonth > 12 & (year > 85 & year ~= 96)
* born before October 1931
drop if dobyear < 31 & (year > 85 & year ~= 96)
drop if dobyear == 31 & dobmonth < 10 & (year > 85 & year ~= 96)
* born after September 1934 (a missing dobyear also satisfies > 34)
drop if dobyear > 34 & (year > 85 & year ~= 96)
drop if dobyear == 34 & dobmonth > 9 & (year > 85 & year ~= 96)


** Surveys before 1986 and the 1996 survey: no exact birth date is used
** use reported age [age] and survey month [dmonth]
** (year-age-1) is the lower bound of the birth interval; keep lower bounds from April 1931 up to March 1934

drop if (year-age-1) < 31 & (year < 86 | year == 96)
drop if (year-age-1) == 31 & dmonth < 4 & (year < 86 | year == 96)
drop if (year-age-1) > 34 & (year < 86 | year == 96)
drop if (year-age-1) == 34 & dmonth > 3 & (year < 86 | year == 96)

** exclude subjects for whom interval includes "policy change"
drop if (year-age-1) == 33 & dmonth < 4 & (year < 86 | year == 96)
drop if (year-age-1) == 32 & dmonth > 3 & (year < 86 | year == 96)

/*   

*** Auxilliary Sample for prior elicitation  ****

* Create male sample with subjects leaving school before age 16 for prior elicitation
* 1.5 year period around 3 year window
* based on born between April 1930 and September 1931 and born between April 1934 and September 1935

* 1986 to 1995 and 1998 surveys: use exact birth year and month information
replace dobyear = dobyear - 1900 if year == 98
sum dobyear dobmonth
drop if dobyear < 0 & (year > 85 & year ~= 96)
drop if dobmonth < 0 & (year > 85 & year ~= 96)
drop if dobmonth > 12 & (year > 85 & year ~= 96)
drop if dobyear < 30 & (year > 85 & year ~= 96)
drop if dobyear == 30 & dobmonth < 4 & (year > 85 & year ~= 96)
drop if dobyear == 31 & dobmonth > 9 & (year > 85 & year ~= 96)
drop if dobyear == 32 & (year > 85 & year ~= 96)
drop if dobyear == 33 & (year > 85 & year ~= 96)
drop if dobyear == 34 & dobmonth < 10 & (year > 85 & year ~= 96)
drop if dobyear == 36 & dobmonth > 3 & (year > 85 & year ~= 96)
drop if dobyear > 36  & (year > 85 & year ~= 96)

*/

* Keep observations with a valid log weekly earnings measure and report the data set size
drop if lnearn == .
des, short

* Two groups of surveys are treated differently throughout:
*   exact  - surveys after 1985 other than 1996, where birth year and month are used
*   byage  - surveys before 1986 and the 1996 survey, where birth year is derived from age
local exact "(year > 85 & year != 96)"
local byage "(year < 86 | year == 96)"

* Forcing variable zstarm (z in the paper), defined for the exact group only:
* distance in months between the birth month and April 1933, i.e. between the month the
* subject turned 14 and the introduction of the new policy (zstar = T - tau with tau = 0)
gen zstarm = (dobyear-33)*12 + (dobmonth-4) if `exact'

* Policy indicator z: 1 for subjects born in April 1933 or later.
* In the byage group the derived birth year (year-age-1) and the survey month are used instead.
replace z = 0
replace z = 1 if `exact' & (dobyear == 34 | (dobyear == 33 & dobmonth > 3))
replace z = 1 if `byage' & ((year-age-1) == 34 | ((year-age-1) == 33 & dmonth > 3))

* Schooling indicator x: 0 = left school at 14, 1 = left school at 15, missing otherwise
replace x = . if age_school == . | age_school < 14 | age_school >15
replace x = 0 if age_school == 14
replace x = 1 if age_school == 15


*** use information reported in variable on age left full time education to double check (added July 2010) ***

*replace x = 1 if agelftft == 15 & x==0 
*drop if x == 0 & agelftft > 14 
*drop if x == 1 & agelftft > 15 


*** drop subjects that leave school beyond age 15,
*** and subjects 60 and above to avoid retirement issues (a missing age is dropped as well) ***
drop if leaveafter15 == 1 | age > 59


*** Sample summary tables (3 year window) *******

* All males in the window, before conditioning on schooling
su lnwage lnearn x z yob age brit workhrs datyear if male == 1

* Estimation sample used in the tables below: males with valid schooling indicator and earnings
local smp "male == 1 & x !=. & lnearn!= ."

* Summary statistics of male sample age 44 - 59
sum x z age married  datyear lnearn weekearn lnwage wage workhrs if `smp'

* Distribution of male sample age 44 - 59 over policy indicator and schooling
tab z x if `smp'


** Qualification information in the sample, overall and then tabulated against
** schooling intake (x) and against the policy indicator (z), with column percentages

su noqual highqual uni schoolonly Olevelhighest Alevelhighest apprenthighest if `smp'

foreach grp in x z {
    foreach q in noqual highqual uni schoolonly apprenthighest Olevelhighest {
        tab `q' `grp' if `smp' , column
    }
}




* Restrict to males and store the sample (ages up to 59 after the earlier age restriction)
drop if male == 0 
save "path1\Stata\Devereux\paper\revise\ghhs7995_9698_male_3year_4648.dta", replace


** Sample statistics: schooling intake by age
tab2 age x

* The same data are stored a second time under the name read by the export step further below
save "path1\Stata\Devereux\paper\revise\ghhs7995_9698_male_3year_4648_age4459.dta", replace


** Summaries by age of log weekly earnings and then of log hourly wages:
** first for everybody in the sample, then separately by schooling intake (x = 0, x = 1)
** for subjects reporting fewer than 84 weekly hours
foreach y in lnearn lnwage {
    tabulate age, summarize(`y')
    forvalues s = 0/1 {
        tabstat `y' if x==`s' & workhrs<84 & workhrs ~=., by(age) stats(mean sd min max count)
    }
}


** Log weekly earnings against age
** NOTE(review): no collapse precedes these plots, so they are drawn on the individual-level data in memory

#delimit ;
graph twoway (scatter lnearn age, sort msize(large)),
    scheme(s2mono)
    xlabel(45(1)61)
    ylabel(5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9)
    legend(cols(1) ring(0) pos(5));
#delimit cr
*graph export "path1\mytexfiles\UKGHS7995_9698_4648_age4561_lnearnAge.eps", as(eps) preview(on) replace

** Log hourly wage against age

#delimit ;
graph twoway (scatter lnwage age, sort msize(large)),
    scheme(s2mono)
    xlabel(45(1)61)
    ylabel(1.4 1.6 1.8 2 2.2)
    legend(cols(1) ring(0) pos(5));
#delimit cr
*graph export "path1\mytexfiles\UKGHS7995_9698_4648_age4561_lnwageAge.eps", as(eps) preview(on) replace



****************************************
*** Data Files for Gauss Estimation ****
****************************************

*** 3 year male sample restricted to observations with the forcing variable zstarm ***
*** Needed for the updated model in the JAE revision of the paper

******* Sample of 44 to 59 year olds ******

use "path1\Stata\Devereux\paper\revise\ghhs7995_9698_male_3year_4648_age4459.dta", clear

** Sample restrictions: omit observations with missing information on any key variable.
** zstarm is defined only for surveys with exact birth dates, so the last condition
** also removes all observations from the other surveys.
drop if x ==. | lnearn ==. | z ==. | workhrs ==. | zstarm ==.

* Every table and export below uses subjects under 60 with fewer than 84 weekly hours
local insample "age < 60 & workhrs<84"

tab z x if `insample'

sum x z age married year lnearn lnwage workhrs if `insample'

* Outcome, schooling, policy indicator and covariates
outfile lnearn x z age male married ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_3year_age4459_yxzw.raw" ///
    if `insample' , replace

* Survey year
outfile year ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_3year_age4459_year.raw" ///
    if `insample' , replace

* Log hourly wage
outfile lnwage ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_3year_age4459_lnwage.raw" ///
    if `insample' , replace

* Forcing variable in months
outfile zstarm ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_3year_age4459_zstarm.raw" ///
    if `insample' , replace



***********************************************************************************************************************
***********************************************************************************************************************
** 4 year MALE SAMPLE : all males turning age 14 between April 1945 and March 1949, from 1986 to 1995 surveys
**                    + subjects from the 1979 to 1985 surveys and the 1996 survey selected on reported age

** 4 year window around policy change with new forcing variable
** final sample now based only on subset of survey years with information on birth date (1986 to 1995 and 1998)
*****************************************************************************************************************
***********************************************************************************************************************

use "path1\Stata\Devereux\paper\revisionJAE\ghhs7998_Devereux.dta" if brit==1, clear

* Extends the 3 year sample (born between October 1931 and September 1934) to a
* 2 year window of subjects turning 14 on either side of the policy change in April 1947
* (born between April 1931 and March 1935).
* Restrictions on sex and school leaving age are applied further below.

* Two groups of surveys are treated differently:
*   exact  - surveys after 1985 other than 1996, selected on birth year and month
*   byage  - surveys before 1986 and the 1996 survey, selected on reported age and survey month
local exact "(year > 85 & year ~= 96)"
local byage "(year < 86 | year == 96)"
* lower bound of the birth interval implied by survey year and age
local lowyob "(year-age-1)"

* exact group: dobyear is reduced by 1900 for the 1998 survey to give two-digit years
replace dobyear = dobyear - 1900 if year == 98
sum dobyear dobmonth
* invalid birth dates (a missing month also satisfies > 12)
drop if `exact' & (dobyear < 0 | dobmonth < 0 | dobmonth > 12)
* born before April 1931
drop if `exact' & (dobyear < 31 | (dobyear == 31 & dobmonth < 4))
* born after March 1935 (a missing dobyear also satisfies > 35)
drop if `exact' & (dobyear > 35 | (dobyear == 35 & dobmonth > 3))


** byage group: keep lower bounds of the birth interval from October 1930 up to September 1934
drop if `byage' & (`lowyob' < 30 | (`lowyob' == 30 & dmonth < 10))
drop if `byage' & (`lowyob' > 34 | (`lowyob' == 34 & dmonth > 9))

** exclude subjects for whom interval includes "policy change"
drop if `byage' & ((`lowyob' == 33 & dmonth < 4) | (`lowyob' == 32 & dmonth > 3))

* Keep observations with a valid log weekly earnings measure and report the data set size
drop if lnearn == .
des, short

/*  

******** Training Sample for Prior Elicitation *********************

* Create male sample with subjects leaving school before age 16 for prior elicitation
* 1.5 year period around 4 year window sample
* born between October 1929 and March 1931 and born between April 1935 and September 1936
* (old: born between April 1930 and September 1931 and born between April 1934 and September 1935)


* 1986 to 1995 and 1998 surveys: use exact birth year and month information
drop if dobyear < 0 & (year > 85 & year ~= 96)
drop if dobmonth < 0 & (year > 85 & year ~= 96)
drop if dobmonth > 12 & (year > 85 & year ~= 96)
drop if dobyear < 29
drop if dobyear == 29 & dobmonth < 10 & (year > 85 & year ~= 96)
drop if dobyear == 31 & dobmonth > 3 & (year > 85 & year ~= 96)
drop if dobyear == 32 & (year > 85 & year ~= 96)
drop if dobyear == 33 & (year > 85 & year ~= 96)
drop if dobyear == 34 & (year > 85 & year ~= 96)
drop if dobyear == 35 & dobmonth < 4 & (year > 85 & year ~= 96)
drop if dobyear == 36 & dobmonth > 9 & (year > 85 & year ~= 96)
drop if dobyear > 36  & (year > 85 & year ~= 96)


*/



* Two groups of surveys are treated differently:
*   exact  - surveys after 1985 other than 1996, where birth year and month are used
*   byage  - surveys before 1986 and the 1996 survey, where birth year is derived from age
local exact "(year > 85 & year != 96)"
local byage "(year < 86 | year == 96)"

* Forcing variable zstarm (z in the paper), defined for the exact group only:
* distance in months between the birth month and April 1933, i.e. between the month the
* subject turned 14 and the introduction of the new policy
* (zstar = T - tau with tau = 0, and z = I[zstar > 0 or born in April 1933])
gen zstarm = (dobyear-33)*12 + (dobmonth-4) if `exact'

* Policy indicator z: 1 for subjects born in April 1933 or later.
* In the byage group the derived birth year (year-age-1) and the survey month are used instead.
replace z = 0
replace z = 1 if `exact' & (dobyear == 35 | dobyear == 34 | (dobyear == 33 & dobmonth > 3))
replace z = 1 if `byage' & ((year-age-1) == 34 | ((year-age-1) == 33 & dmonth > 3))

* Schooling indicator x: 0 = left school at 14, 1 = left school at 15, missing otherwise
replace x = . if age_school == . | age_school < 14 | age_school >15
replace x = 0 if age_school == 14
replace x = 1 if age_school == 15


*** drop subjects that leave school beyond age 15,
*** and subjects 60 and above to avoid retirement issues (a missing age is dropped as well) ***
drop if leaveafter15 == 1 | age > 59


*** Sample summary tables (4 year window) *******

* All males in the window, before conditioning on schooling
su lnwage lnearn x z yob age brit workhrs datyear if male == 1

* Estimation sample used in the tables below: males with valid schooling indicator and earnings
local smp "male == 1 & x !=. & lnearn!= ."

* Summary statistics of male sample age 44 - 59
su x z age married  datyear lnearn weekearn wage workhrs if `smp'

* Distribution of male sample age 44 - 59 over policy indicator and schooling
tab z x if `smp'


** Qualification information in the sample, overall and then tabulated against
** schooling intake (x) and against the policy indicator (z), with column percentages

su noqual highqual uni schoolonly Olevelhighest Alevelhighest apprenthighest if `smp'

foreach grp in x z {
    foreach q in noqual highqual uni schoolonly apprenthighest Olevelhighest {
        tab `q' `grp' if `smp' , column
    }
}


* Store the window sample before the restriction to males (this file is read by the earnings graphs below)
save "path1\Stata\Devereux\paper\revisionJAE\ghhs7995_9698_extApr45Mar49_age4459.dta", replace

* Restrict to males (ages up to 59 after the earlier age restriction)
drop if male == 0 

** Sample statistics: schooling intake by age
tab2 age x

save "path1\Stata\Devereux\paper\revisionJAE\ghhs7995_9698_male_extApr45Mar49_age4459.dta", replace


** Summaries by age of log weekly earnings and then of log hourly wages:
** first for everybody in the sample, then separately by schooling intake (x = 0, x = 1)
** for subjects reporting fewer than 84 weekly hours
foreach y in lnearn lnwage {
    tabulate age, summarize(`y')
    forvalues s = 0/1 {
        tabstat `y' if x==`s' & workhrs<84 & workhrs ~=., by(age) stats(mean sd min max count)
    }
}



***** Earnings graphs: mean log earnings and mean log wage by age *****

* NOTE(review): this file is saved before the drop of male == 0 observations,
* so the means below appear to include women as well - confirm this is intended
use "path1\Stata\Devereux\paper\revisionJAE\ghhs7995_9698_extApr45Mar49_age4459.dta", clear

* Outcome copies split by schooling intake (suffix 0 for x==0, suffix 1 for x==1)
foreach y in lnearn lnwage {
    forvalues s = 0/1 {
        gen `y'`s' = `y' if x ==`s'
    }
}

* One row per age holding the means, using subjects with fewer than 84 weekly hours
collapse (mean) lnearn lnearn0  lnearn1 lnwage lnwage0 lnwage1 if workhrs<84 & workhrs ~=. , by(age)

label var lnearn  " log earnings "
label var lnwage  " log wage "
label var age "age "


** Mean log weekly earnings against age

#delimit ;
graph twoway (scatter lnearn age, sort msize(large)),
    scheme(s2mono)
    xlabel(45(1)61)
    ylabel(5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9)
    legend(cols(1) ring(0) pos(5));
#delimit cr
*graph export "path1\mytexfiles\UKGHS7995_9698_4648_age4561_lnearnAge.eps", as(eps) preview(on) replace

** Mean log hourly wage against age

#delimit ;
graph twoway (scatter lnwage age, sort msize(large)),
    scheme(s2mono)
    xlabel(45(1)61)
    ylabel(1.4 1.6 1.8 2 2.2)
    legend(cols(1) ring(0) pos(5));
#delimit cr
*graph export "path1\mytexfiles\UKGHS7995_9698_4648_age4561_lnwageAge.eps", as(eps) preview(on) replace



****************************************
*** Data Files for Gauss Estimation ****
****************************************


**** MALE SAMPLE, 4 YEAR WINDOW, WITH ZSTAR VARIABLE *******
* NOTE(review): the original header said "1986 - 1995 survey years" while the
* file names below carry the tag 7995 - confirm which survey years are meant.

use "path1\Stata\Devereux\paper\revisionJAE\ghhs7995_9698_male_extApr45Mar49_age4459.dta", clear

******* Sample of 44 to 59 year olds ******
* Age is truncated from above only: the "age < 60" condition is applied when
* tabulating and writing the files; no lower age bound is imposed here.

** Sample restrictions: omit observations with a missing value in any key variable.
** Done one variable at a time so the log reports the number dropped for each.

foreach keyvar in x lnearn z workhrs zstarm {
    drop if `keyvar' == .
}

* Checks on exactly the observations that are written out below
tabulate z x if age < 60 & workhrs < 84

summarize x z zstarm age married year lnearn lnwage workhrs if age < 60 & workhrs < 84


* All four files use the same condition (age < 60 & workhrs < 84) and are
* written from the same data in the same sort order.

* (1) lnearn, x, z, age, male, married
outfile lnearn x z age male married ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_extApr45Mar49_age4459_yxzw.raw" ///
    if age < 60 & workhrs < 84 , replace

* (2) zstarm
outfile zstarm ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_extApr45Mar49_age4459_zstarm_tau0.raw" ///
    if age < 60 & workhrs < 84 , replace

* (3) year
outfile year ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_extApr45Mar49_age4459_year.raw" ///
    if age < 60 & workhrs < 84 , replace

* (4) lnwage
outfile lnwage ///
    using "path1\Stata\Devereux\paper\revisionJAE\ukghs_7995_male_extApr45Mar49_age4459_lnwage.raw" ///
    if age < 60 & workhrs < 84 , replace

log close













































