/*******************************************************************
 This file extracts the data from the original source files and
 generates the necessary variables for the estimations and figures.
 
 My data preparation steps largely follow the programs used in the study 
 by Pischke & von Wachter (2008).
 
 Reference:
 Pischke, J.-S. and T. von Wachter (2008). Zero returns to compulsory 
 schooling in Germany: Evidence and interpretation. Review of 
 Economics and Statistics 90(3), 592–598.
 
 I gratefully thank Steve Pischke for sharing his STATA code with me.
********************************************************************/


********************** survey year 1979 **********************
/* Load the 1979 survey file.  Directory and file name are joined with a
   forward slash: Stata reads a backslash placed directly in front of a
   macro reference as an escape that suppresses the expansion, and the
   forward slash is accepted as a path separator on every platform. */
use "$data79/$file79", clear

/* income: assign midpoints */

/* v435 is the bracketed income answer; each bracket code 1-13 receives the
   value listed below, code 99 stays missing. */
gen inc = 200 if v435 == 1
replace inc =   500 if v435 == 2
replace inc =   700 if v435 == 3
replace inc =   900 if v435 == 4
replace inc =  1125 if v435 == 5
replace inc =  1375 if v435 == 6
replace inc =  1625 if v435 == 7
replace inc =  1875 if v435 == 8
replace inc =  2250 if v435 == 9
replace inc =  2750 if v435 == 10
replace inc =  3500 if v435 == 11
replace inc =  4500 if v435 == 12
replace inc =  7500 if v435 == 13
replace inc = . if v435 == 99
gen lny = ln(inc)
/* top flags the highest income bracket (code 13) */
gen top = v435 == 13

/* schooling */

/* Years of schooling from the degree code v3.  Degree code 1 counts as
   8 years when v437 is 50 or below and as 9 years otherwise. */
gen school = .
replace school = cond(v437 <= 50, 8, 9) if v3 == 1
replace school = 10 if v3 == 2
replace school = 12 if v3 == 3
replace school = 13 if v3 == 4
replace school = .  if v3 == 9

/* v5 and v6 are copied only when below 6; other codes become missing */
gen german = cond(v5 < 6, v5, .)
gen math   = cond(v6 < 6, v6, .)

/* training: first code dummies for non appr. training types */

/* One dummy per non-apprenticeship training type, built from v22-v32 in
   the order listed.  A dummy is 1 when its source code lies in 27-98,
   0 otherwise, and missing when the source code is 99.
   Fix: the original tested the upper bound for "other" against v31 (the
   uni variable) instead of v32.  Looping over the pairs keeps both bounds
   on the same source variable. */
local types berufs berufsf master technic gesundh beamten fachsch ba fh uni other
local src = 22
foreach t of local types {
    gen `t' = v`src' >= 27 & v`src' <= 98
    replace `t' = . if v`src' == 99
    local src = `src' + 1
}

/* create training variable */

/* Years of training.  Statement order matters: later lines test the value
   that earlier lines left in train. */
gen train = 0
replace train = 1 if inlist(v7, 2, 3) & v8 == 1
replace train = 2 if inlist(v7, 2, 3) & inlist(v8, 2, 3)
replace train = . if v7 == 9

/* these training types set a level whenever train is below 2 or missing */
replace train = 1 if other == 1 & (train < 2 | train == .)
foreach d in gesundh beamten berufsf {
    replace train = 2 if `d' == 1 & (train < 2 | train == .)
}

/* additive components: ba takes precedence over fachsch, uni over fh */
replace train = train + 2 if ba == 1
replace train = train + 1 if ba == 0 & fachsch == 1

replace train = train + 5 if uni == 1
replace train = train + 4 if uni == 0 & fh == 1

replace train = train + 1.5 if master == 1 | technic == 1

/* a code 99 in any of v22-v32 makes train missing */
forvalues k = 22/32 {
    replace train = . if v`k' == 99
}

/* total education, capped at 18; missing values are left missing */
gen ed = school + train
replace ed = 18 if ed > 18 & !missing(ed)

/* experience */

/* age = 79 minus v437; negative results are set to missing */
gen age = 79 - v437
replace age = . if age < 0

/* exp = age - ed - 5; where this is negative, ed is reset to age - 5
   and exp to 0 */
gen exp = age - ed - 5
replace ed  = age - 5 if exp < 0
replace exp = 0       if exp < 0

/* scaled powers: exp2 = exp^2/100, exp3 = exp^3/10000, exp4 = exp^4/1000000 */
forvalues p = 2/4 {
    local scale = 100^(`p' - 1)
    gen exp`p' = (exp^`p')/`scale'
}

gen female = v438 - 1

/* further training */

/* didtrain: 1 if v36 == 2, missing if v36 == 9, otherwise 0 */
gen didtrain = (v36 == 2)
replace didtrain = . if v36 == 9

/* ltrain in weeks: value assigned to each bracket code 1-8 of v43 */
gen ltrain = .
local k = 0
foreach wk in 0.5 2.5 9 19.5 39 65 91 110 {
    local k = `k' + 1
    replace ltrain = `wk' if v43 == `k'
}

/* halved for v42 codes 1, 3 and 5; missing on any code 9; zero if v36 == 1 */
replace ltrain = ltrain/2 if inlist(v42, 1, 3, 5)
replace ltrain = . if v43 == 9 | v42 == 9 | v36 == 9
replace ltrain = 0 if v36 == 1
/* divide by 52 to express the week count as a fraction of 52 weeks */
replace ltrain = ltrain / 52


/* hours */

/* hours: v71 is used when it is at most 100 and not zero */
gen hours = v71 if v71 <= 100 & v71 != 0
gen lnhrs = ln(hours)
/* full = 1 from 35 hours upward, missing when hours is missing */
gen full = (hours >= 35) if hours < .
/* w = inc divided by hours times 4.333 */
gen w = inc/(hours*4.333)
gen lnw = ln(w)

/* self employed and civil servants */

/* self: v68 in 30-45, civil: v68 in 20-23; both missing for v68 codes 0 and 99 */
gen self  = inrange(v68, 30, 45) if !inlist(v68, 0, 99)
gen civil = inrange(v68, 20, 23) if !inlist(v68, 0, 99)

/* v421 is copied when below 9 */
gen numempl = cond(v421 < 9, v421, .)

/* empl: 1 for v64 == 1, 0 for v64 == 2, missing otherwise */
gen empl = (v64 == 1) if inlist(v64, 1, 2)

compress
save "${sample}\data79.dta", replace

********************** survey year 1985/86 **********************
/* Load the 1985/86 survey file.  Directory and file name are joined with a
   forward slash: Stata reads a backslash placed directly in front of a
   macro reference as an escape that suppresses the expansion, and the
   forward slash is accepted as a path separator on every platform. */
use "$data85/$file85", clear

*change variable names from "v" to "mv"
foreach vari of varlist v1-v190 {
rename `vari' m`vari'
}

/* income: assign midpoints */

/* mv126 is the bracketed income answer; each bracket code 1-22 receives the
   value listed below, code 99 stays missing. */
gen inc = 200 if mv126 == 1
replace inc =   500 if mv126 == 2
replace inc =   700 if mv126 == 3
replace inc =   900 if mv126 == 4
replace inc =  1125 if mv126 == 5
replace inc =  1375 if mv126 == 6
replace inc =  1625 if mv126 == 7
replace inc =  1875 if mv126 == 8
replace inc =  2125 if mv126 == 9
replace inc =  2375 if mv126 == 10
replace inc =  2625 if mv126 == 11
replace inc =  2875 if mv126 == 12
replace inc =  3250 if mv126 == 13
replace inc =  3750 if mv126 == 14
replace inc =  4250 if mv126 == 15
replace inc =  4750 if mv126 == 16
replace inc =  5250 if mv126 == 17
replace inc =  5750 if mv126 == 18
replace inc =  7000 if mv126 == 19
replace inc =  9000 if mv126 == 20
replace inc = 12500 if mv126 == 21
replace inc = 17500 if mv126 == 22
replace inc = . if mv126 == 99
gen lny = ln(inc)
/* top flags the highest income bracket (code 22) */
gen top = mv126 == 22

/* schooling */

/* Years of schooling from the degree code mv131.  Codes 1 and 2 count as
   8 years when mv176 is 50 or below and as 9 years otherwise. */
gen school = .
replace school = cond(mv176 <= 50, 8, 9) if inlist(mv131, 1, 2)
replace school = 10 if inlist(mv131, 3, 6)
replace school = 12 if mv131 == 4
replace school = 13 if mv131 == 5
replace school = .  if mv131 == 9

/* training */

/* Years of training, accumulated from the mv133 and mv142-mv152 items.
   Each group below adds at most one component; the conditions on code 9
   and "> 1" select respondents who did not report the preceding type.
   NOTE(review): conditions such as mv145 > 1 and mv151 > 1 are also true
   when the variable is missing - confirm this is intended. */
gen train = 0
replace train = 1 if mv142 == 1 | mv143 == 1  /* B-vorber. od. grundb.jahr */
replace train = train + 2 if mv133 == 1 | mv133 == 2
replace train = train + 2 if mv146 == 1 & mv133 == 3
replace train = train + 2 if mv147 == 1 & mv133 == 3 & mv146 == 9
replace train = train + 2 if mv145 == 1 & mv133 == 3 & mv146 == 9 & mv147 == 9
replace train = train + 1 if mv144 == 1 & mv133 == 3 & mv146 == 9 & mv147 == 9 & mv145 > 1

replace train = train + 2 if mv149 == 1
replace train = train + 1 if mv148 == 1 & mv149 == 9

replace train = train + 5 if mv151 == 1
replace train = train + 4 if mv150 == 1 & mv151 > 1

replace train = train + 1.5 if mv152 >= 1 & mv152 <= 3

replace train = . if mv133 == 9

gen ed = school + train
/* ed_85_mis records which observations have missing education */
gen ed_85_mis=ed==.
/* Cap at 18.  Missing values compare greater than any number in Stata, so
   the cap must exclude them explicitly; otherwise every missing ed would
   be overwritten with 18 (the 1979 section guards the same way). */
replace ed = 18 if ed > 18 & ed < .

/* experience */

/* age = 86 minus mv176; values outside 15-65 are set to missing */
gen age = 86 - mv176
replace age = . if !inrange(age, 15, 65)

/* exp = age - ed - 5; where this is negative, ed is reset to age - 5
   and exp to 0 */
gen exp = age - ed - 5
replace ed  = age - 5 if exp < 0
replace exp = 0       if exp < 0

/* scaled powers: exp2 = exp^2/100, exp3 = exp^3/10000, exp4 = exp^4/1000000 */
forvalues p = 2/4 {
    local scale = 100^(`p' - 1)
    gen exp`p' = (exp^`p')/`scale'
}

gen female = mv175 - 1

/* further training */

/* didtrain: 1 for mv155 codes 1-3, missing for code 9, otherwise 0 */
gen didtrain = inrange(mv155, 1, 3)
replace didtrain = . if mv155 == 9

/* ltrain: value assigned to each bracket code 1-8 of mv158 */
gen ltrain = .
local k = 0
foreach wk in 0.5 1.5 3 9 19.5 39 78 110 {
    local k = `k' + 1
    replace ltrain = `wk' if mv158 == `k'
}

/* halved for mv157 == 2; missing on any code 9; zero if mv155 == 4 */
replace ltrain = ltrain/2 if mv157 == 2
replace ltrain = . if mv158 == 9 | mv157 == 9
replace ltrain = 0 if mv155 == 4
replace ltrain = ltrain / 52

/* hours */

/* hours: mv4 is used when it is at most 99 */
gen hours = cond(mv4 <= 99, mv4, .)
gen lnhrs = ln(hours)
/* w = inc divided by 4.333 times hours */
gen w = inc/(4.333*hours)
gen lnw = ln(w)
/* full = 1 from 35 hours upward; missing whenever mv4 exceeds 99 or is missing */
gen full = (hours >= 35) if mv4 <= 99

/* self employed and civil servants */

/* self: mv6 in 40-45, civil: mv6 in 30-33; both missing for mv6 == 99 */
gen self  = inrange(mv6, 40, 45) if mv6 != 99
gen civil = inrange(mv6, 30, 33) if mv6 != 99

/* mv129 is copied when below 7, with code 6 folded into code 5 */
gen numempl = cond(mv129 == 6, 5, mv129) if mv129 < 7

compress
save "${sample}\data85.dta", replace

********************** survey year 1991/92 **********************
/* Load the 1991/92 survey file.  Directory and file name are joined with a
   forward slash: Stata reads a backslash placed directly in front of a
   macro reference as an escape that suppresses the expansion, and the
   forward slash is accepted as a path separator on every platform. */
use "$data92/$file92", clear
/* renvars is a community-contributed command and has to be installed */
renvars _all , lower

/* keep ostwest codes up to 2; german marks ostwest == 2 */
keep if ostwest <= 2
gen german = ostwest == 2

/* income: assign midpoints */

/* f17 is the bracketed income answer.
   NOTE(review): codes 4, 6 and 15 are not mapped below and therefore stay
   missing - confirm against the codebook that these codes are unused. */
gen inc = 300 if f17 == 1
replace inc =   800 if f17 == 2
replace inc =  1250 if f17 == 3
replace inc =  1750 if f17 == 5
replace inc =  2250 if f17 == 7
replace inc =  2750 if f17 == 8
replace inc =  3250 if f17 == 9
replace inc =  3750 if f17 == 10
replace inc =  4250 if f17 == 11
replace inc =  4750 if f17 == 12
replace inc =  5250 if f17 == 13
replace inc =  5750 if f17 == 14
replace inc =  6500 if f17 == 16
replace inc =  7500 if f17 == 17
replace inc = 12500 if f17 == 18
replace inc = . if f17 == 99
gen lny = ln(inc)
/* top flags the highest income bracket (code 18) */
gen top = f17 == 18

/* schooling */

/* Years of schooling from the degree code f39.  Codes 1 and 6 count as
   8 years when f93 is 50 or below and as 9 years otherwise. */
gen school = .
replace school = cond(f93 <= 50, 8, 9) if inlist(f39, 1, 6)
replace school = 10 if inlist(f39, 2, 5)
replace school = 12 if f39 == 3
replace school = 13 if f39 == 4
replace school = .  if f39 == 9

/* training */

/* Start value: 1 if f5801 == 1 (B-vorber. od. grundb.jahr), else 0 */
gen train = (f5801 == 1)

/* Lehre (f45 <= 3): add the coded length; f46 codes 1-6 map to the
   increments listed, and code 9 is treated like code 5 */
local k = 0
foreach yrs in .67 1 1.33 1.67 2 2.33 {
    local k = `k' + 1
    replace train = train + `yrs' if f45 <= 3 & f46 == `k'
}
replace train = train + 2 if f45 <= 3 & f46 == 9

replace train = . if f41 == 9 | f45 == 9

/* School-based training for f41 == 2: exactly one of the three types adds
   2 years, in the order Beamtenausbildung, Gesundheitswesen,
   Berufsfachschule.
   Fix: the third line repeated f5803 == 1, which made it a subset of the
   second line (adding 2 a second time) and never credited Berufsfachschule
   on its own.  It now requires f5803 == 0, matching the tiering used for
   the other training types in this file. */
replace train = train + 2 if f41 == 2 & f5809 == 1      /* beamtenausbildung */
replace train = train + 2 if f41 == 2 & f5809 == 0 & f5803 == 1         /* gesundheitswesen */
replace train = train + 2 if f41 == 2 & f5809 == 0 & f5803 == 0 & f5802 == 1    /* berufsfachschule */

/* Further additive training components.
   NOTE(review): f56 >= 4 is also true when f56 is missing - confirm this
   is intended for the fachschule line. */
replace train = train + 2 if f5805 == 1                  /* berufsakademie */
replace train = train + 1 if f5805 == 0 & f5804 == 1 & f56 >= 4   /* fachschule */

replace train = train + .5 if f5808 ==  1     /* referendariat */

replace train = train + 5 if f5807 == 1                 /* uni */
replace train = train + 4 if f5807 == 0 & f5806 == 1    /* fh */

replace train = train + 1.5 if f56 >= 1 & f56 <= 4

replace train = . if f5810 == 9


gen ed = school + train
/* Cap at 18.  Missing values compare greater than any number in Stata, so
   they are excluded explicitly; otherwise an observation with missing
   school but known train would be assigned 18 years. */
replace ed = 18 if ed > 18 & ed < .
replace ed = . if train == .

/* experience */

/* age = 92 minus f93; values outside 15-65 are set to missing */
gen age = 92 - f93
replace age = . if !inrange(age, 15, 65)

/* exp = age - ed - 5; where this is negative, ed is reset to age - 5
   and exp to 0 */
gen exp = age - ed - 5
replace ed  = age - 5 if exp < 0
replace exp = 0       if exp < 0

/* scaled powers: exp2 = exp^2/100, exp3 = exp^3/10000, exp4 = exp^4/1000000 */
forvalues p = 2/4 {
    local scale = 100^(`p' - 1)
    gen exp`p' = (exp^`p')/`scale'
}

gen female = f89 - 1


/* further training */

/* didtrain: 1 if f71 == 1, missing if f71 == 9, otherwise 0 */
gen didtrain = (f71 == 1)
replace didtrain = . if f71 == 9

/* ltrain: value assigned to each bracket code 1-9 of f77 */
gen ltrain = .
local k = 0
foreach wk in 0.2 0.7 1.5 3 9 19.5 39 78 110 {
    local k = `k' + 1
    replace ltrain = `wk' if f77 == `k'
}

replace ltrain = ltrain/2 if f76 == 2
/* NOTE(review): f77 == 9 was just assigned 110 and is reset to missing
   here, so the top bracket never survives - confirm the missing code of
   f77 against the codebook. */
replace ltrain = . if f77 == 9 | f76 == 9
/* zero for everyone without didtrain == 1, including missing didtrain */
replace ltrain = 0 if didtrain ~= 1
replace ltrain = ltrain / 52


/* hours */

/* hours: f01 divided by 10, used when f01 is at most 800 */
gen hours = cond(f01 <= 800, f01/10, .)
gen lnhrs = ln(hours)
/* w = inc divided by 4.333 times hours */
gen w = inc/(4.333*hours)
gen lnw = ln(w)
/* full = 1 from 35 hours upward, missing when hours is missing */
gen full = (hours >= 35) if hours < .

/* self employed and civil servants */

/* self: f08 == 3, missing for f08 == 9 */
gen self  = (f08 == 3) if f08 != 9
/* civil: f11 in 30-33, missing for f11 == 99 */
gen civil = inrange(f11, 30, 33) if f11 != 99

/* f65 is copied when below 5 */
gen numempl = cond(f65 < 5, f65, .)

compress
save "${sample}\data92.dta", replace

********************** survey year 1998/99 **********************
/* Load the 1998/99 survey file.  Directory and file name are joined with a
   forward slash: Stata reads a backslash placed directly in front of a
   macro reference as an escape that suppresses the expansion, and the
   forward slash is accepted as a path separator on every platform. */
use "$data99/$file99", clear

* recode missing values to match the code by PvW
recode v431 v433 v348 v346 v432  v430 (0 =.) /*0 is now coded as missing*/

/* keep only west germans */
keep if v3 == 1
/* german: 1 if v780 == 1, missing if v780 == 9, otherwise 0 */
gen german = v780 == 1
replace german = . if v780 == 9

/* income: assign midpoints */

/* v147 is the bracketed income answer; each bracket code 1-18 receives the
   value listed below, codes 0 and 98 or above stay missing. */
gen inc = 300 if v147 == 1
replace inc =   800 if v147 == 2
replace inc =  1250 if v147 == 3
replace inc =  1750 if v147 == 4
replace inc =  2250 if v147 == 5
replace inc =  2750 if v147 == 6
replace inc =  3250 if v147 == 7
replace inc =  3750 if v147 == 8
replace inc =  4250 if v147 == 9
replace inc =  4750 if v147 == 10
replace inc =  5250 if v147 == 11
replace inc =  5750 if v147 == 12
replace inc =  6500 if v147 == 13
replace inc =  7500 if v147 == 14
replace inc =  8500 if v147 == 15
replace inc =  9500 if v147 == 16
replace inc =  12500 if v147 == 17
replace inc =  17500 if v147 == 18
replace inc = . if v147 >= 98 | v147 == 0
gen lny = ln(inc)
/* top flags the highest income bracket (code 18) */
gen top = v147 == 18

/* schooling */ 

/* Years of schooling from the degree code v342.
   Codes 1 and 8 (Sonder, Other) and code 4 count as 10 years.  Codes 2 and
   3 count as 8 years when v348 or v346 is 1950 or earlier, otherwise 9.
   Code 9 (ohne Abschluss) counts as 8 years. */
gen school = .
replace school = 10 if inlist(v342, 1, 4, 8)
replace school = cond(v348 <= 1950 | v346 <= 1950, 8, 9) if inlist(v342, 2, 3)
replace school = 12 if inlist(v342, 5, 6)
replace school = 13 if v342 == 7
replace school = 8  if v342 == 9

replace school = . if inlist(v342, 10, 99) | v342 == .

/* training */

/* Start value: 1 if any of v383, v384, v386 equals 1
   (B-vorber. od. grundb.jahr od Praktikum), else 0 */
gen train = (v383 == 1 | v384 == 1 | v386 == 1)
/* one additional year for v385 == 1 (1 year berufsfachschule) */
replace train = train + 1 if v385 == 1

/* length of training */

/* Length of training in years: v433 minus v431, plus the month difference
   (v432 minus v430)/12 when v432 is at most 12 and not before v430.
   The result is bounded to the interval 0-6. */
gen length = v433 - v431 if v433 < 2000 & v431 < 2000 
replace length = length + (v432 - v430)/12 if v432 <= 12 & v430 <= v432
replace length = 0 if length < 0
/* Fix: missing values compare greater than any number in Stata, so the
   upper bound must exclude them; otherwise every observation with unknown
   dates would be assigned the maximum of 6 years.
   NOTE(review): this changes train for those observations from an imputed
   value to missing - confirm against the replication target. */
replace length = 6 if length > 6 & length < .

/* Add the training spell: two thirds of its length for v405 == 1, the
   full length for v405 codes 2-4; codes 5-9 and 99 make train missing. */
replace train = train + (2/3)*length if v405 == 1     /* Lehre, use */

replace train = train + length if v405 >= 2 & v405 <= 4 /* berufsfachsch, beamtenausb, uni */

replace train = . if v405 >= 5 & v405 <= 9 
replace train = . if v405 == 99

drop length


gen ed = school + train
/* ed_99_mis records which observations have missing education */
gen ed_99_mis=ed==.
/* Cap at 18.  Missing values compare greater than any number in Stata, so
   they are excluded explicitly; otherwise an observation with missing
   school but known train would be assigned 18 years. */
replace ed = 18 if ed > 18 & ed < .
replace ed = . if train == .

/* experience */

/* age is taken from v8; values outside 15-65 are set to missing */
gen age = v8
replace age = . if !inrange(age, 15, 65)

/* exp = age - ed - 5; where this is negative, ed is reset to age - 5
   and exp to 0 */
gen exp = age - ed - 5
replace ed  = age - 5 if exp < 0
replace exp = 0       if exp < 0

/* scaled powers: exp2 = exp^2/100, exp3 = exp^3/10000, exp4 = exp^4/1000000 */
forvalues p = 2/4 {
    local scale = 100^(`p' - 1)
    gen exp`p' = (exp^`p')/`scale'
}

gen female = v5 - 1


/* further training */

/* didtrain: 1 if v716 == 1, missing if v716 == 9, otherwise 0 */
gen didtrain = (v716 == 1)
replace didtrain = . if v716 == 9


/* hours */
* recode working hours to match the code by PvW
* hours are not coded in 10 hours any more

/* Flag the raw code 9999 before v29 is rescaled.  Comparing the rescaled
   variable with the literal 99.9 afterwards is unreliable, because the two
   differ when v29 is stored in single precision. */
tempvar hrs_na
gen byte `hrs_na' = v29 == 9999
replace v29=round(v29/10 ,0.1)  if v29<9999        
replace v29=99.9 if `hrs_na'

gen hours = v29*10 if v29 < 99
/* Cap at 99.  Missing values compare greater than any number in Stata, so
   they are excluded explicitly; otherwise unknown hours would be set to 99
   and carried into w and lnw. */
replace hours = 99 if hours > 99 & hours < .
/* hours_99: uncapped hours, missing for the raw code 9999 */
gen hours_99 = v29*10 if !`hrs_na'
drop `hrs_na'

gen lnhrs = ln(hours)
gen w = inc/(4.333*hours)
gen lnw = ln(w)
gen full = hours >= 35
replace full = . if hours == .

/* self employed and civil servants */

/* self: v117 codes 4-6, civil: v117 code 3; both missing for v117 == 9 */
gen self  = inlist(v117, 4, 5, 6) if v117 != 9
gen civil = (v117 == 3) if v117 != 9

/* v740 is copied when at most 5 */
gen numempl = cond(v740 <= 5, v740, .)

compress
save "${sample}\data99.dta", replace


********************** pool data 1979-1999 **********************
/* 1979 wave: harmonise names and keep the variables used in the pool */
use "$sample\data79.dta", clear

gen yob  = cond(v437 < 99, v437, .)
gen year = 79
gen bula = v439
gen grad = cond(v4 <= 79, v4, .)

/* Abschluss 1= kein, 2= Haupt, 3= Real, 4= FHreife, 5= Abitur, 6= other */

gen abschl = v3 + 1 if v3 < 9

gen length = v4 - yob - 6 if v4 <= 79

/* completed apprenticeship: v8 codes 2 or 3, missing for v8 == 9 */
gen appr = inlist(v8, 2, 3) if v8 ~= 9

rename v441 weight

/* restrict to observations with income information */
drop if lny == .

/* highest school degree */
recode v3 (9=.), gen(degree)
lab var degree "School degree"


keep w inc lnw lny length abschl train appr age female yob year grad bula weight top self civil degree* hours*
compress
save "$sample\temp.dta", replace


/* 1985/86 wave: harmonise names, then stack onto the file built so far */
use "$sample\data85.dta", clear

gen yob  = cond(mv176 < 90, mv176, .)
gen year = 85
gen bula = mv188
/* state code 0 is stored as 11 */
replace bula = 11 if bula == 0
gen grad = cond(mv132 <= 86, mv132, .)

gen length = mv132 - yob - 6 if mv132 <= 86
gen abschl = cond(mv131 < 9, mv131, .)
/* apprenticeship: mv133 at most 2, missing for mv133 == 9 */
gen appr = (mv133 <= 2) if mv133 ~= 9

rename mv190 weight

/* restrict to observations with income information */
drop if lny == .

/* highest school degree */
recode mv131 ( 1=0) (2=1) (3=2) (4=3) (5=4) (6=5) (else=.), gen(degree)

keep w inc lnw lny length abschl train appr age female yob year grad bula weight top self civil degree* hours* ed_85_mis
compress
append using "$sample\temp.dta"

save "$sample\temp.dta", replace


/* 1991/92 wave: harmonise names, then stack onto the file built so far */
use "$sample\data92.dta", clear


keep if german == 1

gen yob = f93
gen year = 92
gen grad = f40 if f40 <= 92

/* abschl: f39 codes 1-5 are shifted up by one, code 6 becomes 1 */
gen abschl = f39 + 1 if f39 <= 5
replace abschl = 1 if f39 == 6

gen length = f40 - yob - 6 if f40 <= 92

/* Apprenticeship: f41 == 1, missing for f41 == 9.
   Fix: the original joined the code-9 exclusion with "&", which is
   redundant and left code 9 as 0; it is now an "if" qualifier so that
   code 9 becomes missing, as in the 1979 and 1985 sections. */
gen appr = f41 == 1 if f41 ~= 9

keep if lny ~= .

rename hr weight

/* highest school degree */
recode f39 (6 =0) (1=1) (2=2) (3=3) (4=4) (5=5) (else=.), gen(degree)

keep w inc lnw lny length abschl train appr age female yob year grad bula ///
	weight top self civil degree* hours* 
compress
append using "$sample\temp.dta"

save "$sample\temp.dta", replace


/* 1998/99 wave: harmonise names, then stack onto the file built so far */
use "$sample\data99.dta", clear

keep if german == 1
/* school leaver in former west Germany */
keep if v347 == 2 | v349 == 2

gen yob  = cond(v7 < 9999, v7 - 1900, .)
gen year = 99


/* graduation year from v348, falling back on v346 when v348 gives none */
gen grad = v348 - 1900 if v348 <= 1999
replace grad = v346 - 1900 if grad == . & v346 <= 1999
gen bula = v782
/* state code 0 is stored as 11 */
replace bula = 11 if bula == 0


/* abschl from v343: 9 = ohne Abschluss, 1 and 8 = anderer Abschluss.
   NOTE(review): abschl and degree_ read v343 while school and degree read
   v342, and code 6 is grouped with 7 here but with 5 in the degree
   recodes - confirm against the codebook. */
gen abschl = .
replace abschl = 1 if v343 == 9
replace abschl = 2 if inlist(v343, 2, 3)
replace abschl = 3 if v343 == 4
replace abschl = 4 if v343 == 5
replace abschl = 5 if inlist(v343, 6, 7)
replace abschl = 6 if inlist(v343, 1, 8)

gen length = grad - yob - 6

/* apprenticeship: any of the five type/flag variable pairs equal to 1;
   missing when v405 is missing */
gen appr = (v405==1 & v418==1) | (v473==1 & v486==1) | (v541==1 & v554==1) | (v607==1 & v620==1) | (v668==1 & v681==1) if v405 ~= .

rename v789 weight

/* restrict to observations with income information */
drop if lny == .


/* highest school degree*/
recode v342 (9 =0) (2 3=1) (4=2) (5 6=3) (7=4) (1 8=5) (else=.), gen(degree) 
recode v343 (10 9 =0) (2 3=1) (4=2) (5 6=3) (7=4) (1 8=5) (else=.), gen(degree_) 

keep w inc* lnw lny length abschl train appr age female yob year grad bula weight top self civil degree* hours* ed_99_mis
compress
append using "$sample\temp.dta"

save "$sample\temp.dta", replace



/* years of schooling implied by abschl; abschl 6 and missing stay missing */
gen school = .
replace school = 9  if inlist(abschl, 1, 2)
replace school = 10 if abschl == 3
replace school = 12 if abschl == 4
replace school = 13 if abschl == 5

/* switch to 9th year in basic track */

/* switch = 0 when grad is at or below the state-specific cut-off, else 1.
   Cut-off by state code (bula), with the first cohort w/ 9 years:
      1 Schleswig Holstein  55  (1940)
      2 Hamburg             48  (1933)
      3 Niedersachsen       61  (1946)
      4 Bremen              57  (1942)
      5 NRW                 66  (1952)
      6 Hessen              66  (1952)
      7 RPF                 66  (1952)
      8 Baden-Wuerttemberg  66  (1952)
      9 Bayern              69  (1954)
     10 Saarland            63  (1948)
     11 Berlin              50  (don't know, switch before 1960) */
gen switch = 1
local lastgrad 55 48 61 57 66 66 66 66 69 63 50
forvalues b = 1/11 {
    local g : word `b' of `lastgrad'
    replace switch = 0 if bula == `b' & grad <= `g'
}

/* switch2 = 0 when yob is at or below the state-specific cut-off, else 1.
   Cut-off by state code (bula), with the first cohort w/ 9 years:
      1 Schleswig Holstein  40  (1940)
      2 Hamburg             33  (1933)
      3 Niedersachsen       46  (1946)
      4 Bremen              42  (1942)
      5 NRW                 52  (1952)
      6 Hessen              52  (1952)
      7 RPF                 52  (1952)
      8 Baden-Wuerttemberg  52  (1952)
      9 Bayern              54  (1954)
     10 Saarland            48  (1948)
     11 Berlin              35  (don't know, switch before 1960) */
gen switch2 = 1
local lastyob 40 33 46 42 52 52 52 52 54 48 35
forvalues b = 1/11 {
    local y : word `b' of `lastyob'
    replace switch2 = 0 if bula == `b' & yob <= `y'
}


/* 9 years of schooling become 8 where the switch indicator is 0:
   school uses switch, school2 uses switch2 */
gen school2 = school
replace school  = 8 if school  == 9 & switch  == 0
replace school2 = 8 if school2 == 9 & switch2 == 0

gen basic = school2 <= 9

gen ed  = school  + train
gen ed2 = school2 + train

/* exp = age - ed2 - 6, floored at 0
   NOTE(review): the wave-specific sections subtract 5 rather than 6 -
   confirm the offset is intended to differ here. */
gen exp = cond(age - ed2 - 6 < 0, 0, age - ed2 - 6)

/* scaled powers of exp and age: x^2/100, x^3/10000, x^4/1000000 */
foreach v in exp age {
    forvalues p = 2/4 {
        local scale = 100^(`p' - 1)
        gen `v'`p' = (`v'^`p')/`scale'
    }
}

/* indicator variables for each schooling level and each survey year */
foreach s in 8 9 10 12 13 {
    gen school`s' = school == `s'
}

foreach y in 79 85 92 99 {
    gen year`y' = year == `y'
}

* clustering variable for SEs
* clustering variable for SEs: one group per combination of bula and yob
egen clust=group(bula yob)

sort bula grad

* save the final data
save "$sample\compuls.dta", replace

* clean up: remove the pooled scratch file and the four wave files
erase "$sample\temp.dta"
foreach y in 79 85 92 99 {
    erase "${sample}\data`y'.dta"
}


