**********************************************************************************************************************
*************************Locus of control***********************************************************************
**********************************************************************************************************************
* Locus-of-control items from vp.dta: rename them to Loc1-Loc10, keep the
* identifiers and write the result to noncog.dta.
use "$datapath\vp.dta", clear
rename sample1 vsample1

* Items 1-9 are stored as vp12701 ... vp12709 (zero-padded); item 10 is vp12710.
foreach item of numlist 1/9 {
	rename vp1270`item' Loc`item'
}
rename vp12710 Loc10

keep hhnr persnr vsample1 vhhnr Loc*
sort hhnr persnr
save "$datapatha\noncog.dta", replace
* Second write in old .dta format; capture swallows the error if saveold fails,
* in which case the file written by -save- above is what remains on disk.
capture saveold "$datapatha\noncog.dta", replace


* Locus-of-control items from bap.dta: rename them to Loc2010_1-Loc2010_10,
* keep the identifiers and write the result to noncog2010.dta.
use "$datapath\bap.dta", clear
rename sample1 basample1

* Items 1-9 are stored as bap0201 ... bap0209 (zero-padded); item 10 is bap0210.
foreach item of numlist 1/9 {
	rename bap020`item' Loc2010_`item'
}
rename bap0210 Loc2010_10

keep hhnr persnr basample1 bahhnr Loc2010*
sort hhnr persnr
save "$datapatha\noncog2010.dta", replace
* Second write in old .dta format; capture swallows the error if saveold fails.
capture saveold "$datapatha\noncog2010.dta", replace

***

***************************************************************OUTCOMES (all survey years after 2003, i.e. syear>2003)********************************************************
* Load the long person file (survey years after 2003) and reduce it to one row
* per person holding the latest non-missing value of each variable in `pvars'.
use "$datapathl\pl.dta" if syear>2003, clear
rename syear svyyear
* Order rows chronologically WITHIN person, so that -collapse (lastnm)- below
* unambiguously means "most recent survey year with a non-missing value".
sort pid svyyear

	*Give me a list of variables we want (Sample, Sex, Yob, employment status, in training, university education, contracted number of hours,
	*actual number of hours, gross monthly earnings, gross yearly earnings, blue collar worker, current position, marital status(p0571), 
	*number of sisters, number of brothers
		local pvars sample1 hid pla0009 ple0010 plb0022 plg0012 plg0014 plb0176 plb0186 plc0013 plb0471 plb0065 plb0057 plb0058  plb0059 plb0061 plb0060 plb0064 pld0131 pld0030 pld0032

	*replace negative entries with missings (needed for collapsing later)*
		mvdecode `pvars', mv(-1=.a \ -2=.b \ -3=.c \ -8=.d) 

	*We need to know which year the wageinfo is from (to inflation adjust)
	* o_wageinfo = survey year minus 2000 (4..11) on rows where BOTH earnings
	* (plc0013) and actual hours (plb0186) are observed; missing elsewhere.
	* Fixes relative to the earlier version:
	*  - 2011 is coded 11 (it used to be coded 10, so the deflator for
	*    o_wageinfo==11 applied further down could never be used);
	*  - rows without wage information are MISSING rather than 0, because 0 is a
	*    non-missing value and (lastnm) would otherwise just return the flag of
	*    the person's latest row instead of the year of the latest wage report.
	* The "<100000000" tests are false for missing values (missing compares
	* greater than any number), i.e. they mean "is observed".
	* NOTE(review): plc0013 and plb0186 are collapsed independently, so the two
	* components can still stem from different years; o_wageinfo is the latest
	* year in which both were reported.
		gen o_wageinfo=.
		replace o_wageinfo=svyyear-2000 if plc0013<100000000 & plb0186<100000000 & inrange(svyyear,2004,2011)

	*save variable labels (collapse replaces them with "(lastnm) varname")
	* The former loop over `var'_levels was removed: that macro is never filled
	* (no -levelsof-), so the loop body never ran.
		foreach v of local pvars {
			local l`v' : variable label `v'
			if `"`l`v''"' == "" {
				local l`v' "`v'"
			}
		}

	*collapse using only the latest nonmissing value of each variable per person
		collapse (lastnm) `pvars' o_wageinfo, by(pid)

	* keep the established coding "0 = no usable wage information"
		replace o_wageinfo=0 if missing(o_wageinfo)

	* re-attach value labels (assumes each label set is named after its variable
	* -- TODO confirm) and restore the variable labels saved above
		foreach var of local pvars {
			label values `var' `var'
		}

		foreach v of local pvars {
			label var `v' "`l`v''"
		}


	*general info we may need
		rename pla0009 b_sex
		rename ple0010 b_yob
		rename plb0022 o_empst
		rename plg0012 o_train
		rename plg0014 o_univ 

		gen b_age=2011-b_yob
		lab var b_age "age of ind in 2011"
		
	*code hourly earnings: no imputed values	
		rename plb0176 o_hrsw
		rename plb0186 o_hrswa
		rename plc0013 o_grearn
		rename plb0471 o_grsal
		rename plb0065 o_civil

	* any remaining negative codes are treated as missing
		replace o_hrsw=. if o_hrsw<0
		replace o_hrswa=. if o_hrswa<0
	* NOTE(review): this relies on the extended missing code .a (decoded from -1)
	* surviving -collapse (lastnm)-; confirm that it does, otherwise o_refus_w
	* is 0 for everybody.
		gen o_refus_w=0
		replace o_refus_w=1 if o_grearn==.a
		lab var o_refus_w "Individual refuses to state a wage"
	* weekly hours -> monthly hours: (hours/7)*30.5
		gen o_hrsm=(o_hrsw/7)*30.5
		gen o_hrsma=(o_hrswa/7)*30.5
		lab var o_hrsm "contracted no of hours worked per month, 20"
		lab var o_hrsma "actual no of hours worked per month, 20"
		replace o_grearn=. if o_grearn<0
	* hourly wages = monthly earnings / monthly hours (zero hours yield missing)
		gen o_hrlywage=o_grearn/o_hrsm
		gen o_hrlywagea=o_grearn/o_hrsma
		gen o_hrlygrsal=o_grsal/o_hrsma
		lab var o_hrlywage "hourly wage, contracted"
		lab var o_hrlywagea "hourly wage, actual"
		lab var o_hrlygrsal "hourly wage/salary, actual"
	
  
	*code blue collar white collar occupation (note: we can only have that for people that are employed), include workers, landwirte, sonstige selbststndige 
		*Attention!: the 0s include unemployed people and people that do not want to work
	* "x>10000" is used as an "is missing" test: missing compares greater than any
	* number, so the dummy is switched off when the position variable is missing.
		gen o_bluecol=1
		replace o_bluecol=0 if plb0058>10000 
		lab var o_bluecol "working in blue collar occupation" 
	*code white collar as: selbst. in freien Berufen (Apotheker, Techniker, Architekt etc), Angestellter, beamter
		gen o_whitecol=1
		replace o_whitecol=0 if plb0057>10000 & plb0064>10000 & o_civil>10000
	* fix: the label string was missing its closing double quote
		lab var o_whitecol "working in white collar occypation"

	*code self-employment (self-employed landwirte, self-employed freie berufe, self-employed other)
		*Attention: 0s include unemployed people!
		gen o_selfemp=1 
		replace o_selfemp=0 if plb0059>10000 & plb0060>10000 & plb0061>10000
		lab var o_selfemp "self-employed individuals"
		
	*marital status (codes 1 and 2 of pld0131 count as married; missing -> 0)
		gen b_married=0
		replace b_married=1 if pld0131==1 | pld0131==2 
		lab var b_married "Individual is married"
		
	*number of siblings
	* NOTE(review): as with o_refus_w, the ==.b tests only work if extended
	* missing codes survive the collapse -- confirm.
		rename pld0030 b_sisters 
		rename pld0032 b_brothers
		replace b_sisters=0 if b_sisters==.b
		replace b_brothers=0 if b_brothers==.b

		rename hid hhnr
		rename pid persnr
		sort hhnr persnr
		keep hhnr persnr o_* b_* sample1
		save "$datapatha\outcomes1.dta", replace
		cap saveold "$datapatha\outcomes1.dta", replace

***************************************************************OUTCOMES2*******************************************************************************
*******************************************************************************************************************************************************

* Load the long generated-person file and reduce it to one row per person
* holding the latest non-missing value of each variable in `pvars'.
use "$datapathl\pgen.dta", clear
rename syear svyyear
* Order rows chronologically WITHIN person so that -collapse (lastnm)- below
* unambiguously returns the most recent non-missing value.
sort pid svyyear

	*Give me a list of variables we want
		local pvars hid pgerwtyp pgsbil pgbbil02 pgbilzt pgisced pgcasmin pgemplst pgoeffd pgexpft pglfs pgexppt pgexpue pgnation pgsbilo pgbbilo

	*replace negative entries with missings (needed for collapsing later)*
		mvdecode `pvars', mv(-1=.a \ -2=.b \ -3=.c)

	*save variable labels (collapse replaces them with "(lastnm) varname")
	* The former loop over `var'_levels was removed: that macro is never filled,
	* so the loop body never ran.
		foreach v of local pvars {
			local l`v' : variable label `v'
			if `"`l`v''"' == "" {
				local l`v' "`v'"
			}
		}

	*collapse using only the latest nonmissing value of each variable per person
		collapse (lastnm) `pvars', by(pid)

	* re-attach value labels (assumes each label set is named after its variable
	* -- TODO confirm) and restore the variable labels saved above
		foreach var of local pvars {
			label values `var' `var'
		}

		foreach v of local pvars {
			label var `v' "`l`v''"
		}
	
*general info we may need	
	rename pgerwtyp  o_erwt  
	rename pgsbil    o_hgc
	rename pgbbil02  o_collgr
	rename pgbilzt  o_hgcyears
	rename pgisced o_isced
	rename pgcasmin o_casmin
	rename pgnation o_nation
	rename pgsbilo o_scheast
	rename pgbbilo o_ausbeast
	
*code unemployment
	* o_olf: 1 for the listed pglfs codes, 0 otherwise, missing if pglfs is missing
	gen o_olf=0
	replace o_olf=1 if pglfs<6 | pglfs==8 | pglfs==9 | pglfs==10 | pglfs==12
	replace o_olf=. if pglfs==.
	lab var o_olf "Out of the labor force"
	
	* o_unemp: 1 if pglfs==6.
	* Fix: negative codes -1/-2/-3 were already turned into missing values by
	* mvdecode, so the old test "pglfs<0" alone never caught them and people with
	* unknown labour-force status were coded as "not unemployed" (0). They are
	* now set to missing, consistent with o_olf.
	gen o_unemp=0
	replace o_unemp=1 if pglfs==6
	replace o_unemp=. if pglfs<0 | missing(pglfs)


*code full-time/part-time
	* pgemplst==1 -> full time, ==2 -> part time; other non-missing codes -> 0
	gen o_ft=. 
	replace o_ft=1 if pgemplst==1
	replace o_ft=0 if pgemplst>1 & pgemplst!=.
	lab var o_ft "ind is working full time" 
	gen o_pt=. 
	replace o_pt=1 if pgemplst==2
	replace o_pt=0 if (pgemplst>2 | pgemplst==1) & pgemplst!=. 
	lab var o_pt "individual is working part time"
	* o_ftpt is only defined for full-time (1) and part-time (0) workers
	gen o_ftpt=.
	replace o_ftpt=1 if o_ft==1
	replace o_ftpt=0 if o_pt==1
	lab var o_ftpt "given that individual works, he/she works full time"
	rename  pgemplst o_empst2

*public sector employment
	gen o_pubsec=.
	replace o_pubsec=1 if pgoeffd==1
	replace o_pubsec=0 if pgoeffd==2
	lab var o_pubsec "individual works in the public sector"


*code experience
	rename pgexpft o_expft
	rename pgexppt o_exppt
	rename pgexpue o_expue
	* the sum is missing if either component is missing
	gen o_exp=o_expft+o_exppt
	lab var o_exp "total (full time and part time) lm experience until 2010"
	
*Code nationality	
	gen b_german=0	
	replace b_german=1 if o_nation==1
	replace b_german=. if o_nation==.
	lab var b_german "individual is of german nationality"
	
	rename hid hhnr
	rename pid persnr

	sort hhnr persnr
	keep hhnr persnr o_* b_* 
	save "$datapatha\outcomes2.dta", replace
	cap saveold "$datapatha\outcomes2.dta", replace 

***************************************************************OUTCOMES 3*******************************************************************************
*******************************************************************************************************************************************************
	* From ppfad.dta build b_eduwest out of loc1989:
	* codes 2 and 3 -> 1, code 1 -> 0, anything else stays missing.
	use "$datapath\ppfad.dta", clear
	gen b_eduwest=1 if inlist(loc1989,2,3)
	replace b_eduwest=0 if loc1989==1
	rename loc1989 b_loc1989
	keep hhnr persnr b_*
	sort hhnr persnr
	save "$datapatha\outcomes3.dta", replace
	* second write in old .dta format; failures are ignored
	capture saveold "$datapatha\outcomes3.dta", replace
	
********************************************************************************************************
***************get background information on the parents, Background 3*************************
* Parental background from bioparen.dta: sibling counts and one set of
* schooling dummies per parent (father: vsbil/vbbil, mother: msbil/mbbil).
use "$datapath\bioparen.dta", clear

	* sibling counts; code -2 is recoded to zero
	rename nums b_nums
	rename numb b_numb
	foreach sib in b_nums b_numb {
		replace `sib'=0 if `sib'==-2
	}

	* marker for having a record in this file
	gen b_bioparensample=1

* Schooling dummies (a "don't know" category is kept on purpose so that people
* who grew up without the respective parent are not dropped).
* Each dummy is 1 for its own code(s), 0 for every other code >= 0 and missing
* otherwise (negative codes or missing schooling information).
	foreach par in f m {
		if "`par'"=="f" {
			local sch vsbil
			local voc vbbil
			local who "father's"
		}
		else {
			local sch msbil
			local voc mbbil
			local who "mother's"
		}

		* code 0: don't know
		gen b_`par'edu_dk=.
		replace b_`par'edu_dk=1 if `sch'==0
		replace b_`par'edu_dk=0 if `sch'>0 & `sch'!=.
		lab var b_`par'edu_dk "`who' education 'don't know'"

		* code 1: Hauptschule
		gen b_`par'edu_hs=.
		replace b_`par'edu_hs=1 if `sch'==1
		replace b_`par'edu_hs=0 if (`sch'>1 | `sch'==0) & `sch'!=.
		lab var b_`par'edu_hs "`who' education 'Hauptschule'"

		* code 2: Realschule
		gen b_`par'edu_rs=.
		replace b_`par'edu_rs=1 if `sch'==2
		replace b_`par'edu_rs=0 if (`sch'>2 | inlist(`sch',0,1)) & `sch'!=.
		lab var b_`par'edu_rs "`who' education 'Realschule'"

		* codes 3 and 4: Gymnasium/Fachhochschule
		gen b_`par'edu_gym=.
		replace b_`par'edu_gym=1 if inlist(`sch',3,4)
		replace b_`par'edu_gym=0 if (`sch'>4 | inlist(`sch',0,1,2)) & `sch'!=.
		lab var b_`par'edu_gym "`who' education 'Gymnasium/Fachhochschule'"

		* code 5: other degree
		gen b_`par'edu_o=.
		replace b_`par'edu_o=1 if `sch'==5
		replace b_`par'edu_o=0 if (`sch'>5 | inlist(`sch',0,1,2,3,4)) & `sch'!=.
		lab var b_`par'edu_o "`who' education 'other degree'"

		* code 6: dropout
		gen b_`par'edu_do=.
		replace b_`par'edu_do=1 if `sch'==6
		replace b_`par'edu_do=0 if (`sch'>6 | inlist(`sch',0,1,2,3,4,5)) & `sch'!=.
		lab var b_`par'edu_do "`who' education 'dropout'"

		* Recode other degree: "other degree" combined with a non-missing
		* `voc' value above 30 is moved into the gym category. The gym line must
		* run first because the second line resets the condition it tests.
		replace b_`par'edu_gym=1 if b_`par'edu_o==1 & (`voc'>30 & `voc'!=.)
		replace b_`par'edu_o=0 if b_`par'edu_o==1 & (`voc'>30 & `voc'!=.)
	}

	* keep the raw schooling codes under b_ names
	rename vsbil b_feduc
	rename msbil b_meduc
*code percentage of time between 0 and 15 spent in a broken home (JJH loves this). Note: there are many missings and i could not figure out why
	* negative living1 codes are treated as missing; the share is 1 - living1/15
	replace living1=. if living1<0
	gen b_pctbroken=1-(living1/15) 
	lab var b_pctbroken "percentage of youth spent in a broken home"

*place where people grew up
	* One dummy per ortkindh category (1 large city, 2 medium city, 3 small city,
	* 4 countryside): 1 for the own category, 0 for any other positive category,
	* missing otherwise.
	* Fix: a missing ortkindh used to be coded 0, because missing values compare
	* greater than any number and therefore passed the "ortkindh>k" tests. It
	* now stays missing, as the parental-education dummies in this file do.
	* The former "ortkindh!=-1" clauses were redundant (every retained value is
	* positive) and have been dropped.
	gen b_lcity=.
	replace b_lcity=1 if ortkindh==1
	replace b_lcity=0 if ortkindh>1 & !missing(ortkindh)
	lab var b_lcity "most childhood spent in large city"
	gen b_mcity=.
	replace b_mcity=1 if ortkindh==2
	replace b_mcity=0 if (ortkindh>2 | ortkindh==1) & !missing(ortkindh)
	lab var b_mcity "most childhood spent in medium sized city"
	gen b_scity=.
	replace b_scity=1 if ortkindh==3
	replace b_scity=0 if (ortkindh>3 | inlist(ortkindh,1,2)) & !missing(ortkindh)
	lab var b_scity "most childhood spent in small city"
	gen b_country=.
	replace b_country=1 if ortkindh==4
	replace b_country=0 if (ortkindh>4 | inlist(ortkindh,1,2,3)) & !missing(ortkindh)
	lab var b_country "most childhood spent in the countryside"
	rename ortkindh b_placechildhood


	keep hhnr persnr b_*
	sort hhnr persnr
	save "$datapatha\background_new.dta", replace
	cap saveold "$datapatha\background_new.dta", replace


****************other background variables************************************
	use "$datapath\biosoc.dta", clear
			rename bsschwo b_edulast
			rename bsschla b_bulaschool
		keep hhnr persnr b_*
		sort hhnr persnr
	save "$datapatha\background2.dta", replace
	cap saveold "$datapatha\background2.dta", replace

****************Bundesland**************************************************
* Load the long generated-household file and keep, per household, the latest
* non-missing region code (hgnuts1) plus the survey year it stems from.
use "$datapathl\hgen.dta", clear
rename syear svyyear
* Order rows chronologically WITHIN household so that -collapse (lastnm)- below
* unambiguously returns the most recent non-missing value.
sort hid svyyear

	*Give me a list of variables we want
		local pvars hgnuts1

	*replace negative entries with missings (needed for collapsing later)*
		mvdecode `pvars', mv(-1=.a \ -2=.b \ -3=.c)
		
	*Which year does the bula info come from?
	* o_bulainfo = survey year minus 2000 (1..11) on rows where hgnuts1 is
	* observed ("<100000000" is false for missing values); missing elsewhere.
	* Fix: rows without region information used to carry 0, which is a
	* non-missing value, so (lastnm) returned the flag of the household's latest
	* row rather than the year of the latest region observation.
		gen o_bulainfo=.
		replace o_bulainfo=svyyear-2000 if hgnuts1<100000000 & inrange(svyyear,2001,2011)

	*save variable labels (collapse replaces them with "(lastnm) varname")
	* The former loop over `var'_levels was removed: that macro is never filled,
	* so the loop body never ran.
		foreach v of local pvars {
			local l`v' : variable label `v'
			if `"`l`v''"' == "" {
				local l`v' "`v'"
			}
		}

	*collapse using only the latest nonmissing value of each variable per household
		collapse (lastnm) `pvars' o_bulainfo, by(hid)

	* keep the established coding "0 = no region information in 2001-2011"
		replace o_bulainfo=0 if missing(o_bulainfo)

	* re-attach value labels (assumes the label set is named after the variable
	* -- TODO confirm)
		foreach var of local pvars {
			label values `var' `var'
		}

	* Fix: the variable labels are restored AFTER the collapse; previously this
	* ran before it, so the restored label was overwritten again.
		foreach v of local pvars {
			label var `v' "`l`v''"
		}

		rename hgnuts1 b_bula 
		rename hid hhnr

**************************code BULA*******************************************************

	*region (Bundesland)
	* Each b_rK is 1 for the listed b_bula code(s), 0 for every other
	* non-missing code and missing if b_bula is missing.
		gen b_r1=.
		replace b_r1=1 if b_bula==3
		replace b_r1=0 if b_bula!=3 & b_bula!=.
		lab var b_r1 "living in Berlin(Ost+West) in "
		gen b_r2=.
		replace b_r2=1 if b_bula==15 
		replace b_r2=0 if b_bula!=15 & b_bula!=.
		lab var b_r2 "living in SSH in "
		gen b_r3=.
		replace b_r3=1 if b_bula==5 | b_bula==6 | b_bula==9 
		replace b_r3=0 if b_bula!=5 & b_bula!=6 & b_bula!=9 &  b_bula!=.
		lab var b_r3 "living in Bremen/Hamburg/Niedersachsen in "
		gen b_r4=.
		replace b_r4=1 if b_bula==10 
		replace b_r4=0 if b_bula!=10 &  b_bula!=.	
		lab var b_r4 "living in NRW in "
		gen b_r5=.
		replace b_r5=1 if b_bula==7 
		replace b_r5=0 if b_bula!=7 &  b_bula!=.
		lab var b_r5 "living in Hessen in "
		gen b_r6=.
		replace b_r6=1 if b_bula==11 | b_bula==12  
		replace b_r6=0 if b_bula!=11 & b_bula!=12 &  b_bula!=.
		lab var b_r6 "living in Rheinland-Pfalz/Saarl in "
		gen b_r7=.
		replace b_r7=1 if b_bula==1  
		replace b_r7=0 if b_bula!=1 &  b_bula!=.
		lab var b_r7 "living in BW in "
	* Fix: the Bavaria dummy was set to 2 instead of 1, unlike every other b_rK
		gen b_r8=.
		replace b_r8=1 if b_bula==2  
		replace b_r8=0 if b_bula!=2 &  b_bula!=.	
		lab var b_r8 "living in Bavaria in "
		gen b_r9=.
		replace b_r9=1 if b_bula==8  
		replace b_r9=0 if b_bula!=8 &  b_bula!=.	
		lab var b_r9 "living in Mecklenburg-Vorpommern in "
		gen b_r10=.
		replace b_r10=1 if b_bula==4  
		replace b_r10=0 if b_bula!=4 &  b_bula!=.	
		lab var b_r10 "living in Brandenburg in "
		gen b_r11=.
		replace b_r11=1 if b_bula==14  
		replace b_r11=0 if b_bula!=14 &  b_bula!=.	
		lab var b_r11 "living in Sachsen-Anhalt in "
		gen b_r12=.
		replace b_r12=1 if b_bula==16  
		replace b_r12=0 if b_bula!=16 &  b_bula!=.
		lab var b_r12 "living in Thueringen in "
		gen b_r13=.
		replace b_r13=1 if b_bula==13  
		replace b_r13=0 if b_bula!=13 &  b_bula!=.
		lab var b_r13 "living in Sachsen in "
	
*********************************************generate dummies for bula for adults****************************************
	* One dummy per raw b_bula code 1..13.
	* NOTE(review): the b_r* dummies above use codes up to 16, so codes 14-16
	* get no b_bula_* dummy here -- confirm whether the range should be 1/16.
		forvalues x=1/13{
		gen b_bula_`x'=0
		replace b_bula_`x'=1 if b_bula==`x'
		replace b_bula_`x'=. if b_bula==.
		lab var b_bula_`x' "dummy for region"
		}
***********************************************************************************************************************
	save "$datapatha\bula.dta", replace
	cap saveold "$datapatha\bula.dta", replace
	
****************code number of dependent children (Kindergeldkinder)**************************************************
* Household file (survey years after 2003): keep the latest non-missing number
* of children with Kindergeld (hlc0043) per household.
use "$datapathl\hl.dta" if syear>2003, clear
rename syear svyyear
* Order rows chronologically WITHIN household so that -collapse (lastnm)- below
* unambiguously returns the most recent non-missing value.
sort hid svyyear

	*Give me a list of variables we want 
		local pvars hlc0043

	*replace negative entries with missings (needed for collapsing later)*
		mvdecode `pvars', mv(-1=.a \ -2=.b \ -3=.c) 

	* The former "save labels" loop over `var'_levels was removed: that macro is
	* never filled, so the loop body never ran.

	*collapse using only the latest nonmissing value of each variable per household
		collapse (lastnm) `pvars', by(hid)

	* re-attach the value label (assumes it is named after the variable -- TODO confirm)
		foreach var of local pvars {
			label values `var' `var'
		}

	rename hid hhnr

		rename hlc0043  b_kidskg
		lab var b_kidskg "number of children with Kindergeld in HH"
		sort hhnr
		keep hhnr b_kidskg
	* NOTE(review): these recodes only have an effect if the extended missing
	* codes .a/.b survive -collapse (lastnm)- -- confirm.
		replace b_kidskg=0 if b_kidskg==.b
		replace b_kidskg=0 if b_kidskg==.a
	save "$datapatha\kids.dta", replace
	cap saveold "$datapatha\kids.dta", replace	
	
	***
					
* Number of children from the two birth-biography files. Both extracts hold
* hhnr, persnr, b_kids and the kidgeb* variables.

* BIOBRTHM.DTA -> kidsm.dta
use "$datapath\BIOBRTHM.DTA", clear
	keep hhnr persnr sumkids kidgeb*
	rename sumkids b_kids
	label variable b_kids "number of children"
	save "$datapatha\kidsm.dta", replace
	capture saveold "$datapatha\kidsm.dta", replace

* BIOBIRTH.DTA -> kidsf.dta
use "$datapath\BIOBIRTH.DTA", clear
	keep hhnr persnr sumkids kidgeb*
	rename sumkids b_kids
	label variable b_kids "number of children, female individuals"
	save "$datapatha\kidsf.dta", replace
	capture saveold "$datapatha\kidsf.dta", replace
 	
**********************************************************MERGE ALL VARIABLES SO FAR******************************************************************************************
	* Start from background2.dta and merge in every intermediate file written
	* above; each intermediate file is deleted once it has been merged.
	use "$datapatha\background2.dta", clear
	cd "$datapatha"
	local adatafiles noncog noncog2010 outcomes1 outcomes2 outcomes3 background_new kidsf kidsm
	local bdatafiles bula kids

* Person-level files: old-style match merge on persnr. -update- fills missing
* values in memory from the using file; -sort- sorts both sides by persnr.
* NOTE(review): old-style -merge- does not check that persnr is unique.
	foreach f of local adatafiles {
		merge persnr using "`f'.dta", update sort
		drop _merge
		capture erase "`f'.dta"
		sort hhnr persnr
	}

* Household-level files: match on hhnr; -nokeep- drops households that appear
* only in the using file. No -sort- option here, so the data must already be
* ordered by hhnr.
	sort hhnr
	foreach f of local bdatafiles {
		merge hhnr using "`f'.dta", nokeep update
		drop _merge
		capture erase "`f'.dta"
		sort hhnr persnr
	}

* Turn the remaining -1/-2/-3 codes in all variables into .a/.b/.c
	quietly mvdecode _all, mv(-1=.a \ -2=.b \ -3=.c)

***************************************************code education********************************************************

	* o_educ: four-level degree variable, assigned from the highest level down;
	* a lower level is only assigned where o_educ is still missing.
	* Fixes relative to the earlier version:
	*  - Level 2: "&" binds tighter than "|", so "& o_educ==." applied only to
	*    the o_hgc==5 branch and everybody with o_hgc==2 was reset to level 2,
	*    even if level 4 had already been assigned through o_collgr. The whole
	*    condition is now parenthesised like levels 3 and 1.
	*  - Missing o_hgcyears compares greater than any number, so o_hgc==5 with
	*    unknown years used to pass "o_hgcyears>16" and was coded level 4. The
	*    years tests now require a non-missing value; such cases fall through
	*    to level 1 via the plain o_hgc==5 condition.
	* "o_collgr<1000" is an "is observed" test (false for missing values).
	gen o_educ=.
	replace o_educ=4 if o_collgr<1000 | (o_hgc==5 & o_hgcyears>16 & !missing(o_hgcyears))
	replace o_educ=3 if ((o_hgc==3 | o_hgc==4) | (o_hgc==5 & o_hgcyears>=12 & !missing(o_hgcyears))) & o_educ==.
	replace o_educ=2 if ((o_hgc==2) | (o_hgc==5 & o_hgcyears>9 & !missing(o_hgcyears))) & o_educ==.
	replace o_educ=1 if ((o_hgc==1 | o_hgc==6) | (o_hgc==5)) & o_educ==.
	lab var o_educ "Educational degree"
	
	label define edu 1 "Dropout/Hauptschule" 2 "Realschule" 3 "Fachhochschulreife/Abitur" 4 "Fachhochschule/Uni"
	label values o_educ edu
	
	* one 0/1 dummy per level; missing where o_educ is missing
	forvalues x=1/4{
	gen o_educ_`x'=. 
	replace o_educ_`x'=1 if o_educ==`x'
	replace o_educ_`x'=0 if o_educ!=`x' & o_educ!=.
	}
	lab var o_educ_1 "Dropout/Hauptschule"
	lab var o_educ_2 "Realschule"
	lab var o_educ_3 "Fachhochschulreife/Abitur"
	lab var o_educ_4 "Fachhochschule/Uni"

**********************************************Code outcomes***************************************++
	* zero hourly wages are treated as missing
	foreach w in o_hrlywage o_hrlywagea {
		replace `w'=. if `w'==0
	}

	*inflation adjust wages (to 2009 levels), use CPI information for this, which can be retrieved e.g. 
	*from http://research.stlouisfed.org/fred2/series/DEUCPIALLAINMEI#
	* factor by o_wageinfo (survey year minus 2000); 9 is the base year and has no entry
	local cpi4  1.086294416
	local cpi5  1.07
	local cpi6  1.053149606
	local cpi7  1.029836381
	local cpi8  1.003752345
	local cpi10 0.988909427
	local cpi11 0.96287964
	foreach yr of numlist 4/8 10 11 {
		replace o_hrlywagea=`cpi`yr''*o_hrlywagea if o_wageinfo==`yr'
	}

***********************************************trim wage information for males/females separately******************************************************
	* wages are only kept for o_empst codes 1 and 2
	replace o_hrlywagea=. if !inlist(o_empst,1,2)
	replace o_hrlywage=. if !inlist(o_empst,1,2)

	* park zeros as .a during trimming, then set everything outside the
	* sex-specific 1st-99th percentile range to missing
	replace o_hrlywagea=.a if o_hrlywagea==0
	forvalues sx=1/2 {
		summarize o_hrlywagea if b_sex==`sx', detail
		gen lowpercentile=r(p1)
		gen highpercentile=r(p99)
		replace o_hrlywagea=. if o_hrlywagea<lowpercentile & b_sex==`sx'
		replace o_hrlywagea=. if o_hrlywagea>highpercentile & b_sex==`sx'
		drop lowpercentile highpercentile
	}

	* The original restored .a to 0 and immediately turned every 0 into .a again;
	* the net effect of that pair is this single statement.
	replace o_hrlywagea=.a if o_hrlywagea==0
	// Generate log wages
	gen o_lnhrlywagea=ln(o_hrlywagea)
	lab var o_lnhrlywagea "log hourly wage (actual)" 	
	* parked cases get a log wage of 0 and their wage is set back to 0
	replace o_lnhrlywagea=0 if o_hrlywagea==.a	
	replace o_hrlywagea=0 if o_hrlywagea==.a

*code participation: employed/unemployed (count everybody as employed who has a positive wage, exclude those that refused to state a wage)
	gen o_lmpart=(o_hrlywage>0 & o_hrlywage!=.)
	gen o_lmparta=(o_hrlywagea>0 & o_hrlywagea!=.)
	* refusals are missing rather than "not participating"
	replace o_lmparta=. if o_refus_w==1
	replace o_lmpart=. if o_refus_w==1 
	* positive earnings without a usable hourly wage: participation unknown
	replace o_lmparta=. if ((o_grearn>0 & o_grearn!=.)) & o_lmparta==0
	replace o_lmpart=. if ((o_grearn>0 & o_grearn!=.)) & o_lmpart==0
	lab var o_lmpart "labor market participation (=positive wage)"
	lab var o_lmparta "labor market participation (=positive wage)" 

*******************************************************generate one bula variable************************************************	
	* plain copy of the region code
	gen b_bula_all=b_bula	
	lab var b_bula_all "Bundesland of residence"
*******************************************************Cohort dummies************************************************

*generate cohort dummies
	* 0/1 indicators built from b_age; a missing b_age yields 0 here because the
	* upper-bound test fails for missing values.
	* NOTE(review): ages 36-40 fall into none of the four cohorts, and
	* b_cohort4 covers 56-64 while its label says 56-65 -- confirm intent.
	gen b_cohort1=(b_age>=26 & b_age<36)
	lab var b_cohort1 "People aged 26-35"
	gen b_cohort2=(b_age>40 & b_age<46)
	lab var b_cohort2 "People aged 41-45"
	gen b_cohort3=(b_age>45 & b_age<56)
	lab var b_cohort3 "People aged 46-55"
	gen b_cohort4=(b_age>55 & b_age<65)
	lab var b_cohort4 "People aged 56-65"
	
	* five-year age bands; unlike the cohorts these are missing when b_age is missing
	gen b_age26_30=(b_age>=26 & b_age<31)
	replace b_age26_30=. if b_age==. 
	lab var b_age26_30 "Individuals aged 26-30"
		
	gen b_age31_35=(b_age>=31 & b_age<36)
	replace b_age31_35=. if b_age==. 
	lab var b_age31_35 "Individuals aged 31-35"	
	
	gen b_age36_40=(b_age>=36 & b_age<41)
	replace b_age36_40=. if b_age==. 
	lab var b_age36_40 "Individuals aged 36-40"
	
***************************************************************Family background variables**********************************************************************+
*number of siblings: sisters + brothers, falling back to b_nums + b_numb
*where that sum is missing
	gen b_nrsiblings=b_sisters+b_brothers
	replace b_nrsiblings=b_nums+b_numb if b_nrsiblings==.
	lab var b_nrsiblings "Number of siblings"

*broken home: 1 for a positive, non-missing b_pctbroken; everybody else
*(including people without an answer) gets 0
	gen b_broken=(b_pctbroken>0 & b_pctbroken!=.)
	lab var b_broken "broken home some time until 15 (note: no answer=0)"
	gen youth=0

***************Education in west Germany
	* Later statements override earlier ones, so the order below matters:
	* first b_edulast (1 -> west, 2 -> not west), then o_scheast.
	replace b_eduwest=1 if b_edulast==1
	replace b_eduwest=0 if b_edulast==2
	
	* o_scheast strictly between 0 and 6 -> not west; code .b -> west
	replace b_eduwest=0 if o_scheast<6 & o_scheast>0
	replace b_eduwest=1 if o_scheast==.b
	lab var b_eduwest "Educated in West Germany"
***
	
	