* Start from a clean session (clear all) and switch off output paging (set more off)
* so the do-file runs straight through without waiting for a keypress.
clear all
set more off

* Load the main extract. No path is given, so nlsy_dl_021014.dta is read from the
* current working directory.
use nlsy_dl_021014

****** This file starts from the original download from dbgator and re-cleans it for the analysis of July 28, 2014.

*** Step 1. merge original download with family background data downloaded Feb 21, 2014.

* One-to-one match on CASEID. No keep()/assert()/nogenerate options are given, so
* observations found in only one of the two files are retained and the match status
* is recorded in the new variable _merge, which stays in the data from here on.
merge 1:1 CASEID using nlsy_fambground.dta

*** Step 2. rename and transform variables

* i. wages --
*    average weekly wage = income from salary in the past calendar year (pcy)
*    divided by weeks worked pcy, filled in only when income is non-negative and
*    weeks worked is positive (missing otherwise); then take the log.
*    The income variable is named differently across rounds, hence the groups below.

* rounds 79-81: income is Q13_5_yy
forvalues yy = 79/81 {
	generate avgwklyW`yy' = Q13_5_`yy' / WKSWK_PCY_`yy' ///
		if Q13_5_`yy' >= 0 & WKSWK_PCY_`yy' > 0
}

* rounds 82-92, 94, 96, 98 and 00: income is Q13_5_TRUNC_REVISED_yy
foreach yy in 82 83 84 85 86 87 88 89 90 91 92 94 96 98 00 {
	generate avgwklyW`yy' = Q13_5_TRUNC_REVISED_`yy' / WKSWK_PCY_`yy' ///
		if Q13_5_TRUNC_REVISED_`yy' >= 0 & WKSWK_PCY_`yy' > 0
}

* round 93: income is R4295101_REVISED_93
generate avgwklyW93 = R4295101_REVISED_93 / WKSWK_PCY_93 ///
	if R4295101_REVISED_93 >= 0 & WKSWK_PCY_93 > 0

* rounds 02-10 (even years): income is Q13_5_TRUNC_yy
foreach yy in 02 04 06 08 10 {
	generate avgwklyW`yy' = Q13_5_TRUNC_`yy' / WKSWK_PCY_`yy' ///
		if Q13_5_TRUNC_`yy' >= 0 & WKSWK_PCY_`yy' > 0
}

* log weekly wage, named with the four-digit year (LavgwklyW1979 ... LavgwklyW2010);
* the two-digit suffix of the source variable is the last two digits of the year
foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	local yy = substr("`yr'", 3, 2)
	generate LavgwklyW`yr' = ln(avgwklyW`yy') if avgwklyW`yy' > 0
}

* ii. experience
*      age at interview minus hgc, filled in only when age is non-negative
*      and hgc is at least 1 (missing otherwise); named exp<yyyy>

foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	* two-digit round suffix used by the source variables
	local yy = substr("`yr'", 3, 2)
	generate exp`yr' = AGEATINT_`yy' - HGCREV`yy' ///
		if HGCREV`yy' >= 1 & AGEATINT_`yy' >= 0
}

* iii. Rotter items
*    per item ROTTER_kA: code 2 becomes 0, codes -10..-1 become missing,
*    every other value is kept; item 4 is then flipped (1 - x) to align signs.
*    rotterbar = row mean of the four items, rotterJi = count of non-missing items.


forvalues k = 1/4 {
	recode ROTTER_`k'A (-10/-1 = .) (2 = 0), generate(rotter`k')
}
replace rotter4 = 1 - rotter4

egen rotterbar = rowmean(rotter1-rotter4)
egen rotterJi  = rownonmiss(rotter1-rotter4)

* iv. Rosenberg items
*  per item: responses 1/2 become 1, responses 3/4 become 0, codes -10..-1 become missing.
*  Source items are numbered 1-3 and 6-12 with a six-digit zero-padded id
*  (ROSENBERG_ESTEEM_000001_87 ... ROSENBERG_ESTEEM_000012_87); source items 11 and 12
*  are renamed into the otherwise unused slots 4 and 5.

foreach k of numlist 1/3 6/12 {
	* zero-pad the item number to six digits, e.g. 1 -> 000001, 10 -> 000010
	local id = string(`k', "%06.0f")
	recode ROSENBERG_ESTEEM_`id'_87 (1 2 = 1) (3 4 = 0) (-10/-1 = .), generate(rosenberg`k')
}

rename rosenberg11 rosenberg4
rename rosenberg12 rosenberg5

* flip items 3, 5, 8, 9 and 10 (after the rename) so all items are signed alike
foreach k in 3 5 8 9 10 {
	replace rosenberg`k' = 1 - rosenberg`k'
}

* rosenberg1-rosenberg10 follows dataset order (items 1 2 3 6 7 8 9 10);
* items 4 and 5 sit after rosenberg10 and are therefore listed explicitly
egen rosenbergbar = rowmean(rosenberg1-rosenberg10 rosenberg4 rosenberg5)
egen rosenbergJi  = rownonmiss(rosenberg1-rosenberg10 rosenberg4 rosenberg5)


* v. asvab items
*      per item: codes -5..-1 become missing, every other value is kept
*      create overall average as well as 4 subgroup averages
*      (<stub>bar = row mean of items, <stub>Ji = number of non-missing items)

* parallel lists: source subtest name, short stub for the new variables, number of items
local tests  ARITHMETIC_REASONING WORD_KNOWLEDGE PARAGRAPH_COMPREHENSION MATHEMATICS_KNOWLEDGE
local stubs  ar wk pc mk
local nitems 30 35 15 25

forvalues t = 1/4 {
	local test : word `t' of `tests'
	local s    : word `t' of `stubs'
	local n    : word `t' of `nitems'

	forvalues q = 1/`n' {
		recode ASVAB_`test'_`q' (-5/-1 = .), gen(asvab_`s'`q')
	}
	* items of one subtest are created back to back, so the range below is contiguous
	egen asvab_`s'bar = rowmean(asvab_`s'1-asvab_`s'`n')
	egen asvab_`s'Ji  = rownonmiss(asvab_`s'1-asvab_`s'`n')
}

* overall average and item count across all 105 items
egen asvabbar = rowmean(asvab_mk1-asvab_mk25 asvab_pc1-asvab_pc15 asvab_wk1-asvab_wk35 asvab_ar1-asvab_ar30)
* the section header below used to be fused onto the end of this command, which made
* the line a syntax error; it now sits on its own line
egen asvabJi  = rownonmiss(asvab_mk1-asvab_mk25 asvab_pc1-asvab_pc15 asvab_wk1-asvab_wk35 asvab_ar1-asvab_ar30)

* vi. some demographic variables
*     build 0/1 indicators from the sample identifiers

* sex: 1 when SAMPLE_SEX_79 is 1, 0 when it is 2, missing for anything else
generate sex = cond(SAMPLE_SEX_79 == 1, 1, cond(SAMPLE_SEX_79 == 2, 0, .))

* white: 1 when SAMPLE_RACE_79 is 3, 0 when it is 1 or 2, missing for anything else
generate white = cond(SAMPLE_RACE_79 == 3, 1, cond(inlist(SAMPLE_RACE_79, 1, 2), 0, .))

*        split categorical REGION_yy into three dummies (category 1 is the omitted one):
*        region_nc = 1 for category 2, region_s = 1 for category 3, region_w = 1 for category 4;
*        codes -10..0 become missing; new variables carry the four-digit year

foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	* two-digit round suffix used by the source variables
	local yy = substr("`yr'", 3, 2)
	recode REGION_`yy' (-10/0 = .) (2 = 1) (1 3 4 = 0), generate(region_nc`yr')
	recode REGION_`yy' (-10/0 = .) (3 = 1) (1 2 4 = 0), generate(region_s`yr')
	recode REGION_`yy' (-10/0 = .) (4 = 1) (1 2 3 = 0), generate(region_w`yr')
}

*        urban indicator: codes -10..-1 and code 2 become missing, other values are kept

foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	local yy = substr("`yr'", 3, 2)
	recode URBAN_RURAL_`yy' (-10/-1 2 = .), generate(urban`yr')
}


* vii. give hgc, weeks worked and hours worked a consistent four-digit year suffix
*      (HGCREVyy -> hgc<yyyy>, WKSWK_PCY_yy -> weeks<yyyy>, HRSWK_PCY_yy -> hours<yyyy>)


foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	* two-digit round suffix used by the source variables
	local yy = substr("`yr'", 3, 2)
	rename HGCREV`yy'     hgc`yr'
	rename WKSWK_PCY_`yy' weeks`yr'
	rename HRSWK_PCY_`yy' hours`yr'
}

* viii. recode vars from family background file
*       codes -5..-1 become missing throughout this section

* age at interview -> age<yyyy>
foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	local yy = substr("`yr'", 3, 2)
	recode AGEATINT_`yy' (-5 -4 -3 -2 -1 = .), generate(age`yr')
}

* marital status -> married<yyyy>: codes 2, 3 and 6 become 0, other non-negative values are kept
foreach yr of numlist 1979/1994 1996 1998 2000(2)2010 {
	local yy = substr("`yr'", 3, 2)
	recode MARSTAT_KEY_`yy' (-5 -4 -3 -2 -1 = .) (2 3 6 = 0), generate(married`yr')
}

* number of children -> childrenHH<yyyy>; only the 1979-1998 rounds are recoded here
foreach yr of numlist 1979/1994 1996 1998 {
	local yy = substr("`yr'", 3, 2)
	recode NUMCH`yy' (-5 -4 -3 -2 -1 = .), generate(childrenHH`yr')
}

* parents' hgc: codes -4..-1 are set to missing in place
mvdecode HGC_FATHER_1979 HGC_MOTHER_1979, mv(-4 -3 -2 -1)

* black: 1 when SAMPLE_RACE_79 is 2, 0 when it is 1 or 3
recode SAMPLE_RACE_79 (1 3 = 0) (2 = 1), gen(black)
* hispanic: 1 when SAMPLE_RACE_79 is 1 (left unchanged), 0 when it is 2 or 3
recode SAMPLE_RACE_79 (2 3 = 0), gen(hispanic)
* urbanat14: FAM_6_79 value 1 is kept as 1, values 2 and 3 become 0.
* Negative codes now become missing, as in every other recode in this file; previously
* -3 was folded into 0 and any other negative code was left as-is inside the dummy.
* NOTE(review): assumes negative FAM_6_79 values are non-response codes -- confirm against the codebook.
recode FAM_6_79 (-5/-1 = .) (2 3 = 0), gen(urbanat14)


*** Step 3. reshape
*    wide -> long: one row per CASEID and year; each stub below gathers its
*    <stub><yyyy> columns into a single variable
local stubs age hgc weeks hours exp married childrenHH ///
	region_nc region_s region_w urban LavgwklyW
reshape long `stubs', i(CASEID) j(year)

*** Step 4. save data
*    overwrites nlsy_v1_full.dta if it already exists

save nlsy_v1_full, replace
