*********************************************************************
*3/30/04 This program creates and cleans variables for use in estimation models
*of portfolio choice
*updated 7/12/06
*********************************************************************

*Session setup: start from an empty dataset, set resource limits, turn off
*screen paging, and change to the directory holding the analysis extract
clear
*set memory 256m
*use this memory setting if include pre-65 obs
set memory 256m
set matsize 80
set more off
cd "C:\Documents and Settings\maestas\My Documents\Medicare HMOs\Data\Latest Data"

*select dataset, 65+ or all ages
*(the active line loads the 65+ file; the all-ages alternative is commented out below)
use hrs_cmsecdcbal_wide65_032305.dta, clear
*this dataset has weiss data but not hmo data, but is for analyzing those pre-65
*currently need to comment out two sections at end of program, processing of MSA and Financial industry vars
*use hrs_cmsecdcbal_wideall_weiss.dta

*old-style match merge keyed on hhidpn: the master file is sorted on the key first
*NOTE(review): presumably the using file is already sorted by hhidpn -- confirm
sort hhidpn
*merge nursinghome and other expectations variables onto file
merge hhidpn using "C:\Documents and Settings\maestas\My Documents\Medicare HMOs\Data\nursinghome.dta"
*inspect match rates, then discard records that appear only in the using file
tab _merge
drop if _merge==2
*_merge must be dropped before the next merge can create it again
drop _merge

sort hhidpn
*merge pension wealth variables onto file
merge hhidpn using "C:\Documents and Settings\maestas\My Documents\Medicare HMOs\Data\penwealth.dta"
*same clean-up as above: keep master-only and matched records
tab _merge
drop if _merge==2
drop _merge


**********************************************************************
*Subjective Expectations, wave 5:
*Life Expectancy, Expected Bequests (Leaving), Expected Bequests (Receiving)
**********************************************************************
*For each item build
*   <var>_rcd : copy of the item with every missing value set to -1
*   <x>miss   : 0/1 flag equal to 1 where the item is missing
*so the recoded item and its missing dummy can enter models together.
*
*Author notes on missingness:
*  r5liv10r: 32% of Ahead cohort missing, 17-19% of CODA and HRS missing.
*            Documentation suggests that those 90+ were skipped, but this
*            doesn't reduce the high missing rate much. About 10% of obs are
*            skipped due to skip patterns, the rest are DK's, RF, etc.
*  r5beq10k: 15% missing, chosen bc it has the lowest missing value rate in sample
*  r5inher:  17% missing

*life expectancy
gen r5liv10r_rcd = cond(r5liv10r >= ., -1, r5liv10r)
gen r5livmiss    = r5liv10r >= .

*expected bequest (leaving)
gen r5beq10k_rcd = cond(r5beq10k >= ., -1, r5beq10k)
gen r5beqmiss    = r5beq10k >= .

*expected bequest (receiving)
gen r5inher_rcd  = cond(r5inher >= ., -1, r5inher)
gen r5inhermiss  = r5inher >= .

*********************************************
*SR Prob Enter Nursing Home in Next 5 Years
*********************************************
*The same steps are applied to wave 4 and then wave 5.
*Anything above 100 is treated as missing. Stata missing values compare
*greater than every number, so they fall into the above-100 branch as well.
*   _rcd  : value if it is 100 or less, otherwise -1
*   _miss : 1 if the value is above 100, otherwise 0
*NOTE(review): the household value for married respondents is the SUM of the
*respondent and spouse values, so a valid pair adding to more than 100 is
*also flagged as missing -- confirm this is intended

foreach w in 4 5 {
    *respondent level var
    gen r`w'pnhm5y_rcd = cond(r`w'pnhm5y <= 100, r`w'pnhm5y, -1)
    gen r`w'pnhm5y_miss = (r`w'pnhm5y > 100)
    tabmiss r`w'pnhm5y r`w'pnhm5y_rcd r`w'pnhm5y_miss

    *household level var: own value if not married, own plus spouse if married,
    *falling back on whichever report is present when the other equals system missing
    gen h`w'pnhm5y = r`w'pnhm5y if r`w'marr == 0
    replace h`w'pnhm5y = r`w'pnhm5y + s`w'pnhm5y if r`w'marr == 1
    replace h`w'pnhm5y = r`w'pnhm5y if r`w'marr == 1 & s`w'pnhm5y == .
    replace h`w'pnhm5y = s`w'pnhm5y if r`w'marr == 1 & r`w'pnhm5y == .
    gen h`w'pnhm5y_rcd = cond(h`w'pnhm5y <= 100, h`w'pnhm5y, -1)
    gen h`w'pnhm5y_miss = (h`w'pnhm5y > 100)
    tabmiss h`w'pnhm5y h`w'pnhm5y_rcd h`w'pnhm5y_miss
}

*side-by-side check of the raw and constructed variables for both waves
sum r4pnhm5y s4pnhm5y h4pnhm5y h4pnhm5y_rcd r5pnhm5y s5pnhm5y h5pnhm5y h5pnhm5y_rcd

*************************
*Long-term Care Insurance
*************************
*household level measure: 1 if either the respondent or the spouse reports 1,
*otherwise 0 (a missing report therefore counts as 0)
foreach w in 4 5 {
    gen h`w'hiltc = (s`w'hiltc == 1 | r`w'hiltc == 1)
}

*tabulate household, respondent and spouse versions, wave 4 then wave 5
foreach w in 4 5 {
    foreach who in h r s {
        tab `who'`w'hiltc
    }
}

***************************
*County HMO Variables
***************************
*Differences between the "0" and "98" versions of each county plan measure
*(suffix 0 minus suffix 98), plus 0/1 indicators for the sign of the change.

gen dnumplans=numplans0-numplans98
gen dorg=numplans_org0-numplans_org98
gen drisk=numplans_risk0-numplans_risk98

*indicators for a decline: a missing difference is not < 0, so these are 0 when the change is unknown
gen droporg=dorg<0
gen droprisk=drisk<0
gen dropplans=dnumplans<0

*indicator for an increase: Stata treats missing as larger than any number, so a bare
*drisk>0 would set addrisk=1 wherever drisk is missing. Exclude missing explicitly
*so that addrisk is 0 when the change is unknown, matching the drop indicators above.
gen addrisk=(drisk>0 & drisk<.)

gen dpctpen=pctpen0-pctpen98

/*
gen dnumplans_cat=1 if dnumplans>0
replace dnumplans_cat=0 if dnumplans==0
replace dnumplans_cat=-1 if dnumplans<0
*/

/*
*plans per capita
gen planspcap2000=numplans2000/pop2000
*/


****************************
*Race/Ethnicity
****************************
*Combine race (raracem) and Hispanic origin (rahispan) into one variable:
*   non-Hispanic: keep raracem, but swap codes 3 and 4 so that 3 is free for Hispanic
*   Hispanic    : code 3
*   rahispan neither 0 nor 1: left missing

*USE NEW RACE VAR WHEN RERUN
gen newrace = raracem if rahispan == 0
recode newrace 3=4 4=3
replace newrace = 3 if rahispan == 1
label def racex 1 "1. White" 2 "2. Black" 3 "3. Hispanic" 4 "4. Other"
label values newrace racex

*one dummy per category, then give the dummies readable names
tab newrace, gen(race)
local k 0
foreach grp in white black hispanic other {
    local k = `k' + 1
    rename race`k' `grp'
}

***************************
*Education
***************************
*attach value labels to the five-category source variable
label def educ 1 "1. < H.S." 2 "2. GED" 3 "3. H.S. Grad" 4 "4. Some Coll" 5 "5. Coll+"
label values raeduc educ

*four-category version: GED (2) and H.S. Grad (3) are pooled into category 2,
*and the two higher categories shift down by one
gen neweduc = raeduc
recode neweduc 3=2 4=3 5=4
label def educ2 1 "1. < H.S." 2 "2. H.S. Grad/GED" 3 "3. Some Coll" 4 "4. Coll+"
label values neweduc educ2

*low-education indicators: below high school, and high school or below
gen lowed = (neweduc == 1)
gen lowedhs = inlist(neweduc, 1, 2)

*dummy for each education category
tab neweduc, gen(educ)

***************************
*Gender
***************************

*dummy for each gender category
tab ragender, gen(gender)

***************************
*Insurance Status
***************************

*hierarchy for original inscat var is: medicaid, employer supp, indiv supp, mcare only
label def inscat 1 "1. mcare only" 2 "2. mcare + employer ins" 3 "3. mcare + indiv supp" 4 "4. mcare + mdcaid"
foreach w in 4 5 {
    label values r`w'inscat inscat
}

*recode insurance category variable making hmo a mutually exclusive category
*new hierarchy is medicaid, employer supp, indiv supp, mcare hmo, mcare only
*
*Mapping from the original to the new codes (wave 5 is built first, then wave 4):
*   original 1 -> 1 when the hmo flag is anything other than 1, 2 when it is 1
*   original 2, 3, 4 -> 3, 4, 5
*   anything else -> missing
foreach w in 5 4 {
    gen r`w'inscatnew = 1 + (r`w'hmo == 1) if r`w'inscat == 1
    replace r`w'inscatnew = r`w'inscat + 1 if inlist(r`w'inscat, 2, 3, 4)
}

label def inscat2 1 "1. mcare ffs only" 2 "2. mcare hmo" 3 "3. mcare + employer ins" 4 "4. mcare + indiv supp" 5 "5. mcare + mdcaid"
foreach w in 4 5 {
    label values r`w'inscatnew inscat2
}

*dummies for the ORIGINAL wave 5 categories
tab r5inscat, gen(r5inscat)

*************************************************
*Change in insurance status, omit medicaid people 
*************************************************
*changeplan compares the wave 4 and wave 5 values of r#inscatnew
*(1 FFS only, 2 HMO, 3 employer supp, 4 indiv supp/Medigap, 5 Medicaid):
*   missing if on Medicaid in wave 4
*   -1 reduce risk  : FFS to HMO, E supp or Medigap; Medigap to HMO or E supp; HMO to E supp
*   +1 increase risk: HMO, E supp or Medigap to FFS; HMO or E supp to Medigap; E supp to HMO
*    0 otherwise

gen changeplan = 0 if r4inscatnew ~= 5
replace changeplan = -1 if (r4inscatnew == 1 & inlist(r5inscatnew, 2, 3, 4)) | (r4inscatnew == 4 & inlist(r5inscatnew, 2, 3)) | (r4inscatnew == 2 & r5inscatnew == 3)
replace changeplan = 1 if (inlist(r4inscatnew, 2, 3, 4) & r5inscatnew == 1) | (inlist(r4inscatnew, 2, 3) & r5inscatnew == 4) | (r4inscatnew == 3 & r5inscatnew == 2)

*make dummies (0 wherever changeplan is 0 or missing)
gen morerisk = (changeplan == 1)
gen lessrisk = (changeplan == -1)

*change in respondent hmo status between waves, with join and leave dummies
gen dhmo = r5hmo - r4hmo
gen addhmo = (dhmo == 1)
gen drophmo = (dhmo == -1)

*make household level hmo status (corr between R and Spouse is .76)
*left missing when the respondent value equals system missing
gen h5hmo = (s5hmo == 1 | r5hmo == 1) if r5hmo ~= .

*focus only on those in mcare hmo's, adjusment below is relevant only to those with employer supplemental insurance
/*
replace r4hmo=s4hmo if r4hmo<. & r4getsps==1
replace r5hmo=s5hmo if r5hmo<. & r5getsps==1
*/

*transitions defined on the ORIGINAL categories, only where both waves are not system missing:
*   newsupp : category 1 or 4 in wave 4 and category 2 or 3 in wave 5
*   newmcaid: category 1 to 3 in wave 4 and category 4 in wave 5
gen newsupp = (inlist(r4inscat, 1, 4) & inlist(r5inscat, 2, 3)) if r4inscat ~= . & r5inscat ~= .
gen newmcaid = (inrange(r4inscat, 1, 3) & r5inscat == 4) if r4inscat ~= . & r5inscat ~= .


***************************
*Marital Status
***************************

label def mstat 1 "1. married" 2 "2. married, sp absent" 3 "3. partnered" 4 "4. separated" 5 "5. divorced" 6 "6. separated/divorced" 7 "7. widowed" 8 "8. never married"
foreach w in 4 5 {
    label values r`w'mstat mstat
}

*collapse the eight codes into four 0/1 indicators per wave:
*   1-3 married or partnered, 4-6 separated or divorced, 7 widowed, 8 never married
*a missing marital status gives 0 on all four
foreach w in 4 5 {
    gen r`w'married = inrange(r`w'mstat, 1, 3)
    gen r`w'divorced = inrange(r`w'mstat, 4, 6)
    gen r`w'widowed = (r`w'mstat == 7)
    gen r`w'nevmarr = (r`w'mstat == 8)
}

*change in married status between waves: -1, 0 or 1
gen dmstat = r5married - r4married

***************
*Pension Wealth
***************
*make household level variable

*descriptives for married respondents before combining
sum *5sswlth *5penwlth if r5married==1
tabmiss *5sswlth *5penwlth if r5married==1

*flags for exactly zero Social Security / pension wealth (respondent and spouse)
gen r5nosswlth = (r5sswlth == 0)
gen s5nosswlth = (s5sswlth == 0)
gen s5nopenwlth = (s5penwlth == 0)
gen r5nopenwlth = (r5penwlth == 0)

*share with zero SS wealth under progressively stricter age restrictions
sum *nosswlth if r5married==1 & r4agey_e>=65
sum *nosswlth if r5married==1 & r4agey_e>=65 & s4agey_e>=65
sum *nosswlth if r5married==1 & r4agey_e>=70 & s4agey_e>=70
*female SSW about 60% of male SSW, which is about right
sum *5sswlth if r5married==1 & ragender==1 & s4agey_e>=65

*only define variable for couples where both spouses are older than 65
*singles (r5married==0) simply take the respondent value
*NOTE(review): the age test uses WAVE 4 ages with wave 5 wealth, is >=65 rather than >65,
*and a missing age also satisfies >=65 in Stata -- confirm all three are intended
foreach src in sswlth penwlth {
    gen h5`src' = r5`src' + s5`src' if r5married == 1 & r4agey_e >= 65 & s4agey_e >= 65
    replace h5`src' = r5`src' if r5married == 0
}

*combine public and private pension wealth
gen h5totpenwlth = h5sswlth + h5penwlth
*inverse hyperbolic sine transform on pension wealth: ln(x + sqrt(x^2 + 1))
gen h5ashtotpenw = ln(h5totpenwlth + sqrt(h5totpenwlth^2 + 1))

*weighted distribution, then four categories at hard-coded cutpoints;
*recode() returns the first listed cutpoint that is >= the value
*NOTE(review): cutpoints presumably come from the weighted percentiles printed above -- confirm
sum h5totpenwlth [aw=r5wgtr], detail
gen h5totpenwlthcat = recode(h5totpenwlth, -1, 153261.7, 260143.8, 414977.9,7000000)
tab h5totpenwlthcat, gen(h5totpenwlthq)
sum h5totpenwlthq*


*******************
*Wealth
*******************
*Net worth (h#atota) categories from hard-coded cutpoints. recode() returns the first
*listed cutpoint that is >= the value, so each category is labelled by its upper bound.
*NOTE(review): cutpoints presumably come from the weighted -summarize, detail- output
*printed just before each recode -- confirm when the sample changes

*make wealth quartiles--redo with log wealth?
*wave 5 first, then wave 4
local cuts5 "-150000, 46300, 148000, 362000,6000000"
local cuts4 "-900000, 48000,  139800, 329500,30000000"
foreach w in 5 4 {
    summ h`w'atota [aw=r`w'wgtr], detail
    gen h`w'wealthcat = recode(h`w'atota, `cuts`w'')
    tab h`w'wealthcat
    tab h`w'wealthcat, gen(h`w'wquart)
    summ h`w'wquart*
}

*wave 4 quintile and decile versions
gen h4wealthcat5 = recode(h4atota, -900000, 26000, 85000, 181600, 391000, 30000000)
tab h4wealthcat5, gen(h4wquint)

gen h4wealthcat10 = recode(h4atota, -900000, 1600, 26000, 53014, 85000, 126000, 181600, 256000, 391000, 668100, 30000000)
tab h4wealthcat10, gen(h4wdecile)

*changes in wealth
*inflate 1998 wealth to 2000 dollars using the ratio 172.3/163
gen h4atotax = (h4atota*(172.3/163))
*inverse hyperbolic sine of inflated wave 4 wealth: ln(x + sqrt(x^2 + 1))
gen h4ashtotax = ln(h4atotax + sqrt(h4atotax^2 + 1))
*NOTE(review): h5ashtota is not created anywhere in this program, so it is presumably
*already on the input file -- confirm
gen dashtota = h5ashtota - h4ashtotax

*make a linear spline in wealth (in units of 10,000) with knots at percentiles
*NOTE(review): the original comment said knots at quintiles, but the call below asks for
*3 spline variables; the 5-piece run is only shown in the commented-out output -- confirm
gen h4atota_rescale = h4atota/10000
mkspline h4wealth 3=h4atota_rescale, pctile
/*
. mkspline h4test 5=h4atota, pctile

             |     knot1      knot2      knot3      knot4 
-------------+--------------------------------------------
     h4atota |     35000      89000     173000     350000 
*/

*********************
*Asset Ownership
*********************
*ownership dummy = 1 when the corresponding asset value is above zero
*   home: atoth, bus: absns, re: arles
*(a missing value also compares greater than zero in Stata and so gives 1)

local n 0
foreach kind in home bus re {
    local n = `n' + 1
    local src : word `n' of atoth absns arles
    foreach w in 4 5 {
        gen h`w'own`kind' = (h`w'`src' > 0)
    }
}

*change in ownership between waves: -1, 0 or 1
foreach kind in home bus re {
    gen down`kind' = h5own`kind' - h4own`kind'
}


********************
*Risky Assets
********************
*Naming, for definition number k and wave #:
*   h#riskassk : value of risky assets
*   h#totfassk : total financial assets (the denominator)
*   h#riskyk   : fraction of financial assets that is risky
*   h#anyriskyk: holds positive risky assets

*recode those with no financial assets as holding zero risky assets
*note no missing obs on these vars
*need to recode h4safe1 and h5safe1
foreach w in 5 4 {
    forvalues k = 1/4 {
        replace h`w'risky`k' = 0 if h`w'riskass`k' == 0
    }
}

*no missings to worry about
foreach w in 4 5 {
    foreach k in 4 2 1 {
        gen h`w'anyrisky`k' = (h`w'risky`k' > 0)
    }
}

*add new risky assets definition 6--bonds classified as safe
*denominator same as in definition 1, can't compare with def 2 bc don't know if IRAs have bonds/stocks
foreach w in 4 5 {
    gen h`w'totfass6 = h`w'astck+h`w'abond+h`w'achck+h`w'acd
    gen h`w'riskass6 = h`w'astck
    gen h`w'risky6 = h`w'riskass6/h`w'totfass6
    replace h`w'risky6 = 0 if h`w'riskass6 == 0
    gen h`w'anyrisky6 = (h`w'risky6 > 0)
}

*add new risky assets definition 5--include business, omit non-prim real estate
*here the share is only computed when the denominator is positive
foreach w in 4 5 {
    gen h`w'totfass5 = h`w'astck+h`w'abond+h`w'achck+h`w'acd+h`w'aira+h`w'absns
    gen h`w'riskass5 = h`w'astck+h`w'abond+h`w'aira+h`w'absns
    gen h`w'risky5 = h`w'riskass5/h`w'totfass5 if h`w'totfass5 > 0
    replace h`w'risky5 = 0 if h`w'riskass5 == 0
    gen h`w'anyrisky5 = (h`w'risky5 > 0)
}

*entry into and exit from risky holdings (definition 2), each defined only for
*households that could make that move given their wave 4 status
gen h5addrisky2 = (h5anyrisky2 == 1) if h4anyrisky2 == 0
gen h5droprisky2 = (h5anyrisky2 == 0) if h4anyrisky2 == 1

*change in the risky share between waves, definitions 1 to 5
forvalues k = 1/5 {
    gen drisky`k' = h5risky`k' - h4risky`k'
}

*set up censoring flag for tobit models on the change in assets
*no obs have both zero, half have both one
*bothzero / bothone: 0/1 indicators for the risky share (definition 2) sitting at the
*same bound in both waves. The earlier form of these two lines had no =exp after the
*variable name and a single = inside the condition, which stops the do-file.
gen bothzero=(h4risky2==0 & h5risky2==0)
gen bothone=(h4risky2==1 & h5risky2==1)

*drisky2_cen: 0 = not censored, -1 = censored from below, 1 = censored from above
*start at 0 when the wave 4 share is strictly interior, or when both waves sit at the same bound
gen drisky2_cen=0 if h4risky2~=0 & h4risky2~=1
replace drisky2_cen=0 if h4risky2==0 & h5risky2==0
replace drisky2_cen=0 if h4risky2==1 & h5risky2==1
*moved up to a share of one / moved down to a share of zero
replace drisky2_cen=1 if h5risky2==1 & h4risky2~=1
replace drisky2_cen=-1 if h5risky2==0 & h4risky2~=0
*started at a bound and moved away from it
replace drisky2_cen=-1 if h4risky2==1 & h5risky2~=1
replace drisky2_cen=1 if h4risky2==0 & h5risky2~=0

*holds any financial assets, definitions 1 to 4, wave 4 then wave 5
foreach w in 4 5 {
    forvalues k = 1/4 {
        gen h`w'anyfass`k' = (h`w'totfass`k' > 0)
    }
}

*ratio of business assets to wealth
*only defined for households with positive business assets
gen bustowlth = h5absns/h5atotn if h5absns > 0

*make asset shares for descriptives
*share of each asset in definition 2 financial assets, set to zero when the asset itself is zero
foreach a in chck cd stck bond ira {
    gen h5sh`a' = h5a`a'/h5totfass2
    replace h5sh`a' = 0 if h5a`a' == 0
}

*conditional shares: same ratios, but only for households with positive financial assets
foreach a in chck cd stck bond ira {
    gen h5csh`a' = h5a`a'/h5totfass2 if h5totfass2 > 0
}
*this next line is same as gen h5crisky2Q=(h5astck+ h5abond + h5aira)/h5totfass2 if h5totfass2>0
gen h5crisky2 = h5risky2 if h5totfass2 > 0

*another defn of assets that includes house value, watch 13 hh's have negative house values
gen h5totfass7 = h5totfass4+h5atoth
gen h5shhouse7 = h5atoth/h5totfass7
replace h5shhouse7 = 0 if h5atoth == 0
foreach a in bsns rles {
    gen h5sh`a'7 = h5a`a'/h5totfass7
    replace h5sh`a'7 = 0 if h5a`a' == 0
}

*definition 7 risky assets: stocks, bonds, IRAs, business, real estate and house
gen h5riskass7 = h5astck+h5abond+h5aira+h5absns+h5arles+h5atoth
gen h5risky7 = h5riskass7/h5totfass7
replace h5risky7 = 0 if h5riskass7 == 0
gen h5anyrisky7 = (h5risky7 > 0)

*change in holding any risky assets
gen danyrisky2 = h5anyrisky2-h4anyrisky2


**************************
*Work
**************************
*working = 1 if the full-time or the part-time flag for THAT wave equals 1
*(missing flags count as not working)

gen r4wk=r4wkft==1 | r4wkpt==1
*wave 5 must use the wave 5 part-time flag; this line previously tested r4wkpt
gen r5wk=r5wkft==1 | r5wkpt==1
*change in work status between waves: -1, 0 or 1
gen dwk=r5wk-r4wk

**********************
*Life Insurance
**********************

*64% have life insurance, a lot of don't know's on value, need to impute

*has-any-life-insurance flag: 5 becomes 0, codes 8 and 9 become missing
foreach w in 4 5 {
    recode r`w'anylife 5=0 8=. 9=.
}

*face value: anything above 9999996 is set to missing
foreach w in 4 5 {
    replace r`w'lifeins = . if r`w'lifeins > 9999996
}

*change in having any life insurance between waves
gen danylife = r5anylife - r4anylife

*******************
*Health
*******************
*Household indicators are 1 when the respondent OR the spouse variable equals 1,
*and 0 otherwise (so missing reports count as 0).

*if health condition in 1998 (wave 4) or 2000 (wave 5) for R or spouse
foreach w in 4 5 {
    foreach c in hibpe diabe cancre lunge hearte stroke psyche arthre {
        gen h`w'`c' = (r`w'`c' == 1 | s`w'`c' == 1)
    }
}

*onset of health condition between 1998 & 2000 for R or spouse
foreach c in hibps diabs cancrs lungs hearts stroks psychs arthrs {
    gen h5`c' = (r5`c' == 1 | s5`c' == 1)
}

*major and minor shocks between 1998-2000
*inlist(1, a, b, c, d) is 1 when any of the listed variables equals 1
gen h5majors = inlist(1, h5cancrs, h5lungs, h5hearts, h5stroks)
gen h5minors = inlist(1, h5hibps, h5diabs, h5psychs, h5arthrs)
gen r5majors = inlist(1, r5cancrs, r5lungs, r5hearts, r5stroks)
gen r5minors = inlist(1, r5hibps, r5diabs, r5psychs, r5arthrs)

*major conditions
gen h5majore = inlist(1, h5cancre, h5lunge, h5hearte, h5stroke)
gen h4majore = inlist(1, h4cancre, h4lunge, h4hearte, h4stroke)
gen r5majore = inlist(1, r5cancre, r5lunge, r5hearte, r5stroke)

*new health conditions: 1 if r5conds is above zero, missing where r5conds is missing
gen newcond = (r5conds > 0) if r5conds < .
tab newcond, missing

*self-reported health in the two highest codes (4 or 5)
foreach w in 4 5 {
    gen r`w'fairpoor = inlist(r`w'shlt, 4, 5)
}

**********************
*Income
**********************

*remove capital income from income measure
foreach w in 4 5 {
    gen h`w'inctot = h`w'itot - h`w'icap
}

*combine spousal SSA retirement income
*respondent plus spouse, or respondent alone when the spouse value equals system missing
foreach w in 4 5 {
    gen h`w'isret = r`w'isret + s`w'isret if s`w'isret ~= .
    replace h`w'isret = r`w'isret if s`w'isret == .
}

*SSA retirement income as a share of total household income (capital income included)
foreach w in 4 5 {
    gen h`w'ssainc = h`w'isret/h`w'itot
}
sum h4ssainc h5ssainc 

*BUT FOR X-SEC MODELS I WANT 1998$
*inflate 1998 income to 2000 dollars using the ratio 172.3/163
*this overwrites h4inctot, so everything below uses the inflated values
replace h4inctot = (h4inctot*(172.3/163))

*inverse hyperbolic sine of income, ln(x + sqrt(x^2 + 1)), and its change between waves
foreach w in 4 5 {
    gen h`w'ashinctot = ln(h`w'inctot + sqrt(h`w'inctot^2 + 1))
}
gen dashinctot = h5ashinctot - h4ashinctot

**make a linear spline in non-capital income (in thousands) with knots at percentiles
*NOTE(review): the original comment said knots at quintiles, but the call below asks for
*3 spline variables; the 5-piece run is only shown in the commented-out output -- confirm
gen h4inctot_rescale = h4inctot/1000
mkspline h4income 3=h4inctot_rescale, pctile
/*
. mkspline h4test 5=h4inctot, pctile displayknots

             |     knot1      knot2      knot3      knot4 
-------------+--------------------------------------------
    h4inctot |  11352.77   16495.13   23583.33   35642.21 
*/

*income quintile and decile categories at hard-coded cutpoints, with dummies for each
gen h4incomecat5 = recode(h4inctot, 0, 10464.85, 16386.89, 23547.17, 35926.35, 651146)
tab h4incomecat5, gen(h4incquint)

gen h4incomecat10 = recode(h4inctot, 0, 7610.797, 10464.85, 13237.19, 16386.89, 19661.23, 23547.17, 28454.23, 35926.35, 50738.65, 651146)
tab h4incomecat10, gen(h4incdecile)


**********************
*Pension Holding
**********************
*any pension: positive r#ipena or positive r#isret
*private pension: positive r#ipena only
*(a missing amount also compares greater than zero in Stata and so gives 1)

foreach w in 4 5 {
    gen r`w'anypen = (r`w'ipena > 0 | r`w'isret > 0)
}
foreach w in 4 5 {
    gen r`w'privpen = (r`w'ipena > 0)
}

**************************
*Number of Residents in HH
**************************

*change in household size between waves
gen dhhres = h5hhres - h4hhres

*******************************
*Out-of-pocket Medical Expenses
*******************************

*make household oop expenses
*respondent plus spouse, or respondent alone when the spouse value equals system missing
gen h5oopmd = cond(s5oopmd ~= ., r5oopmd + s5oopmd, r5oopmd)

*ratio of oop expenses to net worth (household and respondent spending, same denominator)
foreach who in h r {
    gen `who'5oopratio = `who'5oopmd/h5atota
}

*REDO WEIGHTED
*unweighted distribution, then four categories at hard-coded cutpoints
summ h5oopmd, detail
gen h5oopcat = recode(h5oopmd, -10, 600, 1950, 4610, 275000)
tab h5oopcat

*************************
*Recode Risk Aversion Var
*************************
*Can't use risk aversion variables; AHEAD cohort wasn't asked, and they comprise 51% of the sample
*the RANDHRS variable r4risk6 that I added to file does not help matters because the HRS cohort was not re-asked, so have to use a wave 1 variable.

*dummies are built BEFORE missing is recoded, so there is no dummy for the 99 category
tab risk, gen(riskav)

*there are a lot of missing values on the risk aversion variables  
replace risk = 99 if risk == .

********************************************************
*Make HHID for clustering on household, ID Hubs & Wives
********************************************************

*household id = hhidpn with its last three digits removed
gen hhid = int(hhidpn/1000)

*wife: second record in the household when sorted by ascending ragender, and ragender is 2
bysort hhid (ragender): gen wife = (ragender == 2 & _n == 2)
*husband: second record in the household when sorted by descending ragender, and ragender is 1
gsort +hhid -ragender
by hhid: gen husband = (ragender == 1 & _n == 2)

*********************************
*Urban/Rural Continuum, and Dummy
*********************************

*destring urban/rural continum vars into numeric copies named by year
local n 0
foreach yr in 2000 1998 1996 1994 {
    local n = `n' + 1
    local src : word `n' of UrbRural0 UrbRural98 UrbRural96 UrbRural94
    destring `src', gen(urban`yr')
}

*dummy for largest MSA's only (continuum code 0 in 2000)
gen mosturban = (urban2000 == 0)

****************************
*Destring MSA var, make MSA dummy
****************************

*keep only the 1997 MSA code, converted to numeric in place
destring msa1997, replace
drop msa1998 msa1999 msa2000 msa2001 msa2002 msa2003

*MSA dummy: nomsa1997 flags a system-missing MSA code, msa is its complement
gen nomsa1997 = (msa1997 == .)
gen msa = !nomsa1997


************************************
*Additional Financial Industry Vars
************************************

*recode emp52_1997 to take zero for missings so can 
*include in models with dummy var for missings
*(zero is assigned wherever nomsa1997 is 1; otherwise the original value is kept)
gen emp52_1997_rcd = cond(nomsa1997 == 1, 0, emp52_1997)

*financial industry employment per capita
gen emp52percap = emp52_1997/pop1998

************************************
*Recode Self-Employment Vars
************************************

*recode missings to zero so can include in models
*1 only when the source variable equals 1; every other value, missing included, gives 0
foreach w in 4 5 {
    gen r`w'slfemp_rcd = (r`w'slfemp == 1)
}


************************************
*SAVE DATA
************************************

*write the cleaned file to the current working directory, overwriting any existing copy
save hrs_cmsec_clean.dta, replace
*alternative output name, commented out (the name suggests the all-ages file -- confirm)
*save hrs_cmsec_all_clean.dta, replace
*list all variables in the saved dataset for the log
describe
