
* ---- Session set-up and raw data ----------------------------------------
* Empty the session and close any log still open from a previous run
* (capture suppresses the error raised when no log is open).
clear all
capture log close
pause on

* Read the CSV extract and keep a copy in Stata format.
import delimited "psiddata.csv"
save "psiddata", replace

* 0/1 indicators year2007, year2009, year2011 and year2013.
foreach wave of numlist 2007(2)2013 {
	gen year`wave' = year==`wave'
}

* ---- Trimming flags -------------------------------------------------------
* Flag house values outside the 1st-99th percentile range. The cut-offs are
* computed among owners (owner==1) but the flag is set for every observation.
* Stata orders missing values above every number, so an observation with a
* missing rhousevalue satisfies rhousevalue>r(c_2) and is flagged as well.
centile rhousevalue if owner==1, centile(1 99)
gen rhousevalue_extreme = (rhousevalue<r(c_1)|rhousevalue>r(c_2))

* Same flag for household income, with cut-offs taken over all observations
* (missing rhhinc is flagged for the reason given above).
centile rhhinc, centile(1 99)
gen rhhinc_extreme = (rhhinc<r(c_1)|rhhinc>r(c_2))

* ---- Imputed consumption ----------------------------------------------------
* Paths use forward slashes: Stata accepts them on Windows, macOS and Linux,
* whereas backslash-separated paths only resolve on Windows.
* Linear prediction from the stored first-stage estimates.
estimates use "estimates/first"
predict lnrcon_hat, xb

* Rescale the prediction by the R-squared stored with the fwl_first estimates.
estimates use "estimates/fwl_first"
gen lnrcon_hat_scaled = lnrcon_hat/(e(r2))

* 1 when both proxies are observed, 0 when either is missing.
gen proxiesnotmissing = !missing(lnrallfood, lnrutilities)

* Forward slashes are used in paths because Stata accepts them on every
* platform; backslash-separated paths only resolve on Windows.
log using "log/psidresults.txt", text replace

* Regressors of interest and estimation sample shared by the three
* regressions below; defining them once keeps the specifications identical.
* The npcovariates global is not defined in this section.
* NOTE(review): confirm it is set before this file runs - an undefined global
* expands to nothing and the covariates would be silently dropped.
local keyvars lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue
local estsample everrenter9913==0 & everincomezero9913==0 & rhousevalue_extreme==0 & rhhinc_extreme==0 & evermoved9913==0 & inrange(year,2005,2011) & proxiesnotmissing==1

* Same specification for observed, imputed and rescaled-imputed log
* consumption, with standard errors clustered on id.
reg lnrcon `keyvars' $npcovariates if `estsample', cluster(id)
estimates store lnrcon
reg lnrcon_hat `keyvars' $npcovariates if `estsample', cluster(id)
estimates store lnrcon_hat
reg lnrcon_hat_scaled `keyvars' $npcovariates if `estsample', cluster(id)
* Mark the estimation sample of the last regression for the later steps.
gen insample = e(sample)
estimates store lnrcon_hat_scaled

* Export the three sets of estimates as plain text and as LaTeX.
esttab lnrcon lnrcon_hat lnrcon_hat_scaled using "results/sample2.txt", se(%5.3f) b(%5.3f) replace
esttab lnrcon lnrcon_hat lnrcon_hat_scaled using "results/sample2.tex", se(%5.3f) b(%5.3f) replace tex

* Frisch-Waugh-Lovell step used for the standard error correction: within
* the estimation sample, regress each listed variable on the npcovariates
* global and save the residuals under the same name with an fwl_ prefix.
local to_residualise lnrcon_hat_scaled lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue lnrallfood lnrutilities
foreach v of local to_residualise {
	quietly {
		regress `v' $npcovariates if insample==1
		predict fwl_`v' if insample==1, residuals
	}
}

* Reload the fwl_first estimates so that they are the active results (the
* residualisation regressions above overwrote e()) and keep a named copy.
* Paths use forward slashes, which Stata accepts on every platform;
* backslash-separated paths only resolve on Windows.
estimates use "estimates/fwl_first.ster"
estimates store fwl_first

* rrp is a user-written command, not shown in this file.
* NOTE(review): partialrsq((e(r2))) passes the expression text, not a number;
* presumably rrp evaluates it against the fwl_first results loaded just above
* - confirm against the rrp help file.
rrp $npcovariates lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue if insample==1, impute("lnrcon_hat_scaled2") proxies(lnrallfood lnrutilities) first(estimates/first.ster) partialrsq((e(r2))) cluster(id)

* Close the results log opened before the main regressions.
log close


************** Implement double length artificial regression *****************

*calculate overidentification statistic
* Coefficient vector of the estimation results that are active at this point.
* NOTE(review): presumably those left behind by the rrp call - confirm that
* rrp posts its coefficients to e(b).
matrix b = e(b)

* For each proxy build the moment variable
*   v<proxy> = fwl_<proxy> - 0 - sum over x of b[x] * g * fwl_<x>
* where g is the fwl_lnrnondurable_psid coefficient of the stored
* reverse_fwl_<proxy> estimates.
foreach proxy of varlist lnrallfood lnrutilities {
	* Forward slashes keep the path valid on every platform.
	estimates use "results/reverse_fwl_`proxy'"
	matrix g_`proxy' = e(b)
	* The formula is accumulated as text by plain macro copy rather than
	* through = evaluation, so it is never subject to string-expression
	* length limits.
	local reducedform_xb 0
	foreach xvar of varlist lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue { 
		local reducedform_xb `"`reducedform_xb' - b[1,"`xvar'"]*g_`proxy'[1,"fwl_lnrnondurable_psid"]*fwl_`xvar'"'
	}
	* The macro is pasted in textually and every term carries its own minus
	* sign: do not wrap the macro in parentheses, that would flip the signs.
	gen double v`proxy' = (fwl_`proxy' - `reducedform_xb') 
	local moments "`moments' v`proxy'"
}

* Forward slashes are used in the path because Stata accepts them on every
* platform; backslash-separated paths only resolve on Windows.
log using "log/doublelength_artificialregression.txt", text replace

* Work on a temporary stacked copy of the estimation sample; restore brings
* the original data back afterwards.
preserve
keep if insample==1

* Two copies of every observation: dup==0 marks the original row and dup==1
* the added copy. v holds the lnrallfood moment on original rows and the
* lnrutilities moment on the copies.
expand 2, gen(dup)
gen ndup = 1-dup
gen double v=vlnrallfood*(1-dup)+vlnrutilities*dup 

* Interact each residualised regressor with dup. Both variable lists are
* built once so that regress and test are guaranteed to use the same terms.
local fwlx fwl_lnrhhinc_lag3 fwl_lnrhhinc_lag2 fwl_lnrhhinc_lag1 fwl_lnrhhinc fwl_lnrhhinc_lead1 fwl_lnrhousevalue
local fwlxdup
foreach xvar of local fwlx {
	gen double `xvar'dup=`xvar'*dup
	local fwlxdup `fwlxdup' `xvar'dup
}

* Regression without a constant (ndup and dup act as copy-specific
* intercepts), followed by the joint test that all coefficients are zero.
regress v ndup dup `fwlx' `fwlxdup', nocons robust
test ndup dup `fwlx' `fwlxdup'
restore
log close

************** Impute consumption using year by year regressions from the CEX *****************

* Containers filled one year at a time inside the loop.
gen lnrcon_hat_ybyy = .
gen lnrcon_hat_scaled_ybyy = . 
gen partial_r2 = .

* A temporary variable name is used for the per-year prediction so the loop
* cannot collide with an existing variable in the data.
tempvar xb_year
foreach Y in 2005 2007 2009 2011 2013 {
	* Linear prediction for this year from its stored first-stage estimates.
	* Forward slashes keep the paths valid on every platform.
	estimates use "estimates/allfirst_`Y'"
	predict `xb_year' if year==`Y', xb
	replace lnrcon_hat_ybyy = `xb_year' if year==`Y'
	
	* Rescale by the R-squared of the all_fwl_first estimates for this year
	* and keep that R-squared for the later correction.
	estimates use "estimates/all_fwl_first_`Y'"
	replace lnrcon_hat_scaled_ybyy = `xb_year'/(e(r2)) if year==`Y'
	replace partial_r2 = e(r2) if year==`Y'
	drop `xb_year'
}

*remove outliers before calculating standard deviation
* Despite its name, tophalfpercent flags consumption below the 1st or above
* the 99th percentile. Stata orders missing values above every number, so
* observations with missing rcon are flagged, and therefore excluded, too.
centile rcon, centile(1 99)
gen tophalfpercent = (rcon<r(c_1)|rcon>r(c_2))

* Rescaled imputation multiplied by the square root of the stored R-squared.
gen rp_corrected_forsd =  lnrcon_hat_scaled_ybyy*(partial_r2)^0.5

* Replace the data in memory with one row per year holding the standard
* deviation of each consumption measure among non-flagged observations.
collapse (sd) sd_lnrcon = lnrcon sd_lnrcon_hat = lnrcon_hat_ybyy sd_lnrcon_hat_scaled = lnrcon_hat_scaled_ybyy sd_lnrcon_hat_scaled_corrected = rp_corrected_forsd if tophalfpercent==0, by(year)

* Plot the four series for 2005-2013. Line continuation is used instead of
* switching the command delimiter, so the delimiter stays at end-of-line.
twoway ///
	(connected sd_lnrcon year, msymbol(square) lcolor(gs8) mlcolor(gs8) mcolor(gs8) msize(medlarge)) ///
	(connected sd_lnrcon_hat year, msymbol(X) lcolor(black) mlcolor(black) mcolor(black) msize(medlarge)) ///
	(connected sd_lnrcon_hat_scaled year, lcolor(black) mlcolor(black) mcolor(black) msize(medlarge)) ///
	(connected sd_lnrcon_hat_scaled_corrected year, msymbol(T) lpattern(dash) lcolor(black) mlcolor(black) mcolor(black) msize(medlarge)) ///
	if inrange(year,2005,2013), ///
	legend(order(1 "Actual" 2 "Imputed (RP)" 3 "Imputed (RRP)" 4 "Imputed and corrected (RRP)") ) graphregion(color(white)) ylabel(,nogrid) ytitle("SD log consumption") xtitle("Year")

* Forward slashes keep the export path valid on every platform.
graph export "graphs/sdconsumptionmeasures.pdf", replace
