* ---------------------------------------------------------------------------
* Session setup: start from an empty dataset, switch off output paging,
* close any log that is still open and use plain-text logs.
* ---------------------------------------------------------------------------
clear
set more off
cap log close
set logtype text

* The directory below is specific to one machine. -capture- lets the do-file
* carry on from the current working directory when that path does not exist,
* and -confirm file- then stops with a clear error message if the raw data
* file cannot be found there either.
cap cd "H:\Schweden\Final\Econometric\Referee_Reports_22112016\Reply\finaltorevision\finaltopublish\JAE_Data_Archive\JAE_Data_Archive_Raphael_20102017\US_census_data"
confirm file "AngristEvans1980.dta"

*log using "AngristEvans1980", replace

use AngristEvans1980.dta

// Convert the variables listed below from string to numeric storage,
// overwriting each one in place.
foreach v in sex2nd sex3rd ageq2nd ageq3rd agem aged qtrbthd asex2nd aage2nd {
	destring `v', replace
}

// X: indicator variables (each comparison yields 1 when true, 0 otherwise)

// sex of the first two children and the same-sex indicator built from them
generate boy1st = (sexk == 0)
generate boy2nd = (sex2nd == 0)
generate boys2 = (boy1st == 1 & boy2nd == 1)
generate girls2 = (sexk == 1 & sex2nd == 1)
generate samesex = (boys2 == 1 | girls2 == 1)

// more than two children
generate morekids = (kidcount > 2)
tabulate samesex if kidcount > 1

// second and third child share the same age in quarters; the second
// indicator additionally requires them to be of the same sex
generate twin2nd = (ageq2nd == ageq3rd)
generate samesextwin2nd = (twin2nd == 1 & sex2nd == sex3rd)

// mother's race dummies from the racem codes 2, 12 and 1;
// othracem is 1 exactly when none of the three dummies is 1
generate blackm = (racem == 2)
generate hispm = (racem == 12)
generate whitem = (racem == 1)
generate othracem = !(blackm | hispm | whitem)

// father's year of birth: 80-aged when qtrbthd is 0, 79-aged otherwise
generate yobd = cond(qtrbthd == 0, 80 - aged, 79 - aged)

// ages in quarters (ageqm, ageqd) and the parents' age in years at the
// birth of the first child (difference to ageqk, truncated to whole years)
generate ageqm = 4*(80 - yobm) - qtrbthm - 1
generate agefstm = int((ageqm - ageqk)/4)
generate ageqd = 4*(80 - yobd) - qtrbthd
generate agefstd = int((ageqd - ageqk)/4)


// rescaling: the square is taken from agefstm in years BEFORE agefstm
// itself is divided by 100, so the order of the two lines matters
generate agefstm2 = agefstm^2/100
replace agefstm = agefstm/100


// Y: outcome variables
// workedm  - 1 if weeksm is above zero
// incomem  - income1m plus the non-negative part of income2m, multiplied
//            by the constant 2.099173554
// lincomem - natural log of incomem (missing where incomem is not positive)
generate workedm = (weeksm > 0)
generate incomem = income1m + max(0, income2m)
replace incomem = incomem*2.099173554
generate lincomem = ln(incomem)

// sample restrictions:
// keep observations with agem between 21 and 35 and at least two children
drop if agem==.
drop if agem<21
drop if agem>35
keep if kidcount>=2
// drop if the second child's age in quarters is 4 or less
drop if ageq2nd<=4
// agefstm has already been divided by 100, hence the threshold 15/100
drop if agefstm<15/100
// drop observations for which any of these flag variables equals 1
drop if aage==1
drop if aqtrbrth==1
drop if aage2nd==1
drop if asex==1
drop if asex2nd==1

// sanity checks: the key constructed variables must not be missing.
// workedm is written out in full so that the check does not depend on
// variable-name abbreviation being switched on.
assert twin2nd!=.
assert morekids!=.
assert workedm!=.

// Define instruments:
//   Zddot = twin2nd, Zdot = samesextwin2nd,
//   eddot = Zddot - Zdot, Zbar0 = Zdot - eddot
rename samesextwin2nd Zdot
rename twin2nd Zddot
generate eddot = Zddot - Zdot
generate Zbar0 = Zdot - eddot

// control variables used in every regression below
global xs "agem agefstm sexk sex2nd blackm hispm othracem"

// reduce the data to the analysis variables and store them as ae98.dta
keep $xs incomem workedm lincomem morekids samesex Zddot Zdot eddot Zbar0

compress
save "ae98.dta", replace

// Grid search over theta: for every grid point the candidate instrument
// Zwdot is regressed on the controls and the overall F-statistic is stored
// (selection on observables), once for the full sample and once for the
// sample with non-missing lincomem.

* empty results file; rows not filled by the grid search stay missing
preserve
drop _all
set obs 210
generate theta = .
generate F_full = .
generate F_lincomem = .
save "res_selection_on_observables.dta", replace
restore

* sample means of Zdot and eddot that enter the lambda formula
summarize Zdot
local piZdot = r(mean)
summarize eddot
local pieddot = r(mean)

local row = 0
quietly forvalues theta = -1(0.01)1.01 { // grid search
	preserve

	* candidate instrument for this value of theta
	local lambda = 1+`theta'*((`piZdot'-`pieddot')/`pieddot')
	generate Zwdot = Zdot-`lambda'*eddot

	regress Zwdot $xs, robust // in full sample
	local F_full = e(F)

	regress Zwdot $xs if !missing(lincomem), robust // log(wage) sample
	local F_lincomem = e(F)

	* write theta and both F-statistics into the next row of the results file
	use "res_selection_on_observables.dta", clear
	local ++row
	replace theta = `theta' in `row'
	replace F_full = `F_full' in `row'
	replace F_lincomem = `F_lincomem' in `row'
	save "res_selection_on_observables.dta", replace

	noisily display `theta'
	restore

}

// Pick, separately for the full sample and for the log(wage) sample, the
// theta with the smallest overall F-statistic in the grid-search results.
// If several grid points tie, r(mean) is the average of their thetas.
preserve
use "res_selection_on_observables.dta", clear
foreach smp in full lincomem {
	summarize F_`smp', detail
	summarize theta if F_`smp'==r(min)
	local theta_min_`smp' = r(mean) // final theta for this sample
}
restore

// construct Z_bar_min Variables: Zbarmin_full and Zbarmin_lincomem use the
// same formula as the grid search, evaluated at the selected theta
foreach smp in full lincomem {
	summarize Zdot
	local piZdot = r(mean)
	summarize eddot
	local pieddot = r(mean)
	local theta = `theta_min_`smp''
	local lambda = 1+`theta'*((`piZdot'-`pieddot')/`pieddot')
	generate Zbarmin_`smp' = Zdot-`lambda'*eddot
}

*** Table 2
** Panel 1: Yearly labor income (N=394,840)

* row 1: coefficients (OLS first, then 2SLS with each instrument in turn)
regress incomem morekids $xs, robust
foreach z in Zddot Zdot Zbar0 Zbarmin_full samesex {
	ivregress 2sls incomem (morekids = `z') $xs, robust
}

* row 2: First stage F (test of the instrument in the first-stage regression)
foreach z in Zddot Zdot Zbar0 Zbarmin_full samesex {
	regress morekids `z' $xs, robust
	test `z'
}

* row 3: selection on observables (Overall F-stat.)
* the regression table for Zddot is suppressed, only its F is displayed
quietly regress Zddot $xs, robust
display e(F)
foreach z in Zdot Zbar0 Zbarmin_full {
	regress `z' $xs, robust
	display e(F)
}
* samesex is regressed on the controls without sexk and sex2nd
regress samesex agem agefstm blackm hispm othracem, robust
display e(F)

** Panel 2: Worked for pay in last year (N=394,840)

* row 1: coefficients (OLS first, then 2SLS with each instrument in turn)
regress workedm morekids $xs, robust
foreach z in Zddot Zdot Zbar0 Zbarmin_full samesex {
	ivregress 2sls workedm (morekids = `z') $xs, robust
}

* row 2: First stage F (same as for income)
foreach z in Zddot Zdot Zbar0 Zbarmin_full samesex {
	regress morekids `z' $xs, robust
	test `z'
}

* row 3: selection on observables (Overall F-stat.) (same as for income)
* the regression table for Zddot is suppressed, only its F is displayed
quietly regress Zddot $xs, robust
display e(F)
foreach z in Zdot Zbar0 Zbarmin_full {
	regress `z' $xs, robust
	display e(F)
}
* samesex is regressed on the controls without sexk and sex2nd
regress samesex agem agefstm blackm hispm othracem, robust
display e(F)

** Panel 3: Log(Yearly labor income) (N=220,502)
* row 1: coefficients (OLS first, then 2SLS with each instrument in turn);
* this panel uses Zbarmin_lincomem instead of Zbarmin_full
regress lincomem morekids $xs, robust
foreach z in Zddot Zdot Zbar0 Zbarmin_lincomem samesex {
	ivregress 2sls lincomem (morekids = `z') $xs, robust
}

* row 2: First stage F, restricted to observations with non-missing lincomem
foreach z in Zddot Zdot Zbar0 Zbarmin_lincomem samesex {
	regress morekids `z' $xs if !missing(lincomem), robust
	test `z'
}

* row 3: selection on observables (Overall F-stat.)
* the regression table for Zddot is suppressed, only its F is displayed
quietly regress Zddot $xs if !missing(lincomem), robust
display e(F)
foreach z in Zdot Zbar0 Zbarmin_lincomem {
	regress `z' $xs if !missing(lincomem), robust
	display e(F)
}
* samesex is regressed on the controls without sexk and sex2nd
regress samesex agem agefstm blackm hispm othracem if !missing(lincomem), robust
display e(F)


*** Bootstrap standard errors for Zbar_min (time intensive)
* For each of $R bootstrap samples drawn from ae98.dta the grid search for
* theta is repeated, the Zbar_min instruments are rebuilt and the three IV
* coefficients on morekids are stored in ae98_bs.dta (one row per replicate).

global R 10

* Fix the random-number seed so that the bsample draws below, and therefore
* the bootstrap standard errors, can be reproduced from run to run.
set seed 20171020

* empty results file with one row per bootstrap replicate
preserve
drop _all
set obs $R
gen theta_min_lincomem=.
gen theta_min_full=.
gen beta_zbarmin_incomem=.
gen beta_zbarmin_workedm=.
gen beta_zbarmin_lincomem=.
save ae98_bs.dta, replace
restore

qui forvalues r=1/$R {
	noi dis "bootstrap replicate `r' "

	use ae98.dta, clear

	bsample
	******************
	* find theta that minimizes overall F-statistic (like above)

	* empty results file for this replicate's grid search
	preserve
	drop _all
	set obs 210
	gen theta=.
	gen F_full=.
	gen F_lincomem=.
	save "res_selection_on_observables_bs.dta", replace
	restore

	* Means of Zdot and eddot in the bootstrap sample. The grid search only
	* works on preserved copies of the sample, so the means are computed
	* once here and reused when the final instruments are built.
	sum Zdot
	local piZdot=r(mean)
	sum eddot
	local pieddot=r(mean)

	local line=0
	forvalues theta=-1(0.01)1.01 { // grid search
		preserve
		local lambda=1+`theta'*((`piZdot'-`pieddot')/`pieddot')
		gen Zwdot=Zdot-`lambda'*eddot

		reg Zwdot $xs, robust
		local F_full = e(F)

		reg Zwdot $xs if lincomem!=., robust
		local F_lincomem = e(F)

		* write theta and both F-statistics into the next results row
		use "res_selection_on_observables_bs.dta", clear
		local line=`line'+1
		replace theta=`theta' in `line'
		replace F_full=`F_full' in `line'
		replace F_lincomem=`F_lincomem' in `line'

		save "res_selection_on_observables_bs.dta", replace

		restore

	}

	* theta with the smallest F-statistic in each sample; output is
	* suppressed here, so plain -summarize- (which returns r(min)) suffices
	preserve
	use "res_selection_on_observables_bs.dta", clear
	sum F_full
	sum theta if F_full==r(min)
	local theta_min_full_bs=r(mean)

	sum F_lincomem
	sum theta if F_lincomem==r(min)
	local theta_min_lincomem_bs=r(mean)

	restore

	* rebuild the Zbar_min instruments in the bootstrap sample
	local theta=`theta_min_full_bs'
	local lambda=1+`theta'*((`piZdot'-`pieddot')/`pieddot')
	gen Zbarmin_full_bs=Zdot-`lambda'*eddot

	local theta=`theta_min_lincomem_bs'
	local lambda=1+`theta'*((`piZdot'-`pieddot')/`pieddot')
	gen Zbarmin_lincomem_bs=Zdot-`lambda'*eddot


	* IV coefficients on morekids for the three outcomes
	ivregress 2sls incomem (morekids=Zbarmin_full_bs) $xs, robust
	local iv_beta_incomem=_b[morekids]
	ivregress 2sls workedm (morekids=Zbarmin_full_bs) $xs, robust
	local iv_beta_workedm=_b[morekids]
	ivregress 2sls lincomem (morekids=Zbarmin_lincomem_bs) $xs, robust
	local iv_beta_lincomem=_b[morekids]
	*****************
	* store this replicate's results in row `r' of ae98_bs.dta
	use ae98_bs.dta, clear

	replace theta_min_lincomem=`theta_min_lincomem_bs' in `r'
	replace theta_min_full=`theta_min_full_bs' in `r'

	replace beta_zbarmin_incomem=`iv_beta_incomem' in `r'
	replace beta_zbarmin_workedm=`iv_beta_workedm' in `r'
	replace beta_zbarmin_lincomem=`iv_beta_lincomem' in `r'

	save ae98_bs.dta, replace

}

* standard deviations of beta vars are bootstrapped standard errors. Exclude outliers
use ae98_bs.dta, clear

sum beta_zbarmin_incomem if beta_zbarmin_incomem<0 & beta_zbarmin_incomem>-12000
sum beta_zbarmin_workedm if abs(beta_zbarmin_workedm)<1 
sum beta_zbarmin_lincomem if abs(beta_zbarmin_lincomem)<2, d  


*** Appendix B: Statistical Relevance - US data
* bootstrap IV estimates from Zddot, Zdot and Zbar0
use ae98.dta, clear

global B 1000

* Fix the random-number seed so that the bsample draws below, and therefore
* the bootstrap distribution of the IV estimates, can be reproduced.
set seed 20171020

* empty results file with one row per bootstrap replicate; only the dataset
* has to be emptied here, so -drop _all- is used as in the other results-file
* setups of this do-file
preserve
drop _all
set obs $B

gen beta_zddot=.
gen beta_zdot=.
gen beta_zbar0=.

save appendix_B_bootstrap.dta, replace
restore

* only these variables are needed; dropping the rest once, before the loop,
* keeps the preserved copy made in every replicate small
keep incomem morekids Zddot Zdot Zbar0 $xs 

qui forvalues b=1/$B {
	noi dis "bootstrap replicate `b' "
	preserve
	bsample

	* IV coefficient on morekids for each instrument in this bootstrap sample
	foreach iv of varlist Zddot Zdot Zbar0   {
		ivregress 2sls incomem (morekids=`iv') $xs, robust 
		local beta_`iv'=_b[morekids]
	}

	* store the three coefficients in row `b' of the results file
	use appendix_B_bootstrap, clear
	replace beta_zddot=`beta_Zddot' in `b'
	replace beta_zdot=`beta_Zdot' in `b'
	replace beta_zbar0=`beta_Zbar0' in `b'
	save appendix_B_bootstrap, replace

	restore
}

* Summarise the bootstrap estimates and count how often the coefficient
* from Zbar0 is larger in absolute value than the coefficient from Zddot
preserve
use appendix_B_bootstrap, clear

generate diff = beta_zbar0-beta_zddot
summarize beta_zddot beta_zbar0 if diff<0
summarize beta_zddot beta_zdot beta_zbar0

count if abs(beta_zbar0)>abs(beta_zddot)

* bootstrap covariance between the two estimates, kept in local Cov
correlate beta_zddot beta_zbar0, covariance
local Cov = r(cov_12)

* Figure B.1, left graph
twoway (kdensity beta_zddot, lpattern(solid) lcolor(black)) ///
       (kdensity beta_zbar0, lpattern(dash) lcolor(black))

restore

* Test whether the IV estimates based on Zddot and Zbar0 differ.
* The variances come from the robust standard errors of the two original
* regressions; the covariance is taken from local Cov, which must already
* hold the covariance of the bootstrap estimates at this point.

* original beta and se:
qui ivregress 2sls incomem (morekids=Zddot) $xs, robust 
local Var_zddot=_se[morekids]^2
local bhat_zddot=_b[morekids]

qui ivregress 2sls incomem (morekids=Zbar0) $xs, robust 
local Var_zbar0=_se[morekids]^2
local bhat_zbar=_b[morekids]

* difference of the coefficients and the variance of that difference
local numerator=`bhat_zddot'-`bhat_zbar'
local denominator2=`Var_zddot'+`Var_zbar0'-2*`Cov'

local ttest=`numerator'/sqrt(`denominator2')

* t-statistic
dis `ttest'

* p-value (two-sided): based on the absolute value of the statistic, so that
* a negative t-statistic cannot produce a "p-value" above 1
dis 2*(1-normal(abs(`ttest')))


**** Sensitivity Analysis Figure C.1
* For each gamma on the grid the instrument Zdot-gamma*eddot is built and the
* IV coefficient on morekids and its standard error are stored for workedm
* and lincomem.
use ae98.dta, clear

* empty results file; rows not filled by the loop stay missing
preserve
drop _all
set obs 210
generate gamma = .
generate biv_workedm = .
generate biv_lincomem = .
generate seiv_workedm = .
generate seiv_lincomem = .
save "res_A3gen.dta", replace
restore

quietly {
	local line = 0
	forvalues gamma = 0.80(0.01)1.2 {
		preserve

		* instrument for this value of gamma
		generate Zbar0_A3gen = Zdot-`gamma'*eddot

		ivregress 2sls workedm (morekids=Zbar0_A3gen) $xs, robust
		local bworkedm = _b[morekids]
		local seworkedm = _se[morekids]

		ivregress 2sls lincomem (morekids=Zbar0_A3gen) $xs, robust
		local blincomem = _b[morekids]
		local selincomem = _se[morekids]

		* write the estimates into the next row of the results file
		use "res_A3gen.dta", clear
		local line = `line'+1
		replace gamma = `gamma' in `line'
		replace biv_workedm = `bworkedm' in `line'
		replace biv_lincomem = `blincomem' in `line'
		replace seiv_workedm = `seworkedm' in `line'
		replace seiv_lincomem = `selincomem' in `line'
		save "res_A3gen.dta", replace

		restore
	}
}

* Figure C.1
* Plot the IV coefficient against gamma with a band of
* +/- invnormal(0.95) standard errors around it, one graph per outcome,
* then combine both graphs in a single row.
preserve
use "res_A3gen.dta", clear

* lower and upper band limits
foreach y in workedm lincomem {
	generate lower_`y' = biv_`y'-invnormal(0.95)*seiv_`y'
	generate upper_`y' = biv_`y'+invnormal(0.95)*seiv_`y'
}

* one saved graph per outcome
foreach y in lincomem workedm {
	twoway (rarea lower_`y' upper_`y' gamma if gamma>0.8 & gamma<1.2, bcolor(gs14)) ///
	       (line biv_`y' gamma if gamma>0.8 & gamma<1.2), ///
	       scheme(s2mono) saving(biv_`y'_A3gen.gph, replace)
}

graph combine biv_lincomem_A3gen.gph biv_workedm_A3gen.gph, row(1)
restore 


