/*
This program imports the individual beta_i's from MATLAB, computes
	the means by demographic groups (Table 6), p-values for
	tests of differences in means across demographic groups
	(also part of Table 6), and the 25th, 50th, and 75th
	percentiles, overall and by demographic groups (Table 7).
*/

log using ${dopath}/p6_heterogeneity.log, replace


*****************************
* 1. Read the exported beta_i files into Stata, then attach them to the
*    GSF data that carry the demographic characteristics
*****************************
* Each betais_<est>.csv is imported, its v1 column is renamed bi_<est>,
* and the result is saved as heteroreturns_<est>.dta.
foreach est in ols ife cce cce2 {
	import delimited ${mydatapath}/betais_`est'.csv, clear
	rename v1 bi_`est'
	save ${mydatapath}/heteroreturns_`est'.dta, replace
}

* Person identifiers are read from column A of Sheet1.
import excel ${mydatapath}/GSFpersonid_insampleBOTH2_1ch.xls, sheet("Sheet1") clear
rename A personid
save ${mydatapath}/heteroreturns_personid.dta, replace

* Attach every estimator's beta_i to the person ids by observation number.
* The match is positional (_n), so it relies on all files sharing one row order.
foreach est in ols ife cce cce2 {
	local tag = upper("`est'")
	merge 1:1 _n using ${mydatapath}/heteroreturns_`est'.dta, gen(_mergeBetai`tag')
}
save ${mydatapath}/heteroreturns.dta, replace


* Bring the beta_i's onto the in-sample person-year records.
use ${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta, clear
keep if insampleBOTH2_1ch==1
merge m:1 personid using ${mydatapath}/heteroreturns.dta, gen(_mergeFMreturns)
save ${mydatapath}/heteroreturns.dta, replace

* Keep only the 1978 records; this is the data set left in memory for the
* commands that follow.
keep if year==1978
save ${mydatapath}/heteroreturns1978.dta, replace



*****************************
* 2. Summarize individual beta_i's for Table 6 and 7
*****************************
* Loop through groups and estimators; store mean, variance, N, p25, p50, and p75;
* place them into one-row matrices (meanvar_<group>, ptiles_<group>) that are
* eventually stacked and output into a spreadsheet.

* Group indicators: 1 = in the group, 0 = otherwise.
gen white=(race==1)
gen nonhispanic=(hispanic==0)
gen notforeign=(foreign_born==0)
gen bornpre50=(birthyear<1950)
gen born5054=(birthyear>=1950 & birthyear<=1954)
gen born5559=(birthyear>=1955 & birthyear<=1959)
* Stata orders missing values above every number, so an unguarded
* birthyear>1959 would put people with missing birthyear in the youngest cohort.
gen bornpost59=(birthyear>1959 & !missing(birthyear))
gen HS=(educ_5cat==2)
gen SomeColl=(educ_5cat==3)
gen Bach=(educ_5cat==4)
gen Grad=(educ_5cat==5)

* black, otherrace, hispanic, and foreign_born are used as they come in the data.
local byvars white black otherrace nonhispanic hispanic notforeign foreign_born bornpre50 born5054 born5559 bornpost59 HS SomeColl Bach Grad
local estimators ols ife cce cce2

foreach byvar of local byvars {
	foreach est of local estimators {

		* Summary statistics of this estimator's beta_i within the group.
		sum bi_`est' if `byvar'==1, detail
		local mean_`est'_`byvar'=r(mean)
		local p25_`est'_`byvar'=r(p25)
		local p50_`est'_`byvar'=r(p50)
		local p75_`est'_`byvar'=r(p75)
		local N_`est'_`byvar'=r(N)
		local var_`est'_`byvar'=r(Var)

	}

	* One row per group: mean and variance for each estimator.
	* The N column is the observation count from the cce2 summary only.
	matrix meanvar_`byvar'=(`mean_ols_`byvar'',`var_ols_`byvar'', ///
		`mean_ife_`byvar'',`var_ife_`byvar'', ///
		`mean_cce_`byvar'',`var_cce_`byvar'', ///
		`mean_cce2_`byvar'',`var_cce2_`byvar'',`N_cce2_`byvar'')
	matrix rownames meanvar_`byvar'=`byvar'
	matrix colnames meanvar_`byvar'=ols_mean ols_var ife_mean ife_var cce_mean cce_var cce2_mean cce2_var N

	* One row per group: 25th, 50th, and 75th percentiles for each estimator.
	matrix ptiles_`byvar'=(`p25_ols_`byvar'',`p50_ols_`byvar'',`p75_ols_`byvar'', ///
		`p25_ife_`byvar'',`p50_ife_`byvar'',`p75_ife_`byvar'', ///
		`p25_cce_`byvar'',`p50_cce_`byvar'',`p75_cce_`byvar'', ///
		`p25_cce2_`byvar'',`p50_cce2_`byvar'',`p75_cce2_`byvar'')
	matrix rownames ptiles_`byvar'=`byvar'
	matrix colnames ptiles_`byvar'=ols_p25 ols_p50 ols_p75 ife_p25 ife_p50 ife_p75 cce_p25 cce_p50 cce_p75 cce2_p25 cce2_p50 cce2_p75
}


* Loop through test pairs and estimators, store the ttest-of-means p-value, and
* place it into one-row matrices (ttest_<pair>) to eventually be output into a
* spreadsheet.
*
* Each pair variable is 1 for the first group, 0 for the second group, and
* missing for everyone else, so ttest compares exactly the two groups named.
gen whiteblack=.
replace whiteblack=1 if white==1
replace whiteblack=0 if black==1
gen whiteother=.
replace whiteother=1 if white==1
replace whiteother=0 if otherrace==1
* Built like the other pairs so that observations with missing hispanic or
* foreign_born are left out of the test instead of being counted in the 0 group;
* the tested groups then match those selected by hispanic==0/1 and foreign_born==0/1.
gen nonhishis=.
replace nonhishis=1 if hispanic==0
replace nonhishis=0 if hispanic==1
gen notfbfb=.
replace notfbfb=1 if foreign_born==0
replace notfbfb=0 if foreign_born==1
gen pre505054=.
replace pre505054=1 if birthyear<1950
replace pre505054=0 if birthyear>=1950 & birthyear<=1954
gen b50545559=.
replace b50545559=1 if birthyear>=1950 & birthyear<=1954
replace b50545559=0 if birthyear>=1955 & birthyear<=1959
gen b5559post59=.
replace b5559post59=1 if birthyear>=1955 & birthyear<=1959
* Missing values sort above every number in Stata, so exclude them explicitly;
* otherwise missing birthyear would be tested as part of the post-1959 cohort.
replace b5559post59=0 if birthyear>1959 & !missing(birthyear)
gen HSSomeColl=.
replace HSSomeColl=1 if HS==1
replace HSSomeColl=0 if SomeColl==1
gen SomeCollBach=.
replace SomeCollBach=1 if SomeColl==1
replace SomeCollBach=0 if Bach==1
gen BachGrad=.
replace BachGrad=1 if Bach==1
replace BachGrad=0 if Grad==1

local testpairs whiteblack whiteother nonhishis notfbfb pre505054 b50545559 b5559post59 HSSomeColl SomeCollBach BachGrad

foreach tpair of local testpairs {
	* `estimators' is the local list of estimator suffixes set earlier in this do-file.
	foreach est of local estimators {

		* Two-sample t test of mean beta_i between the pair's two groups.
		ttest bi_`est', by(`tpair')
		local p_`est'_`tpair'=r(p)
	}

	* One row per pair: the stored p-value for each estimator.
	matrix ttest_`tpair'=(`p_ols_`tpair'',`p_ife_`tpair'',`p_cce_`tpair'',`p_cce2_`tpair'')
	matrix rownames ttest_`tpair'=`tpair'
	matrix colnames ttest_`tpair'=ols ife cce cce2
}


* Full-sample 25th/50th/75th percentiles of each estimator's beta_i, to sit
* alongside the percentiles by group.
foreach est in ols ife cce cce2 {
	summarize bi_`est', detail
	foreach q in 25 50 75 {
		local p`q'_`est'=r(p`q')
	}
}

* Single row labelled "overall", with columns in the same order as the
* by-group percentile matrices.
matrix ptiles=( ///
	`p25_ols',`p50_ols',`p75_ols', ///
	`p25_ife',`p50_ife',`p75_ife', ///
	`p25_cce',`p50_cce',`p75_cce', ///
	`p25_cce2',`p50_cce2',`p75_cce2')
matrix rownames ptiles=overall
matrix colnames ptiles=ols_p25 ols_p50 ols_p75 ife_p25 ife_p50 ife_p75 cce_p25 cce_p50 cce_p75 cce2_p25 cce2_p50 cce2_p75


* Stack the one-row result matrices into full tables and export each table
* to its own spreadsheet.
local demog white black otherrace nonhispanic hispanic notforeign foreign_born bornpre50 born5054 born5559 bornpost59 HS SomeColl Bach Grad
local pairs whiteblack whiteother nonhishis notfbfb pre505054 b50545559 b5559post59 HSSomeColl SomeCollBach BachGrad

* Table 6: means and variances by group, one row per group
gettoken firstgrp othergrps : demog
matrix Table6=meanvar_`firstgrp'
foreach grp of local othergrps {
	matrix Table6=Table6 \ meanvar_`grp'
}
matrix list Table6
putexcel set ${outputpath}/Table6.xls, replace
putexcel A3=matrix(Table6), names

* Corresponding p-values for Table 6: one row per test pair
gettoken firstpair otherpairs : pairs
matrix Table6_pvalues=ttest_`firstpair'
foreach pr of local otherpairs {
	matrix Table6_pvalues=Table6_pvalues \ ttest_`pr'
}
matrix list Table6_pvalues
putexcel set ${outputpath}/Table6_pvalues.xls, replace
putexcel A3=matrix(Table6_pvalues), names

* Table 7: percentiles, overall row first and then one row per group
matrix Table7=ptiles
foreach grp of local demog {
	matrix Table7=Table7 \ ptiles_`grp'
}
matrix list Table7
putexcel set ${outputpath}/Table7.xls, replace
putexcel A3=matrix(Table7), names



log close
