

*	Daniel Kamhöfer, Hendrik Schmitz
*	Reanalyzing Zero Returns to Education in Germany
*	February, 2015


**************
*** Output ***
**************

*Path
* Working directory: the global macro "edit" must already hold the folder with
* the prepared data sets; it is not defined anywhere in this file.
cd "$edit"


************************************************************************************
	*** Table 1 ***

* Returns to education in the wage sample: first stages, OLS and 2SLS second
* stages, and the F-statistic of each instrument in its first stage.

*Controls
* Baseline control set; also read by the Table 2 section, which does not redefine it.
global controls "female state_school_d* year_birth_d*"

*Wage data set
* "clear" drops whatever is in memory, so the file can be re-run in a session
* that still holds unsaved data (without it Stata stops with r(4)).
use 02_clear_wage.dta, clear

*First stage results
* One regression per instrument; standard errors clustered on "group".
	reg edu_years compulsory $controls, vce(cluster group)
	reg edu_years num_inter_schools_area $controls, vce(cluster group)
	reg edu_years num_acad_schools_area $controls, vce(cluster group)

*Second stage results
* OLS benchmark first, then 2SLS with each instrument in turn.
	reg log_wage edu_years $controls, vce(cluster group)
	ivregress 2sls log_wage (edu_years = compulsory) $controls, vce(cluster group)
	ivregress 2sls log_wage (edu_years = num_inter_schools_area) $controls, vce(cluster group)
	ivregress 2sls log_wage (edu_years = num_acad_schools_area) $controls, vce(cluster group)

*First-stage F-statistic
* The first stages are re-estimated quietly; "test" then reports the Wald test
* of the single instrument using the clustered variance estimate.
	qui reg edu_years compulsory $controls, vce(cluster group)
	test compulsory
	qui reg edu_years num_inter_schools_area $controls, vce(cluster group)
	test num_inter_schools_area
	qui reg edu_years num_acad_schools_area $controls, vce(cluster group)
	test num_acad_schools_area

		
************************************************************************************
	*** Table 2 ***

* Same specifications as Table 1, estimated on the cognitive skills data set
* with skills_score_ln as the outcome. The control set is whatever the global
* macro "controls" holds at this point (set in the Table 1 section).
use 03_clear_skills.dta, clear

* Instruments for edu_years, always used one at a time and in this order
local instruments compulsory num_inter_schools_area num_acad_schools_area

* First stage results: years of education on each instrument
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Second stage results: OLS benchmark, then 2SLS with each instrument
regress skills_score_ln edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls skills_score_ln (edu_years = `iv') $controls, vce(cluster group)
}


************************************************************************************
	*** Table A1 ***

* Descriptive statistics by school track (haupt / mittel / gym) and overall.

* Wage data set
use 02_clear_wage.dta, clear

local outcomes wage income edu_years uni vocation female age edu_mother edu_father siblings srh_good obesity migration job_skill
foreach v of local outcomes {
	* Regression on the three track dummies without a constant: the coefficients
	* are the track-specific means, provided the dummies are mutually exclusive
	* (not checked here).
	regress `v' haupt mittel gym, noconstant
	* Overall mean of the same variable
	mean `v'
}

* Number of observations per track, then in total
foreach track in haupt mittel gym {
	count if `track' == 1
}
count

* Cognitive skills data set: same two statistics for the skills score
use 03_clear_skills.dta, clear

regress skills_score haupt mittel gym, noconstant
mean skills_score


************************************************************************************
	*** Table A2 ***

* Robustness checks on the wage data set: first stages, second stages (OLS and
* 2SLS) and reduced forms under alternative outcomes and control sets.
* The global macro "controls" is reset before every group of regressions.
use 02_clear_wage.dta, clear

* Instruments for schooling, always used one at a time and in this order
local instruments compulsory num_inter_schools_area num_acad_schools_area


*** First stage results ***

* Log net hourly wage (baseline controls)
global controls "female state_school_d* year_birth_d*"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Only school years
global controls "female state_school_d* year_birth_d*"
foreach iv of local instruments {
	regress schoolyears `iv' $controls, vce(cluster group)
}

* Socio-economic controls
global controls "female state_school_d* year_birth_d* migration edu_mother edu_father siblings uni vocation job_skill srh_good obesity"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Institutional controls
global controls "female state_school_d* year_birth_d* stud_per_basic_school stud_per_inter_school stud_per_acad_school"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Female specification: female interacted with age and with every state and
* birth-year dummy; the interactions replace the plain female dummy.
foreach v of varlist age state_school_d* year_birth_d* {
	generate femaleX`v' = female * `v'
}
global controls "state_school_d* year_birth_d* femaleX*"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}


*** Second stage results ***

* Log net hourly wage
global controls "female state_school_d* year_birth_d*"
regress log_wage_net edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls log_wage_net (edu_years = `iv') $controls, vce(cluster group)
}

* Only school years
global controls "female state_school_d* year_birth_d*"
regress log_wage schoolyears $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls log_wage (schoolyears = `iv') $controls, vce(cluster group)
}

* Socio-economic controls
global controls "female state_school_d* year_birth_d* migration edu_mother edu_father siblings uni vocation job_skill srh_good obesity"
regress log_wage edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls log_wage (edu_years = `iv') $controls, vce(cluster group)
}

* Institutional controls
global controls "female state_school_d* year_birth_d* stud_per_basic_school stud_per_inter_school stud_per_acad_school"
regress log_wage edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls log_wage (edu_years = `iv') $controls, vce(cluster group)
}

* Female specification (uses the femaleX* interactions generated above)
global controls "state_school_d* year_birth_d* femaleX*"
regress log_wage edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls log_wage (edu_years = `iv') $controls, vce(cluster group)
}

* Reduced form: outcome regressed directly on each instrument
global controls "female state_school_d* year_birth_d*"
foreach iv of local instruments {
	regress log_wage `iv' $controls, vce(cluster group)
}


************************************************************************************
	*** Table A3 ***

* Robustness checks on the cognitive skills data set: first stages, second
* stages (OLS and 2SLS) and reduced forms under alternative control sets.
* The global macro "controls" is reset before every group of regressions.
use 03_clear_skills.dta, clear

* Instruments for schooling, always used one at a time and in this order
local instruments compulsory num_inter_schools_area num_acad_schools_area


*** First stage results ***

* Only school years
global controls "female state_school_d* year_birth_d*"
foreach iv of local instruments {
	regress schoolyears `iv' $controls, vce(cluster group)
}

* Socio-economic controls
global controls "female state_school_d* year_birth_d* migration edu_mother edu_father siblings uni vocation job_skill srh_good obesity"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Institutional controls
global controls "female state_school_d* year_birth_d* stud_per_basic_school stud_per_inter_school stud_per_acad_school"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}

* Female specification: female interacted with age and with every state and
* birth-year dummy; the interactions replace the plain female dummy.
foreach v of varlist age state_school_d* year_birth_d* {
	generate femaleX`v' = female * `v'
}
global controls "state_school_d* year_birth_d* femaleX*"
foreach iv of local instruments {
	regress edu_years `iv' $controls, vce(cluster group)
}


*** Second stage results ***

* Only school years
global controls "female state_school_d* year_birth_d*"
regress skills_score_ln schoolyears $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls skills_score_ln (schoolyears = `iv') $controls, vce(cluster group)
}

* Socio-economic controls
global controls "female state_school_d* year_birth_d* migration edu_mother edu_father siblings uni vocation job_skill srh_good obesity"
regress skills_score_ln edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls skills_score_ln (edu_years = `iv') $controls, vce(cluster group)
}

* Institutional controls
global controls "female state_school_d* year_birth_d* stud_per_basic_school stud_per_inter_school stud_per_acad_school"
regress skills_score_ln edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls skills_score_ln (edu_years = `iv') $controls, vce(cluster group)
}

* Female specification (uses the femaleX* interactions generated above)
global controls "state_school_d* year_birth_d* femaleX*"
regress skills_score_ln edu_years $controls, vce(cluster group)
foreach iv of local instruments {
	ivregress 2sls skills_score_ln (edu_years = `iv') $controls, vce(cluster group)
}

* Reduced form: outcome regressed directly on each instrument
global controls "female state_school_d* year_birth_d*"
foreach iv of local instruments {
	regress skills_score_ln `iv' $controls, vce(cluster group)
}

		
************************************************************************************
	*** Figure A1 ***

* Plots, for each state and year 1950-1980, the school density predicted from a
* regression of the density on state-by-year dummies and the number of students,
* evaluated at each state-year's own number of students.
* Panel (a): intermediate schools. Panel (b): academic schools.
* The combined graph needs the user-written command grc1leg.

*** INTERMEDIATE SCHOOLS ***
	
cd "$edit"
* "clear" is the option of "use" that discards the data in memory
* ("use" does not accept "replace").
use schooling_instruments.dta, clear

*Year-state dummies
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1950(1)1980{
		gen `x'_`y' = edu_ID == "`x'_`y'"
	}
}

*Each line needs to contain the number of students per year and state
* The value is filled into every observation so that it is still available
* after the data are cut down to a single row below.
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1950(1)1980{
		gen `x'_`y'_stud = students_total if edu_ID == "`x'_`y'"
		egen `x'_`y'_stud_max = max(`x'_`y'_stud)
		replace `x'_`y'_stud = `x'_`y'_stud_max if missing(`x'_`y'_stud) & !missing(`x'_`y'_stud_max)
	}
}

* Keep a copy of the full data; it is restored before the academic-schools part
preserve

*Regression
* The variable range BW_1951 - SH_1980 covers every state-year dummy except
* BW_1950, which is therefore the base category.
reg num_inter_schools_area BW_1951 - SH_1980 students_total

*We only need one observation
keep if _n == 1

*Predicted values
*In general
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1951(1)1980{
		gen num_inter_schools_area_`x'_`y' = _b[_cons] + _b[`x'_`y'] + _b[students_total] * `x'_`y'_stud
	}
}

*Year 1950 for all states except BW
foreach x in BY HE NI NW RP SH{
	gen num_inter_schools_area_`x'_1950 = _b[_cons] + _b[`x'_1950] + _b[students_total] * `x'_1950_stud
}

*Base category BW 1950 (no dummy coefficient to add)
gen num_inter_schools_area_BW_1950 = _b[_cons] + _b[students_total] * BW_1950_stud

*Reshape data
* From one wide row to one row per state-year; the suffix (e.g. BW_1950) is
* split into a state code and a numeric year.
keep num_inter_schools_area_*
gen id = 1
reshape long num_inter_schools_area_, i(id) j(edu_ID) string
drop id
gen state = substr(edu_ID,1,2)
gen year = substr(edu_ID,4,7)
destring year, replace
drop edu_ID

*Graph intermediate schools
* The plotted name num_inter_schools_area is an abbreviation of the reshaped
* variable num_inter_schools_area_ (relies on variable abbreviation being on).
#delimit ;
tw
(line num_inter_schools_area year if state == "BW", lcolor(cranberry) lpattern(solid) lwidth(thick))
(line num_inter_schools_area year if state == "BY", lcolor(midblue) lpattern(dash) lwidth(thick))
(line num_inter_schools_area year if state == "HE", lcolor(black) lpattern(dot) lwidth(thick))
(line num_inter_schools_area year if state == "NI", lcolor(pink) lpattern(dash_dot) lwidth(thick))
(line num_inter_schools_area year if state == "NW", lcolor(midgreen) lpattern(shortdash) lwidth(thick))
(line num_inter_schools_area year if state == "RP", lcolor(purple) lpattern(shortdash_dot) lwidth(thick))
(line num_inter_schools_area year if state == "SH", lcolor(gold) lpattern(longdash) lwidth(thick))
, legend(cols(4) subtitle("State of residence") label(1 "Baden-Wuerttemberg") label(2 "Bavaria") label(3 "Hesse") label(4 "Lower Saxony") label(5 "North Rhine-Westphalia") label(6 "Rhineland-Palatinate") label(7 "Schleswig-Holstein")) ytitle("Number per 1,000 square km", size(large)) xtitle("", size(large)) title("{bf:(a)} Intermediate schools") xlabel(1950 1960 1970 1980, labsize(large)) ylabel(0 4 8 12 16 20, labsize(large)) name(part_a, replace) scheme(s1mono);
#delimit cr


*** ACADEMIC SCHOOLS ***

* Back to the full data, and keep a copy again. This second preserve is still
* active at the end of this section; the Figure A2 section starts by restoring it.
restore
preserve

*Regression
reg num_acad_schools_area BW_1951 - SH_1980 students_total

*We only need one observation
keep if _n == 1

*Predicted values
*In general
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1951(1)1980{
		gen num_acad_schools_area_`x'_`y' = _b[_cons] + _b[`x'_`y'] + _b[students_total] * `x'_`y'_stud
	}
}

*Year 1950 for all states except BW
foreach x in BY HE NI NW RP SH{
	gen num_acad_schools_area_`x'_1950 = _b[_cons] + _b[`x'_1950] + _b[students_total] * `x'_1950_stud
}

*Base category BW 1950 (no dummy coefficient to add)
gen num_acad_schools_area_BW_1950 = _b[_cons] + _b[students_total] * BW_1950_stud

*Reshape data
keep num_acad_schools_area_*
gen id = 1
reshape long num_acad_schools_area_, i(id) j(edu_ID) string
drop id
gen state = substr(edu_ID,1,2)
gen year = substr(edu_ID,4,7)
destring year, replace
drop edu_ID

*Graph academic schools
#delimit ;
tw
(line num_acad_schools_area year if state == "BW", lcolor(cranberry) lpattern(solid) lwidth(thick))
(line num_acad_schools_area year if state == "BY", lcolor(midblue) lpattern(dash) lwidth(thick))
(line num_acad_schools_area year if state == "HE", lcolor(black) lpattern(dot) lwidth(thick))
(line num_acad_schools_area year if state == "NI", lcolor(pink) lpattern(dash_dot) lwidth(thick))
(line num_acad_schools_area year if state == "NW", lcolor(midgreen) lpattern(shortdash) lwidth(thick))
(line num_acad_schools_area year if state == "RP", lcolor(purple) lpattern(shortdash_dot) lwidth(thick))
(line num_acad_schools_area year if state == "SH", lcolor(gold) lpattern(longdash) lwidth(thick))
, legend(subtitle("State of residence") label(1 "Baden-Wuerttemberg") label(2 "Bavaria") label(3 "Hesse") label(4 "Lower Saxony") label(5 "North Rhine-Westphalia") label(6 "Rhineland-Palatinate") label(7 "Schleswig-Holstein")) ytitle("Number per 1,000 square km", size(large)) xtitle("", size(large)) title("{bf:(b)} Academic schools") xlabel(1950 1960 1970 1980, labsize(large)) ylabel(0 4 8 12 16 20, labsize(large)) name(part_b, replace) scheme(s1mono);
#delimit cr

*Common graph
* Both panels side by side, combined with grc1leg
grc1leg part_a part_b, cols(2) name(both_parts, replace)
graph display both_parts, xsize(4) ysize(1.5) scale(1.2) scheme(s1mono)


************************************************************************************
	*** Figure A2 ***

* Same plots as Figure A1, but every prediction is evaluated at one fixed
* number of students (the BY 1960 value) instead of each state-year's own value.
* Panel (a): intermediate schools. Panel (b): academic schools.
* The combined graph needs the user-written command grc1leg.

*** INTERMEDIATE SCHOOLS ***

* Undo the preserve that is still active from the Figure A1 section, which
* brings back the full data including the state-year dummies.
restore

*Trend per state
* NOTE(review): state_* and *_cohort_size are not referenced again in this
* section - possibly left over from an earlier specification; verify before removing.
foreach x in BW BY HE NI NW RP SH{
	gen state_`x' = state == "`x'"
	gen `x'_cohort_size = state_`x' * students_total
}

* Keep a copy of the full data; it is restored before the academic-schools part
preserve

*Regression
* The variable range BW_1952 - SH_1980 leaves out BW_1950 and BW_1951, so these
* two state-years together form the base category.
reg num_inter_schools_area BW_1952 - SH_1980 students_total

*Reference number of students: the BY 1960 value (not a mean), copied to all rows
gen example_stud_single = students_total if state == "BY" & year == 1960
egen example_stud = max(example_stud_single)

*We only need one observation
keep if _n == 1

*Predicted values
*In general
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1952(1)1980{
		gen num_inter_schools_area_`x'_`y' = _b[_cons] + _b[`x'_`y'] + _b[students_total] * example_stud
	}
}

*Years 1950 and 1951 but not for state BW
foreach x in BY HE NI NW RP SH{
	gen num_inter_schools_area_`x'_1950 = _b[_cons] + _b[`x'_1950] + _b[students_total] * example_stud
	gen num_inter_schools_area_`x'_1951 = _b[_cons] + _b[`x'_1951] + _b[students_total] * example_stud
}

*Base category BW 1950 and 1951
* NOTE(review): the BW 1950 prediction is commented out, so the BW line starts
* in 1951 while all other states start in 1950 - confirm this is intended.
*gen num_inter_schools_area_BW_1950 = _b[_cons]
gen num_inter_schools_area_BW_1951 = _b[_cons] + _b[students_total] * example_stud

*Reshape data
* From one wide row to one row per state-year; the suffix (e.g. BW_1951) is
* split into a state code and a numeric year.
keep num_inter_schools_area_*
gen id = 1
reshape long num_inter_schools_area_, i(id) j(edu_ID) string
drop id
gen state = substr(edu_ID,1,2)
gen year = substr(edu_ID,4,7)
destring year, replace
drop edu_ID

*Graph intermediate schools with cohort size controls
* The plotted name num_inter_schools_area is an abbreviation of the reshaped
* variable num_inter_schools_area_ (relies on variable abbreviation being on).
#delimit ;
tw
(line num_inter_schools_area year if state == "BW", lcolor(cranberry) lpattern(solid) lwidth(thick))
(line num_inter_schools_area year if state == "BY", lcolor(midblue) lpattern(dash) lwidth(thick))
(line num_inter_schools_area year if state == "HE", lcolor(black) lpattern(dot) lwidth(thick))
(line num_inter_schools_area year if state == "NI", lcolor(pink) lpattern(dash_dot) lwidth(thick))
(line num_inter_schools_area year if state == "NW", lcolor(midgreen) lpattern(shortdash) lwidth(thick))
(line num_inter_schools_area year if state == "RP", lcolor(purple) lpattern(shortdash_dot) lwidth(thick))
(line num_inter_schools_area year if state == "SH", lcolor(gold) lpattern(longdash) lwidth(thick))
, legend(cols(4) subtitle("State of residence") label(1 "Baden-Wuerttemberg") label(2 "Bavaria") label(3 "Hesse") label(4 "Lower Saxony") label(5 "North Rhine-Westphalia") label(6 "Rhineland-Palatinate") label(7 "Schleswig-Holstein")) ytitle("Number per 1,000 square km", size(large)) xtitle("", size(large)) title("{bf:(a)} Intermediate schools") xlabel(1950 1960 1970 1980, labsize(large)) ylabel(, labsize(large)) name(part_a_controls, replace) scheme(s1mono);
#delimit cr


*** ACADEMIC SCHOOLS ***

* Back to the full data. No new preserve follows, so the reshaped plotting
* data remain in memory when this section ends.
restore

*Regression
reg num_acad_schools_area BW_1952 - SH_1980 students_total

*Reference number of students: the BY 1960 value (not a mean), copied to all rows
gen example_stud_single = students_total if state == "BY" & year == 1960
egen example_stud = max(example_stud_single)

*We only need one observation
keep if _n == 1

*Predicted values
*In general
foreach x in BW BY HE NI NW RP SH{
	forvalues y = 1952(1)1980{
		gen num_acad_schools_area_`x'_`y' = _b[_cons] + _b[`x'_`y'] + _b[students_total] * example_stud
	}
}

*Years 1950 and 1951 but not for state BW
foreach x in BY HE NI NW RP SH{
	gen num_acad_schools_area_`x'_1950 = _b[_cons] + _b[`x'_1950] + _b[students_total] * example_stud
	gen num_acad_schools_area_`x'_1951 = _b[_cons] + _b[`x'_1951] + _b[students_total] * example_stud
}

*Base category BW 1950 and 1951
* NOTE(review): as in panel (a), BW 1950 is not predicted - confirm this is intended.
*gen num_acad_schools_area_BW_1950 = _b[_cons]
gen num_acad_schools_area_BW_1951 = _b[_cons] + _b[students_total] * example_stud

*Reshape data
keep num_acad_schools_area_*
gen id = 1
reshape long num_acad_schools_area_, i(id) j(edu_ID) string
drop id
gen state = substr(edu_ID,1,2)
gen year = substr(edu_ID,4,7)
destring year, replace
drop edu_ID

*Graph academic schools with cohort size controls
#delimit ;
tw
(line num_acad_schools_area year if state == "BW", lcolor(cranberry) lpattern(solid) lwidth(thick))
(line num_acad_schools_area year if state == "BY", lcolor(midblue) lpattern(dash) lwidth(thick))
(line num_acad_schools_area year if state == "HE", lcolor(black) lpattern(dot) lwidth(thick))
(line num_acad_schools_area year if state == "NI", lcolor(pink) lpattern(dash_dot) lwidth(thick))
(line num_acad_schools_area year if state == "NW", lcolor(midgreen) lpattern(shortdash) lwidth(thick))
(line num_acad_schools_area year if state == "RP", lcolor(purple) lpattern(shortdash_dot) lwidth(thick))
(line num_acad_schools_area year if state == "SH", lcolor(gold) lpattern(longdash) lwidth(thick))
, legend(subtitle("State of residence") label(1 "Baden-Wuerttemberg") label(2 "Bavaria") label(3 "Hesse") label(4 "Lower Saxony") label(5 "North Rhine-Westphalia") label(6 "Rhineland-Palatinate") label(7 "Schleswig-Holstein")) ytitle("Number per 1,000 square km", size(large)) xtitle("", size(large)) title("{bf:(b)} Academic schools") xlabel(1950 1960 1970 1980, labsize(large)) ylabel(, labsize(large)) name(part_b_controls, replace) scheme(s1mono);
#delimit cr

*Common graph
* Both panels side by side, combined with grc1leg
grc1leg part_a_controls part_b_controls, cols(2) name(both_parts_controls, replace)
graph display both_parts_controls, xsize(4) ysize(1.5) scale(1.2) scheme(s1mono)


