// Set the working directory to the project root. The relative paths used
// later in this file ("Do files/...") are resolved from this directory.
// NOTE(review): machine-specific absolute path; adjust before running on
// another computer.
cd "C:\Users\degroote\Sync\Research\Analysis\Elite\"

// MAKE DB
// Start from an empty session.
clear all
// Run the commands in this file under Stata 15 interpretation.
version 15
// Fix the random-number seed and the sort seed (presumably so that repeated
// runs give the same database; confirm with the authors).
set seed 2910
set sortseed 50

/*
Do file that creates database to be used for "Tracking and Specialization of High Schools: Heterogeneous Effects of School Choice"
Authors: Olivier De Groote and Koen Declercq
Last update: 30 October 2019
*/

// Conversions needed for Stata 15: re-encode the original CSV files from ISO-8859-1 to UTF-8 (commands are commented out below)
*unicode convertfile "Primaire databestanden\originalsource\6z_vpl_in_tabel_2_verrijkt+.csv" "Primaire databestanden\originalsource\6z_vpl_in_tabel_2_verrijkt+_utf8.csv", srcencoding(ISO-8859-1)
*unicode convertfile  "Primaire databestanden\originalsource\6z_vpl_in_tabel_2_verrijkt.csv"  "Primaire databestanden\originalsource\6z_vpl_in_tabel_2_verrijkt_utf8.csv", srcencoding(ISO-8859-1)
*unicode convertfile  "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+.csv" "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+_utf8.csv", srcencoding(ISO-8859-1)
*unicode convertfile  "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt.csv" "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt_utf8.csv", srcencoding(ISO-8859-1)
*unicode convertfile  "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+_koepels.csv" "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+_koepels_utf8.csv", srcencoding(ISO-8859-1)
*unicode convertfile  "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+_scholengem.csv" "Primaire databestanden/originalsource/6z_scholen_in_tabel_2_verrijkt+_scholengem_utf8.csv", srcencoding(ISO-8859-1)

// Pipeline: runs the project's do-files one after another. Each stage is a
// separate do-file under "Do files"; the session is cleared after each of the
// first six stages.

// students
* Loads and merges all data on students in bao (=primary education) and so (=secondary education)
* and adds GOK data (=student characteristics) and performance outcomes
do "Do files/students"
clear all

// admgr
* "Administratieve groep" numbers refer to the chosen options in a detailed way (e.g. "Latijn-Wiskunde")
* this is used later to see in which track students are
do "Do files/admgr"
clear all

// schooldb
* Obtain info for each school (with the official id "nummer_instelling"), in particular address...
do "Do files/schooldb"
clear all

// geocode
* Create a new identifier for schools instead of the official one, based on identical address
do "Do files/geocode"
clear all

// merge
* Merges all previous results and adds some variables (important for the next do file:
* it adds what options can be chosen in each school by looking at what is actually chosen)
do "Do files/merge"
clear all

// model
* Keeps relevant info for the model of the results of "merge" and adds the minimal distance information
* NOTE(review): the distance do-files run after this step; confirm where the
* minimal distance information is actually attached.
do "Do files/model"
clear all

// distance
* Calculates minimal distance to different types of schools (possible options and affiliation) for each statistical sector
* NOTE(review): unlike the stages above, there is no "clear all" between the
* remaining do-files; presumably each one manages memory itself. Confirm.
do "Do files/distance"
do "Do files/distance_count"

// census data
* Loads data at the statistical sector level to add local control variables
do "Do files/census"

// Final merge and cleaning
do "Do files/data_management"

// Leave the session empty once the pipeline has finished.
clear all
