
#delimit;

**********************************************************************************
Preparation of a dataset for the estimations
**********************************************************************************;

* Start from an empty dataset;
clear;

* Memory setting kept from the original script;
set memory 700m;

* Do not pause the output between screens;
set more off;

* Root folder holding the input and output datasets. Adapt it to the local installation;
*local path "C:\path\";

local path "C:\dossiers\SpatialM\GMS_ANPE\publication_JAE_doc\";

local source "`path'de.dta";		* Source file;
local panel  "`path'panel.dta";	* Created dataset;
local dcom   "`path'datacom.dta";	* dataset containing variables at the municipality level;

* Load only the variables used below. The source file is no longer loaded in full
  beforehand: that first load was immediately overwritten by this one and only cost
  memory and time;
use idx ndem sexe age depcom nenf handic nation nivetude sitmat datins motins catregr motann datann
    using "`source'";

* Order the spells by individual (idx) and spell rank (ndem) so that _n-1 designates
  the adjacent spell of the same individual when there is one;
sort idx ndem;



**********************************************************************************
Spells in category 1, 2 or 3 immediately followed by spells of category 4 or 5 have
a specific type of reason for exit. We switch to exit of type 2 for category 4 and 
exit of type 1 for category 5. Most corrections are related to the rule 6 of the
Historical File Construction. In practice, there are only a very few changes.
**********************************************************************************;

* Note that the spells of an individual are sorted with respect to ndem, which corresponds
to decreasing dates. ndem=1 : the most recent spell, ndem=2 : the second most recent
spell, etc.;

* List the variables in memory before the correction;
describe;

* Condition shared by the two corrections below. The preceding row belongs to the same
  individual, has a spell rank lower by one, and starts on the day the current spell
  ends or on the day after;
local chained "idx==idx[_n-1] & ndem==ndem[_n-1]+1 & (datann==datins[_n-1] | datann==datins[_n-1]+1)";

* Spell in category 1, 2 or 3 followed by a category 4 spell: exit reason set to 02,
  unless the recorded reason is 03;
replace motann="02" if `chained'
  & inlist(catregr,"1","2","3")
  & catregr[_n-1]=="4"
  & motann!="03";

* Spell in category 1, 2 or 3 followed by a category 5 spell: exit reason set to 01;
replace motann="01" if `chained'
  & inlist(catregr,"1","2","3")
  & catregr[_n-1]=="5";

* List the variables again after the correction;
describe;



**********************************************************************************
Flow sampling.
We only keep observations for which the spell begins between January 1 and
June 30, 1996.
**********************************************************************************;

* Statement for Stata 9, kept for reference;
*keep if datins>=date("01-01-1996","dmy") & datins<=date("30-06-1996","dmy");
* Statement for Stata 10. Keep the spells whose registration date lies between the two
  bounds, both included. A missing datins is not in the range and is dropped;
keep if inrange(datins, td("01-01-1996"), td("30-06-1996"));



**********************************************************************************
We add a dummy indicating whether the municipality of residence belongs to the
Paris region (Ile-de-France). An observation whose municipality code does not
correspond to a municipality in the Paris region is considered to have a wrong code.
**********************************************************************************;

* Convert the municipality code to a numeric variable and sort on it, as required by
  the old-style match merge used below;
destring depcom, replace;
sort depcom;

* Bring in idf from the municipality-level file, matching on depcom;
merge depcom using "`dcom'", keep(depcom idf);

* Keep the rows present in the spell data, matched or not, then discard the marker;
keep if _merge==1 | _merge==3;
drop _merge;

* A missing idf, for instance for a code without a match, is set to 0;
replace idf=0 if idf==.;



**********************************************************************************
Creation of exit type:
e : finding a job
a : exit to non-employment
i : unknown exit
c : right-censoring
**********************************************************************************;

* Exit to a job: reason 01;
gen byte e=(motann=="01");
* Unknown exit: reasons 09 to 13;
gen byte i=inlist(motann,"09","10","11","12","13");
* Right-censored spell: reason 06 or no reason recorded;
gen byte c=inlist(motann,"06","");
* Exit to non-employment: every remaining case;
gen byte a=!(e | i | c);



**********************************************************************************
Creation of the variable corresponding to the unemployment duration
**********************************************************************************;

* Difference between the exit date and the registration date;
gen t=datann-datins;



**********************************************************************************
Creation of variables
**********************************************************************************;

* Numeric versions of the string codes. real() gives a missing value when the string
  is not a number;
gen enf=real(nenf);
gen sex=real(sexe);

* Disability indicator: 1 unless handic is the string 0;
gen handi=(handic~="0");

* Education class from the first character of nivetude. A first character outside
  1 to 6 gives sccl=0;
gen str1 niv=substr(nivetude,1,1);
gen sccl=   (niv=="1" | niv=="2")
	 +2*(niv=="3")
	 +3*(niv=="4")
	 +4*(niv=="5" | niv=="6");

* Nationality group from the numeric nationality code. The last group starts at 50
  (and not strictly above 50) so that code 50 is not left with nat=0, which would
  silently place it in the reference group of the nat dummies. A missing country
  compares as larger than any number and therefore also falls in group 5;
gen country=real(nation);
gen nat=    (country==1)
       +2*(country>1   & country<30)
	 +3*(country>=30 & country<40)
	 +4*(country>=40 & country<50)
	 +5*(country>=50);

* The two intermediate variables are no longer needed;
drop niv country;



**********************************************************************************
Sample Selection
**********************************************************************************;

* Remove individuals aged 55 or more. A missing age compares as larger than any number
  and is removed as well;
drop if age>=55;


**********************************************************************************
Creation of labels
**********************************************************************************;


* Exit indicators;
label var e "Exit : job";
label var a "Exit : non-employment";
label var i "Exit : unknown";
label var c "Right-censoring";

* Duration and individual characteristics;
label var t "Unemployment duration (in days)";
label var sex "Sex (0,1)";
label var handi "Disabled";
label var enf "Number of children";
label var sccl "1=sup; 2=BAC; 3=Bep-Cap-2e-1e; 4=coll";
label var nat "1=fra; 2=europ; 3=maghreb; 4=afr(other); 5=other";

* Store every variable in the smallest type that holds its values;
compress;



**********************************************************************************
Deleting observations with mistakes
**********************************************************************************;

* Missing municipality code, number of children, sex or duration;
drop if depcom==. | enf==. | sex==. | t==.;

* Education class not recognised, or nationality code 99;
drop if sccl==0 | nation=="99";

* Municipality flagged with idf=0 or with a missing idf;
drop if idf==0 | idf==.;


**********************************************************************************
Creation of explanatory variables
**********************************************************************************;

* 1 when sitmat equals M;
gen mat2=(sitmat=="M");

* Education classes 2 to 4;
forvalues k=2/4 {;
  gen edu`k'=(sccl==`k');
};

* Nationality groups 2 to 5;
forvalues k=2/5 {;
  gen nat`k'=(nat==`k');
};

* 1 when sex equals 2;
gen sex2=(sex==2);

* Number of children: exactly 1 to 4, then 5 or more;
forvalues k=1/4 {;
  gen enf`k'=(enf==`k');
};
gen enf5=(enf>=5);

* Squared age, computed before any centering of age;
gen age2=age*age;

* Month of registration, February to June;
forvalues k=2/6 {;
  gen mins`k'=(month(datins)==`k');
};

* Month of exit, February to December;
forvalues k=2/12 {;
  gen msor`k'=(month(datann)==`k');
};


* Year of exit, 1996 to 2003;
forvalues k=1/8 {;
  gen asor`k'=(year(datann)==1995+`k');
};


**********************************************************************************
Creation of labels
**********************************************************************************;

* Identification and exit variables;
label var motann "Exit type";
label var depcom "Municipality code";

* Individual characteristics;
label var sex2 "Female";
label var mat2 "Married or in couple";

* Number of children;
label var enf1 "1 child";
forvalues k=2/4 {;
  label var enf`k' "`k' children";
};
label var enf5 "5 children or more";

* Nationality groups;
label var nat2 "European (not French)";
label var nat3 "North African";
label var nat4 "Subsaharan African";
label var nat5 "Other";

* Education classes;
label var edu2 "BAC (diploma or not)";
label var edu3 "BEP-CAP-2e-1e";
label var edu4 "College (BEPC or not)";

* Age;
label var age "age";
label var age2 "squared age";

* Month of registration: mins2 is February, up to mins6 for June;
local k=2;
foreach m in February March April May June {;
  label var mins`k' "Entry month : `m'";
  local k=`k'+1;
};



**********************************************************************************
Centering of variables
**********************************************************************************;

* Subtract from each explanatory variable its mean over the current sample. The mean
  is held in a helper variable named m followed by the variable name, which is dropped
  as soon as it has been used. age2 was built from the uncentered age and is centered
  on its own mean;
foreach v in sex2 mat2
             enf1 enf2 enf3 enf4 enf5
             nat2 nat3 nat4 nat5
             edu2 edu3 edu4
             age age2
             handi
             mins2 mins3 mins4 mins5 mins6 {;
  egen m`v'=mean(`v');
  replace `v'=`v'-m`v';
  drop m`v';
};



**********************************************************************************
Panel used for the estimation
**********************************************************************************;

* Restrict the dataset to the exit reason, the municipality code, the duration, the
  four exit indicators and the centered explanatory variables;
keep motann depcom t a e i c
     sex2 mat2
     enf1 enf2 enf3 enf4 enf5
     nat2 nat3 nat4 nat5
     edu2 edu3 edu4
     age age2 handi
     mins2 mins3 mins4 mins5 mins6;

* Order the observations by municipality and duration;
sort depcom t;

* Store every variable in the smallest type that holds its values;
compress;

* Convert the exit reason from string to numeric;
destring motann, replace;

saveold "`path'panel.dta", replace;  * Panel used for the estimation;

* Indicators of a duration of at least 30, 90, 180, 360 and 720 days;
foreach d in 30 90 180 360 720 {;
  gen nbcho_`d'=(t>=`d');
};

* One row per municipality. The count of age gives the number of observations with a
  non-missing age. The duration indicators are never missing, so counting them would
  only reproduce that same number: they are summed instead, which gives the number of
  spells reaching each duration;
collapse (count) age (sum) nbcho_30 nbcho_90 nbcho_180 nbcho_360 nbcho_720, by(depcom);
rename age vcom;

* Add the municipality-level variables and keep only the municipalities present in
  the panel, matched or not;
merge depcom using "`dcom'";
keep if _merge==1|_merge==3;

rename depcom dc;

saveold "`path'vcom.dta", replace;   * Dataset containing the number of observations in each municipality. It is used in the estimations;
