/**********************************************************************************************************
* Program used to construct some inequality indices at the municipality level
***********************************************************************************************************/

/* Start from a clean workspace */
new;

/* Activate the GAUSS libraries used by the program */
library pgraph,maxlik,optmum ;

/* Load the external DLL; presumably it provides the routines freq_w and
   kernel_gauss_w that the procedures freq and kern reach through dllcall
   -- TODO confirm against the DLL source */
dlibrary -a c:\temp\kernel_anpe.dll;

/* External declarations of the maxlik and optmum libraries */
#include maxlik.ext;
#include optmum.ext;
/* Reset the maxlik and pgraph control variables to their defaults */
maxset; graphset;

/* Root folder holding the data set (machine-specific path) */
spath="C:\\dossiers\\SpatialM\\GMS_ANPE\\publication_JAE_doc\\";


/**********************************************************************************************************
* Data source
***********************************************************************************************************/

/* Name of the data set, given without file extension */
fich1=spath $+ "town_alpha_noncentered";

/* Read every row of the data set into fhs and fetch its variable names */
open f1=^fich1 for read;
fhs=readr(f1,rowsf(f1));
nvar1=getname(fich1);
close(f1);

/* Declare (and zero) the globals used further down before makevars fills them;
   the names are presumably those of the data set variables -- verify against nvar1 */
clearg sex1, sex2, enf0, enf1, enf2, enf3, enf4, enf5, nat1, nat2, nat3, nat4, nat5, edu1, edu2, edu3, edu4, age, age2, mat1, mat2,
       handi, a, e, i, c, te, ta, tc, depcom, nbcho, pop90,
       txcho, tx_fra, tx_euro, tx_magr, tx_afr, tx_nat, tx_nodip, tx_tech, tx_bac, tx_univ, tot_nat, tot_dip, actocc, chomeur,
       ske6, sme6, smulte6, ske24, sme24, smulte24, ska6, sma6, smulta6, ska24, sma24, smulta24, 
       rtc45, rvp45;

/* Create one global column vector per column of fhs, named after the entries of nvar1 */
makevars(fhs,0,nvar1);



/* Stepwise (unsmoothed) distribution of a variable.                          */
/* Inputs : x = column of values, w = column of weights, one per row of x.    */
/* Output : column with rows(x) entries, presumably filled in by the external */
/*          routine freq_w, which receives the values sorted in ascending     */
/*          order together with the weights put in the same order.            */

proc (1) = freq(x,w);
local nobs, xsort, wsort, fstep;

nobs=rows(x);
fstep=zeros(nobs,1);        /* output buffer handed to the DLL routine */

xsort=sortc(x,1);
wsort=sortc(w~x,2);         /* weights reordered according to the value of x */
wsort=wsort[.,1];

dllcall freq_w(xsort,nobs,wsort,fstep);

retp(fstep);
endp;



/* Bandwidth from Silverman's rule of thumb: h = (4*s^5/(3*nbp))^(1/5),        */
/* where s is the sample standard deviation of x (divisor rows(x)-1).           */
/* NOTE(review): the rule divides by the argument nbp, not by the number of     */
/* rows of x; the callers in this file pass the number of grid points there.    */
/* Confirm that this is intended.                                               */

proc (1)=hsilv(x,nbp);
local nobs,devx,sdx,bw;
nobs=rows(x);
devx=x-meanc(x);                        /* deviations from the mean */
sdx=sqrt(sumc(devx.*devx)/(nobs-1));
bw=(4*sdx^5/(3*nbp))^(1/5);
retp(bw);
endp;



/* Smoothed distribution of a variable.                                        */
/* Inputs : x = column of values, w = column of weights, nbp = number of       */
/*          points of the output, h = bandwidth.                               */
/* Outputs: two nbp x 1 columns handed to the external routine                 */
/*          kernel_gauss_w, presumably the evaluation points and the smoothed  */
/*          distribution at those points -- confirm against the DLL source.    */

proc (2) = kern(x,w,nbp,h);
local nobs, xsort, fstep, gridx, gridf;

nobs=rows(x);
xsort=sortc(x,1);
fstep=freq(x,w);            /* stepwise distribution used as input of the smoother */

gridx=zeros(nbp,1);
gridf=zeros(nbp,1);

dllcall kernel_gauss_w(xsort,fstep,nobs,h,nbp,gridx,gridf);

retp(gridx, gridf);
endp;



/* Gini index weighted by the number of observations.                          */
/* Inputs : x = N x K matrix, one variable per column; w = N x 1 weights.      */
/* Output : K x 1 column, for each variable                                    */
/*          sum_i sum_j w_i*w_j*|x_i-x_j| / (2*sumc(w)^2*weighted mean).       */
/* The inner sum over j is computed as one matrix product per row i instead of */
/* an interpreted scalar loop over all pairs.                                  */

proc (1) = gini(x,w);
local mx,g,sw,i;

sw=sumc(w);
mx=w'*x/sw;                              /* 1 x K weighted means */
g=zeros(1,cols(x));
for i(1,rows(x),1);
    /* w'*abs(x-x[i,.]) = sum over j of w_j*|x_j-x_i|, for every column at once */
    g=g+w[i,1]*(w'*abs(x-x[i,.]));
endfor;
g=g./(2*sw^2*mx);

retp(g');
endp;



/* Weighted coefficient of variation.                                          */
/* Inputs : x = N x K matrix, one variable per column; w = N x 1 weights.      */
/* Output : K x 1 column; for each variable, the square root of the weighted   */
/*          mean squared deviation from the weighted mean, divided by that     */
/*          weighted mean.                                                     */

proc (1) = coeffvar(x,w);
local wtot,avg,cvx,devx,jc;
wtot=sumc(w);
avg=w'*x/wtot;                           /* 1 x K weighted means */
cvx=zeros(1,cols(x));
for jc(1,cols(x),1);
    devx=x[.,jc]-avg[1,jc];
    cvx[1,jc]=sqrt(devx'*(w.*devx)/wtot)/avg[1,jc];
endfor;
retp(cvx');
endp;



/* Parameter : number of points used to compute smoothed distributions */

nbp=1000000;


/* Table of inequality indices, one row per column of x, weighted by w.        */
/* Inputs : x = N x K matrix of variables, w = N x 1 weights, nbp = number of  */
/*          points of the smoothed distribution.                               */
/* Output : K x 9 matrix whose columns are                                     */
/*          1 weighted mean            2 q75/q25        3 q75-q25              */
/*          4 q90/q10                  5 q90-q10        6 Gini index           */
/*          7 coefficient of variation                                         */
/*          8, 9 first and last value of the smoothed distribution             */
/* A quantile is the grid point sx[j] at which the smoothed distribution sy    */
/* goes from <= p at j to > p at j+1; it stays at 0 when no such point exists. */

proc (1) = ineqtab(x,w,nbp);
local res,h,sx,sy,q10,q25,q75,q90,k,j;

res=zeros(cols(x),9);

for k(1,cols(x),1);
    h=hsilv(x[.,k],nbp);
    res[k,1]=x[.,k]'*w/sumc(w);
    {sx,sy}=kern(x[.,k],w,nbp,h);
    q10=0; q25=0; q75=0; q90=0;
    for j(1,nbp-1,1);
        if (sy[j,1]<=0.1)*(sy[j+1,1]>0.1);   q10=sx[j,1]; endif;
        if (sy[j,1]<=0.9)*(sy[j+1,1]>0.9);   q90=sx[j,1]; endif;
        if (sy[j,1]<=0.25)*(sy[j+1,1]>0.25); q25=sx[j,1]; endif;
        if (sy[j,1]<=0.75)*(sy[j+1,1]>0.75); q75=sx[j,1]; endif;
    endfor;
    res[k,2]=q75/q25; res[k,3]=q75-q25; res[k,4]=q90/q10; res[k,5]=q90-q10;
    res[k,6]=gini(x[.,k],w); res[k,7]=coeffvar(x[.,k],w);
    res[k,8]=sy[1,1]; res[k,9]=sy[rows(sy),1];
endfor;

retp(res);
endp;


/**************************************************************************/
/* Inequality indices on variables at the municipality level, cf. Table 1 */
/**************************************************************************/

/* Unemployment rate */

x=txcho;
w=actocc+chomeur;

/* keep the rows with a positive weight */
x=selif(x,(w.>0));
w=selif(w,(w.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variable: txcho; weight: labour force";
print ""; print results; print"";



/* Proportion of the population by citizenship */

x=tx_fra~tx_euro~tx_magr~tx_afr~tx_nat;
w=tot_nat;

/* keep the rows with a positive weight */
x=selif(x,(w.>0));
w=selif(w,(w.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variables: tx_fra~tx_euro~tx_magr~tx_afr~tx_nat; weight: population";
print ""; print results; print"";



/* Proportion of the population by diploma */

x=tx_nodip~tx_tech~tx_bac~tx_univ;
w=tot_dip;

/* keep the rows with a positive weight */
x=selif(x,(w.>0));
w=selif(w,(w.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variables: tx_nodip~tx_tech~tx_bac~tx_univ; weight: population more than 15 year old";
print ""; print results; print"";



/* 45mn job density by public transport */

x=rtc45;
w=actocc+chomeur;

/* keep the rows with a positive density; w is filtered first because the
   selection is based on the unfiltered x */
w=selif(w,(x.>0));
x=selif(x,(x.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variable: rtc45; weight: labour force";
print ""; print results; print"";



/* 45mn job density by car */

x=rvp45;
w=actocc+chomeur;

/* keep the rows with a positive density; w is filtered first because the
   selection is based on the unfiltered x */
w=selif(w,(x.>0));
x=selif(x,(x.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variable: rvp45; weight: labour force";
print ""; print results; print"";




/**************************************************************************/
/* Inequality indices on variables at the municipality level, cf. Table 2 */
/**************************************************************************/

/* Explanatory variables of the historical file of unemployment spells */

x=e~a~(1-e-a)~age~sex1~sex2~mat1~mat2~enf0~enf1~enf2~enf3~enf4~enf5~nat1~nat2~nat3~nat4~nat5~edu1~edu2~edu3~edu4~handi;

w=nbcho;

/* keep the rows with a positive nbcho */
x=selif(x,(nbcho.>0));
w=selif(nbcho,(nbcho.>0));

results=ineqtab(x,w,nbp);


print "Number of observations";
print rows(x);
print "Variables: e~a~c~age~sex1~sex2~mat1~mat2~enf0~enf1~enf2~enf3~enf4~enf5~nat1~nat2~nat3~nat4~nat5~edu1~edu2~edu3~edu4~handi";
print ""; print results; print"";



/* Unemployment duration when finding a job */

x=te;
w=nbcho;

/* keep the rows where e is positive */
x=selif(x,(e.>0));
w=selif(nbcho,(e.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variable: te";
print ""; print results; print"";



/* Unemployment duration when the spell is censored */

x=tc;
w=nbcho;

/* keep the rows where c is positive */
x=selif(x,(c.>0));
w=selif(nbcho,(c.>0));

results=ineqtab(x,w,nbp);

print "Nombre d'observations";
print rows(x);
print "Variable: tc";
print ""; print results; print"";



/* Unemployment duration when dropping out of the labour force */

x=ta;
w=nbcho;

/* keep the rows where a is positive */
x=selif(x,(a.>0));
w=selif(nbcho,(a.>0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variable: ta";
print ""; print results; print"";






/*****************************************************************/
/* Inequality indices on the exit from unemployment, cf. Table 4 */
/*****************************************************************/

x=(ske6~sme6~smulte6)~(ske24~sme24~smulte24)~(ska6~sma6~smulta6)~(ska24~sma24~smulta24);
w=nbcho;

/* flag the rows holding at least one missing value, then drop them */
missv=zeros(rows(x),1);
for i(1,rows(x),1); missv[i,1]=ismiss(x[i,.]); endfor;

w=selif(w,(missv .eq 0));
x=selif(x,(missv .eq 0));

results=ineqtab(x,w,nbp);

print "Number of observations";
print rows(x);
print "Variables: (ske6~sme6~smulte6)~(ske24~sme24~smulte24)~(ska6~sma6~smulta6)~(ska24~sma24~smulta24); weight: number of unempployed workers";
print ""; print results; print"";
