/**********************************************************************************************************
* Program estimating the effect of local variables on the municipality fixed effects
***********************************************************************************************************/

/* Start from a clean workspace */
new;

/* Activate the graphics and optimisation libraries.
   NOTE(review): apart from maxset and graphset below, no routine of these libraries
   is called in the part of the file reviewed here - confirm they are still needed */
library pgraph,maxlik,optmum ;

/* Load an external DLL from a hard-coded local path.
   NOTE(review): nothing in the part of the file reviewed here calls into it, and the
   program stops here on a machine where this file is absent - confirm it is needed */
dlibrary -a c:\temp\sple3f.dll;

/* External declarations of the maxlik and optmum global control variables */
#include maxlik.ext;
#include optmum.ext;
/* Reset the maxlik and pgraph control variables to their default values */
maxset; graphset;



/**********************************************************************************************************
* Reading the data
***********************************************************************************************************/

/* Folder holding the input data sets; also used further down as the path of the
   save and load statements. The trailing separator is required because file
   names are appended to it with $+ */
spath="C:\\dossiers\\SpatialM\\GMS_ANPE\\publication_JAE_doc\\";

/* Data at the local level */

fich3=spath $+ "town_alpha";

open f3=^fich3 for read;
/* open sets the handle to -1 when the data set cannot be opened: stop with an
   explicit message instead of failing later inside readr */
if f3 eq -1;
    errorlog "Cannot open the local-level data set:";
    print fich3;
    end;
endif;
/* Read every row of the data set at once, then fetch its column names */
vc=readr(f3,rowsf(f3));
nvar3=getname(fich3);
/* Documented form of close: the handle variable receives the return code */
f3=close(f3);

/* Declare (and clear) the globals used below, so that they exist at compile time */
clearg lalphae, nbcho, xbe, tx_tech, tx_bac, tx_univ, tx_euro, tx_magr, tx_afr, tx_nat, rtc30, rvp30, rtc45, rvp45; 

/* Split the data matrix into one global column vector per variable, named after
   the column names of the data set */
makevars(vc,0,nvar3);

/* Number of rows in the local-level data.
   NOTE(review): rcom is not used in the part of the file reviewed here */
rcom=rows(nbcho);



/**************************************************************************************
* Weighted least squares for the third stage regression
*
* Inputs:   y0    Nx1 dependent variable
*           x0    NxK regressors (the constant must be included by the caller)
*           w0    Nx1 weights
*
*           Rows with a missing or zero weight, a missing y0 or any missing
*           element of x0 are dropped before estimation.
*
* Returns:  Kx4 matrix: coefficient ~ standard error ~ |coefficient/std| ~ two-sided
*                       normal p-value
*           weighted R2 (weighted explained sum of squares over weighted total sum
*                       of squares, both centred on their weighted means)
*           number of rows kept
*           sum of the weights of the rows kept
***************************************************************************************/
 
proc (4) = wls(y0,x0,w0);
local i,n0,rcomn,nomiss,x,y,w,wn,xw,xx,coeff,res,fit,yc,sig2,std,norm,proba,r2w;

/* Flag the usable rows (1 = keep) */
n0=rows(w0);
nomiss=ones(n0,1);
for i(1,n0,1);
    nomiss[i,1]=ismiss(w0[i,1])+(w0[i,1] eq 0)+ismiss(y0[i,1])+ismiss(x0[i,.]);
    nomiss[i,1]=(nomiss[i,1] eq 0);
endfor;

x=selif(x0,nomiss);
y=selif(y0,nomiss);
w=selif(w0,nomiss);
rcomn=rows(w);

/* x.*w multiplies row i of x by w[i]; this is diag(w)*x computed without building
   the rcomn x rcomn diagonal weight matrix */
xw=x.*w;
/* Weights normalised to sum to one, used for the weighted means below */
wn=w/sumc(w);

xx=inv(xw'*x);
coeff=xx*(xw'*y);

/* Weighted variance of the residuals around their weighted mean, divided by the
   number of rows kept */
res=y-x*coeff;
res=res-wn'*res;
sig2=sumc(w.*res.*res)/rcomn;

std=sqrt(sig2*diag(xx)); 
norm=abs(coeff./std);
proba=2*cdfnc(norm);

/* Weighted R2 */
fit=x*coeff;
fit=fit-wn'*fit;
yc=y-wn'*y;
r2w=sumc(w.*fit.*fit)/sumc(w.*yc.*yc);

retp(coeff~std~norm~proba,r2w,rows(w),sumc(w));
endp;



/**************************************************************************************
* Weighted least squares for the third stage regression
* Correction to take into account the sampling error
*
* Inputs:   y0    Nx1 dependent variable
*           x0    NxK regressors (the constant must be included by the caller)
*           w0    Nx1 weights
*           cov0  covariance matrix of the sampling error; it must have one row and
*                 one column per element of w0 that is strictly positive, in the
*                 same order (this is how it is subset below)
*
*           Rows with a missing or zero weight, a missing y0 or any missing
*           element of x0 are dropped before estimation.
*
* Returns:  Kx4 matrix: coefficient ~ standard error ~ |coefficient/std| ~ two-sided
*                       normal p-value
*           weighted R2
*           number of rows kept
*           sum of the weights of the rows kept
*           share of the weighted residual sum of squares attributed to the
*           sampling error
***************************************************************************************/

proc (5) = wls_corr(y0,x0,w0,cov0);
local i,n0,rcomn,nomiss,keepc,x,y,w,cov,iey,wn,xw,xx,a,coeff,res,rss,noise,fit,ess,sig2,txerr,std,norm,proba,r2w;

/* Flag the usable rows (1 = keep) */
n0=rows(w0);
nomiss=ones(n0,1);
for i(1,n0,1);
    nomiss[i,1]=ismiss(w0[i,1])+(w0[i,1] eq 0)+ismiss(y0[i,1])+ismiss(x0[i,.]);
endfor;
nomiss=(nomiss .eq 0);

x=selif(x0,nomiss);
y=selif(y0,nomiss);
w=selif(w0,nomiss); rcomn=rows(w);

/* cov0 is indexed by the rows with a strictly positive weight: restrict the keep
   flags to those rows, then drop the matching rows and columns of cov0 */
keepc=selif(nomiss,(w0.>0));
cov=selif(cov0,keepc);
cov=selif(cov',keepc)';

/* Divide element (i,j) by exp(y[i])*exp(y[j]) with row and column scaling instead
   of two products with dense diagonal matrices.
   NOTE(review): presumably a delta-method step turning the covariance of exp(y)
   into the covariance of y - confirm against the paper */
iey=1./exp(y);
cov=(iey.*cov).*iey';

/* x.*w multiplies row i of x by w[i]; this is diag(w)*x computed without building
   the rcomn x rcomn diagonal weight matrix */
xw=x.*w;

xx=inv(xw'*x);
coeff=xx*(xw'*y);

/* Weighted residual sum of squares and the part of it due to the sampling error
   (weighted trace of cov) */
res=y-x*coeff;
rss=sumc(w.*res.*res);
noise=sumc(w.*diag(cov));

/* NOTE(review): nothing prevents sig2 from being negative when the sampling-error
   term exceeds the weighted residual sum of squares */
sig2=(rss-noise)/rcomn;
txerr=noise/rss;

/* a = inv(x'Px)*x'P, so a*cov*a' is the sampling-error part of the coefficient
   covariance matrix */
a=xx*xw';
std=sqrt(diag(sig2*xx+a*cov*a'));
norm=abs(coeff./std);
proba=2*cdfnc(norm);

/* Weighted R2: explained part over explained part plus corrected residual part */
wn=w/sumc(w);
fit=x*coeff;
fit=fit-wn'*fit;
ess=sumc(w.*fit.*fit);
r2w=ess/(ess+sig2*rcomn);

retp(coeff~std~norm~proba,r2w,rows(w),sumc(w),txerr);
endp;



/**************************
* Third stage estimations
**************************/

/* Dependent variable and weights shared by all the third-stage regressions */
y=lalphae;
w=nbcho;

/* Third-stage estimation without taking into account the sampling error */

/* Prints the output of one wls fit: list of regressors, result table, number of
   rows kept, sum of the weights and weighted R2 */
proc (0) = show3s(vlist,tab,fit,nmun,nobs);
    print ""; print vlist;
    print tab;
    print ""; print "Number of municipalities"; print nmun;
    print ""; print "Number of observations"; print nobs;
    print ""; print "R2w"; print fit;
endp;

/* Specification 1: constant and the tx_* variables */
x=ones(rows(w),1)~tx_tech~tx_bac~tx_univ~tx_euro~tx_magr~tx_afr~tx_nat;
{results,r2w,nv,n}=wls(y,x,w);
show3s("Explanatory variables: const, tx_tech, tx_bac, tx_univ, tx_euro, tx_magr, tx_afr, tx_nat",results,r2w,nv,n);

/* Specification 2: constant, rtc45 and rvp45 */
x=ones(rows(w),1)~rtc45~rvp45;
{results,r2w,nv,n}=wls(y,x,w);
show3s("Explanatory variables: const, rtc45, rvp45",results,r2w,nv,n);

/* Specification 3: all the regressors of specifications 1 and 2 */
x=ones(rows(w),1)~tx_tech~tx_bac~tx_univ~tx_euro~tx_magr~tx_afr~tx_nat~rtc45~rvp45;
{results,r2w,nv,n}=wls(y,x,w);
show3s("Explanatory variables: const, tx_tech, tx_bac, tx_univ, tx_euro, tx_magr, tx_afr, tx_nat, rtc45, rvp45",results,r2w,nv,n);

/* Keep the coefficients of the last specification and save them in the data folder */
coeff3s_9_90=results[.,1];

save path=^spath coeff3s_9_90;



/* Third-stage estimation taking into account the sampling error */

/* Covariance matrix passed to wls_corr as its cov0 argument */
load path=^spath valphae="valphae_9_90.fmt";

w=nbcho;

/* Prints the output of one wls_corr fit: list of regressors, result table, number
   of rows kept, sum of the weights, weighted R2 and sampling-error share */
proc (0) = show3s_corr(vlist,tab,fit,nmun,nobs,err);
    print ""; print vlist;
    print tab;
    print ""; print "Number of municipalities"; print nmun;
    print ""; print "Number of observations"; print nobs;
    print ""; print "R2w"; print fit;
    print ""; print "Error rate"; print err;
endp;

/* Specification 1: constant and the tx_* variables */
x=ones(rows(tx_tech),1)~tx_tech~tx_bac~tx_univ~tx_euro~tx_magr~tx_afr~tx_nat;
{results,r2w,nv,n,txerr}=wls_corr(y,x,w,valphae);
show3s_corr("Explanatory variables: const, tx_tech, tx_bac, tx_univ, tx_euro, tx_magr, tx_afr, tx_nat",results,r2w,nv,n,txerr);

/* Specification 2: constant, rtc45 and rvp45 */
x=ones(rows(tx_tech),1)~rtc45~rvp45;
{results,r2w,nv,n,txerr}=wls_corr(y,x,w,valphae);
show3s_corr("Explanatory variables: const, rtc45, rvp45",results,r2w,nv,n,txerr);

/* Specification 3: all the regressors of specifications 1 and 2 */
x=ones(rows(tx_tech),1)~tx_tech~tx_bac~tx_univ~tx_euro~tx_magr~tx_afr~tx_nat~rtc45~rvp45;
{results,r2w,nv,n,txerr}=wls_corr(y,x,w,valphae);
show3s_corr("Explanatory variables: const, tx_tech, tx_bac, tx_univ, tx_euro, tx_magr, tx_afr, tx_nat, rtc45, rvp45",results,r2w,nv,n,txerr);
