/**********************************************************************************************************
* Program used to compute make tests of fit for the multiplicative specification of the municipality hazard functions
* The approach is inspired by Andrews (1997), Econometrica.
***********************************************************************************************************/

new;

library pgraph,maxlik,optmum ;

dlibrary -a c:\temp\sple3f_andrews.dll;

#include maxlik.ext;
#include optmum.ext;
maxset; graphset;

spath="C:\\dossiers\\SpatialM\\GMS_ANPE\\publication_JAE_doc\\";

/* Number of observations used to bootstrap standard errors */

S=1000;

/* Truncation rate for last durations in the computation of the test statistic (in %) */

delta=.01;


/**********************************************************************************************************
* Data source
***********************************************************************************************************/

/* Individual data */

fich1=spath $+ "panel_10000";

open f1=^fich1 for read;
fhs=readr(f1,rowsf(f1));
nvar1=getname(fich1);
close(f1);

clearg sex2, enf1, enf2, enf3, enf4, enf5, nat2, nat3, nat4, nat5, edu2, edu3, edu4, age, age2, mat2, mat3, mat4, handi, mins2, mins3, mins4, mins5, mins6, a, e, i, c, t, depcom;
makevars(fhs,0,nvar1);

age=age/100;
age2=age2/1000;

n=rows(fhs);
x=mins2~mins3~mins4~mins5~mins6~age~age2~sex2~mat2~enf1~enf2~enf3~enf4~enf5~nat2~nat3~nat4~nat5~edu2~edu3~edu4~handi;
cx=cols(x);

/* Number of observations */

fich2=spath $+ "vcom_10000";
open f2=^fich2 for read;
vc=readr(f2,rowsf(f2));
nvar2=getname(fich2);
close(f2);
clearg vcom, dc;
makevars(vc,0,nvar2);



/***********************/
/* Value of parameters */
/***********************/

load path=^spath be="savepare_all.fmt";



/***********************************************************************************************************************
* Vector containing the number of observations by municipality and vector containing the identifier of the municipality
************************************************************************************************************************/

proc (2) = nbobs(sdepcom);
local svc,rc,sdc,n,com;

n=rows(sdepcom);

svc=1|zeros(n-1,1);

com=1;
for k(2,n,1);
    if sdepcom[k,1] ne sdepcom[k-1,1]; com=com+1; endif;
    svc[com,1]=svc[com,1]+1;
endfor;

svc=svc[1:sumc((svc.>0)),1];
rc=rows(svc);

sdc=zeros(rc,1); sdc[1,1]=sdepcom[1,1];

com=1;
for k(2,n,1);
    if sdepcom[k,1] ne sdepcom[k-1,1]; com=com+1; sdc[com,1]=sdepcom[k,1]; endif;    
endfor;

retp(svc,sdc);
endp;



/*****************************
* Kaplan-Meier estimator
*****************************/

proc (2) = kapcom(st,ss,svc);
local j,cumobs,debc,finc,kap,kapc,tmax,tmc,nc,tc,sc,rc,tcomm;

tmax=1;
rc=rows(svc);
kap=zeros(tmax,rc);
tcomm=zeros(rc,1);

cumobs=0;
for j(1,rc,1);
    debc=cumobs+1;
    finc=cumobs+svc[j,1];

    tc=st[debc:finc,1];
    sc=ss[debc:finc,1];

    if tc[svc[j,1],1]>tmax;
        kap =kap|zeros(tc[svc[j,1],1]-tmax,rc);
        tmax=tc[svc[j,1],1];
    endif;

    nc=svc[j,1];

    tmc=tc[svc[j,1],1];

    kapc=zeros(tmc,1);
    tcomm[j,1]=tmc;
    
    dllcall akaplan(tc,nc,tmc,sc,kapc);
    
    kap[1:tmc,j]=kapc;
    cumobs=cumobs+svc[j,1];
endfor;

retp(kap,tcomm);
endp;



/**********************************
* Integrated hazard by muncipality
***********************************/

proc (2) = hccom(st,ss,sexb,svc);
local j,cumobs,debc,finc,hc,rc,tmc,rxc,hcc,tmax,exbc,tc,sc,tcomm;

cumobs=0;

tmax=1;
rc=rows(svc);
hc=zeros(tmax,rc);
tcomm=zeros(rc,1);

for j(1,rc,1);
    debc=cumobs+1;
    finc=cumobs+svc[j,1];

    tc=st[debc:finc,1];
    sc=ss[debc:finc,1];
    exbc=sexb[debc:finc,1];

    if tc[svc[j,1],1]>tmax;
        hc  =hc|zeros(tc[svc[j,1],1]-tmax,rc);
        tmax=tc[svc[j,1],1];
    endif;

    rxc=svc[j,1]; tmc=tc[svc[j,1],1]; hcc=zeros(tmc,1);    
    tcomm[j,1]=tmc;

    dllcall ahazc(exbc,tc,sc,rxc,hcc);
    
    hc[1:tmc,j]=hcc;    
    cumobs=cumobs+svc[j,1];
endfor;

retp(hc,tcomm);
endp;



/*********************
* Integrated hazards * 
**********************/

/* Dummy for right censorship */
cens=(a+i+c .gt 0);

/* Effect of explanatory variables */
exb=exp(x*be);

/* Computation of the integrated for each observation */
{hc,tcommh}=hccom(t,e,exb,vcom); /* tcommh contains the last duration for which there are some observations at risk */
{kap,tcommk}=kapcom(t,e,vcom);

/* Computation of the test statistic */
ehc=exp(-hc);

md=zeros(rows(vcom),1);
sd=zeros(rows(vcom),1);
ad=zeros(rows(vcom),1);

cumobs=0;
for j(1,rows(vcom),1);
    debc=cumobs+1;
    finc=cumobs+vcom[j,1];
    diff=(ehc[t[debc:finc,1],j].^exb[debc:finc,1]).*(t[debc:finc,1] .lt tcommh[j,1])-kap[t[debc:finc,1],j];
    diff=selif(diff,(t[debc:finc,1] .le t[trunc(vcom[j,1]*(1-delta)),1]));
    md[j,1]=maxc(abs(diff));
    sd[j,1]=sqrt(meanc(diff.*diff));
    ad[j,1]=meanc(abs(diff));
    cumobs=cumobs+vcom[j,1];
endfor;



/*************
* Simulations 
*************/

mdiffm=zeros(rows(vcom),S);
sdiffm=zeros(rows(vcom),S);
adiffm=zeros(rows(vcom),S);

for i(1,S,1);

/* Draw of the observation id in the population of each municipality */

no=zeros(n,1);

cumobs=0;
for j(1,rows(vcom),1);
    debc=cumobs+1;
    finc=cumobs+vcom[j,1];
    no[debc:finc,1]=debc+trunc(rndu(vcom[j,1],1)*vcom[j,1]);
    cumobs=cumobs+vcom[j,1];
endfor;



/**********************************/
/* Generating some simulated data */
/**********************************/

s_exb=exp(x[no,.]*be);       /* Effect of explanatory variables */

print exb~s_exb;

s_hc=-ln(rndu(n,1))./s_exb;  /* Generating the integrated hazard for each individual after drawing in a [0,1] uniform law for the survival function */

/* Generating the identifier of the column corresponding to the muncipality; it is important that the municipalities are ranked in ascending order in the vector VCOM */
sdc=depcom[no,1]; /* Municipality of the individual */
s_dc=zeros(n,1); for k(1,n,1); s_dc[k,1]=sumc((dc .le sdc[k,1])); endfor; /* Identified of the corresponding muncipality */

/* Duration corresponding to the integrated hazard */

s_t=zeros(n,1);
for k(1,n,1);
        
        s_t[k,1]=sumc((hc[1:tcommh[s_dc[k,1],1],s_dc[k,1]] .lt s_hc[k,1]))+1; 
        s_t[k,1]=s_t[k,1]-(s_t[k,1] eq (tcommh[s_dc[k,1],1]+1)); /* The duration takes the maximum value in the municipality when the integrated hazard is larger than the maximum integrated hazard */
endfor;

s_e=1-cens[no,1].*(t[no,1] .le s_t); /* We take into account the possibility of censorship */

s_t=cens[no,1].*t[no,1]+(1-cens[no,1]).*s_t; /* We replace the simulated duration by the censored duration when there is censorship */



/**********************************************************************************************************/
/* We compute the Kaplan-Meier estimators and survival functions of the model from the Breslow estimators */
/**********************************************************************************************************/

y=zeros(n,4);
cumobs=0;

for j(1,rows(vcom),1);
    debc=cumobs+1;
    finc=cumobs+vcom[j,1];
    y[debc:finc,.]=sortc(s_t[debc:finc,1]~s_e[debc:finc,1]~s_exb[debc:finc,1]~s_dc[debc:finc,1],1);
    cumobs=cumobs+vcom[j,1];
endfor;

{shc,stcommh}=hccom(y[.,1],y[.,2],y[.,3],vcom);
{skap,stcommk}=kapcom(y[.,1],y[.,2],vcom);

shc=exp(-shc);

mdiff=zeros(rows(vcom),1);
sdiff=zeros(rows(vcom),1);
adiff=zeros(rows(vcom),1);

cumobs=0;
for j(1,rows(vcom),1);
    debc=cumobs+1;
    finc=cumobs+vcom[j,1];
    diff=(shc[s_t[debc:finc,1],j].^s_exb[debc:finc,1]).*(s_t[debc:finc,1] .lt stcommh[j,1])-skap[s_t[debc:finc,1],j];
    diff=selif(diff,(s_t[debc:finc,1] .le s_t[trunc(vcom[j,1]*(1-delta)),1]));
    mdiff[j,1]=maxc(abs(diff));
    sdiff[j,1]=sqrt(meanc(diff.*diff));
    adiff[j,1]=meanc(abs(diff));
    cumobs=cumobs+vcom[j,1];
endfor;

mdiffm[.,i]=mdiff;
sdiffm[.,i]=sdiff;
adiffm[.,i]=adiff;

endfor;

save path=^spath md_05_10000_1000=md sd_05_10000_1000=sd ad_05_10000_1000=ad mdiffm_05_10000_1000=mdiffm sdiffm_05_10000_1000=sdiffm adiffm_05_10000_1000=adiffm;



/*******************************************************/
/* Descrptive statistics computed from the simulations */
/*******************************************************/

mdiffm=mdiffm';
sdiffm=sdiffm';
adiffm=adiffm';

rc=cols(mdiffm);

for j(1,rc,1);
    mdiffm[.,j]=sortc(mdiffm[.,j],1);
    sdiffm[.,j]=sortc(sdiffm[.,j],1);
    adiffm[.,j]=sortc(adiffm[.,j],1);
endfor;

mdiffs=zeros(rc,1);
sdiffs=zeros(rc,1);
adiffs=zeros(rc,1);

for j(1,rc,1);
    mdiffs[j,1]=sumc((mdiffm[.,j] .gt md[j,1]))/rows(mdiffm);
    sdiffs[j,1]=sumc((sdiffm[.,j] .gt sd[j,1]))/rows(sdiffm);
    adiffs[j,1]=sumc((adiffm[.,j] .gt ad[j,1]))/rows(adiffm);
endfor;

mdiffp5=sumc((mdiffs .ge .05))/rows(mdiffs)*100;
sdiffp5=sumc((sdiffs .ge .05))/rows(sdiffs)*100;
adiffp5=sumc((adiffs .ge .05))/rows(adiffs)*100;

print mdiffp5~sdiffp5~adiffp5;


gmdiff=zeros(20,1);
gsdiff=zeros(20,1);
gadiff=zeros(20,1);

for k(1,20,1);
    if k lt 20; gmdiff[k,1]=meanc((mdiffs .ge ((k-1)*.05)).*(mdiffs .lt (k*.05)))*100; endif;
    if k eq 20; gmdiff[k,1]=meanc((mdiffs .ge .95).*(mdiffs .le 1)*100); endif;    

    if k lt 20; gsdiff[k,1]=meanc((sdiffs .ge ((k-1)*.05)).*(sdiffs .lt (k*.05)))*100; endif;
    if k eq 20; gsdiff[k,1]=meanc((sdiffs .ge .95).*(sdiffs .le 1))*100; endif;    

    if k lt 20; gadiff[k,1]=meanc((adiffs .ge ((k-1)*.05)).*(adiffs .lt (k*.05)))*100; endif;
    if k eq 20; gadiff[k,1]=meanc((adiffs .ge .95).*(adiffs .le 1))*100; endif;    
endfor;


print ""; print "";
print gmdiff~gsdiff~gadiff;

