## Prog2.r
## ML-ESTIMATION OF CORRELATED PROBIT AND POISSON MODEL 
## same program as POISNORMML2.R but with unobserved heterogeneity
## directory h:\research\hurdlecount
##
## Hans van Ophem
## January 2008

##INITIALIZATION lines 9-55
##LOAD fMultivar-PACKAGE (CUMULATIVE BIVARIATE NORMAL)/pscl-PACKAGE (HURDLE)
library(fMultivar)
library(pscl)
library(maxLik)
##NUMBER OF OBSERVATIONS
n=6231

##NUMBER OF EXPLANATORY VARIABLES PROBIT
m1=18 ##17
##NUMBER OF EXPLANATORY VARIABLES POISSON
m2=18 ##13
k=m1+m2+2
##MAXIMUM COUNT ALLOWED
maxc=100

dat=load("win99.rdat")
id=win99[1:n,1]
year=win99[1:n,2]
doctco=win99[1:n,3]
age=win99[1:n,4]
male=win99[1:n,5]
educ=win99[1:n,6]
married=win99[1:n,7]
hsize=win99[1:n,8]
sport=win99[1:n,9]
goodh=win99[1:n,10]
badh=win99[1:n,11]
sozh=win99[1:n,12]
loginc=win99[1:n,13]
ft=win99[1:n,14]
pt=win99[1:n,15]
unemp=win99[1:n,16]
winter=win99[1:n,17]
spring=win99[1:n,18]
fall=win99[1:n,19]
age2=(age*age/100) - mean(age/10)
age2=(age/10- mean(age/10))^2
d3140=((age > 30)&(age <= 40))
d4150=((age > 40)&(age <= 50))
d5160=((age > 50)&(age <= 60))
dg55=(age > 55)
yd =(doctco > 0)
count=doctco
const=yd*0+1
##OLD VERSION

##yxmat=cbind(yd,count,const,age,married,male,sport,winter,spring,fall,age2,loginc,hsize,goodh,badh,sozh,unemp,pt,ft,
##const,hsize,sport,male,educ,goodh,badh,sozh,ft,pt,unemp,age,age2)

##NEW VERSION
yxmat=cbind(yd,count,const,age,married,male,sport,winter,spring,fall,age2,loginc,hsize,goodh,badh,sozh,unemp,pt,ft,educ,
const,hsize,sport,male,educ,goodh,badh,sozh,ft,pt,unemp,age,age2,married,loginc,spring,fall,winter)

##ADDITIONAL INITIALIZATION lines 57-60
d1=(count==1)
imax=10
imin=-imax

##START VALUES lines 62-73
tdata=data.frame(age=age,age2=age2,male=male,educ=educ,married=married,sport=sport,hsize=hsize,loginc=loginc,goodh=goodh,badh=badh,sozh=sozh,ft=ft,pt=pt,
unemp=unemp,winter=winter,spring=spring,fall=fall,d3140=d3140,d4150-d4150,d5160=d5160,yd=yd,count=count)
##HURDLE - take notice of the order: first poisson then probit
hr1=hurdle(count~1+hsize+sport+male+educ+goodh+badh+sozh+ft+pt+unemp+age+age2+married+loginc+spring+fall+winter|1+age+married+
male+sport+winter+spring+fall+age2+loginc+hsize+goodh+badh+sozh+unemp+pt+ft+educ,tdata,dist="poisson",zero.dist="binomial", link = "probit")

##start=hr1$coefficients
load("btable4.par")
vstart=b
 
##PRINT INFORMATION TO REPLICATE lines 75-95
sink("poisoutMLu.txt")
print("POISNORMMLU.R - with unobserved heterogeneity")
"Lower limit integrals"
imin
"Upper limit integrals"
imax
"Results hurdle estimation:"
summary(hr1)
print("starting values")
vstart
print("zeros-ones selection")
table(yd)
print("count")
table(count)
dm=(count >= maxc)
print("maximum count")
maxc
print("number of censored observations")
sum(dm)
sink()

## ML: SPECIFY LOGLIKELIHOOD lines 97-178
mlf=function(b){
bv=b[1:m1]
cv=b[(m1+1):(m1+m2)]
sig=b[k-1]
rho=b[k]
rho=max(-0.999,rho)
rho=min(0.999,rho)
d=yxmat[,1]
co=yxmat[,2]
xv=yxmat[,3:(m1+2)]
zv=yxmat[,(m1+3):(m1+m2+2)]
dm = (co >= maxc)
co=dm*maxc + (1-dm)*co
zc=zv%*%cv
xb=xv%*%bv

#CUMULATIVE PROBS TRUNCATED COUNT (COUNT = 1,2,....)
pm=array(0,dim=c(n,2))

pc=function(x){
nr=NROW(x)
pp=0
for (j in 1:tel){
pp=pp+exp(-exp(zc[i] + x))*(exp(zc[i] + x)^j)/((1 - exp(-exp(zc[i] + x)))*factorial(j))
}
ppp=pp*dnorm(x/sig)/sig
dpp=is.infinite(ppp)
dpq=is.na(ppp)
for (j in 1:nr) {if ((dpp[j] == TRUE)|(dpq[j] == TRUE)) {ppp[j]=1e+99}}
return(ppp)
}
for (i in 1:n){
if (co[i] > 0){
tel=co[i]
int0=integrate(pc,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
pm[i,1]=int0$value
if (int0$value > 1) pm[i,1]=1e-20 
if (co[i] >= maxc){pm[i,1]=1}}
if (co[i] > 1){
tel=co[i]-1
int0=integrate(pc,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
pm[i,2]=int0$value
if (int0$value > 1) pm[i,2]=1e-20 
}
}
hpm1=cbind(1e-15,pm[,1])
dpm1=(hpm1[,1]>hpm1[,2])
pm[,1]=dpm1*hpm1[,1]+(1-dpm1)*hpm1[,2]
hpm2=cbind(1e-15,pm[,2])
dpm2=(hpm2[,1]>hpm2[,2])
pm[,2]=dpm2*hpm2[,1]+(1-dpm2)*hpm2[,2]
hpm1=cbind(0.9999999999999,pm[,1])
dpm1=(hpm1[,1]<hpm1[,2])
pm[,1]=dpm1*hpm1[,1]+(1-dpm1)*hpm1[,2]
hpm2=cbind(0.9999999999999,pm[,2])
dpm2=(hpm2[,1]<hpm2[,2])
pm[,2]=dpm2*hpm2[,1]+(1-dpm2)*hpm2[,2]
xxb=cbind(-100,xb)
dxb=(xxb[,1]>xxb[,2])
xb=dxb*xxb[,1]+(1-dxb)*xxb[,2]
xxb=cbind(100,xb)
dxb=(xxb[,1]<xxb[,2])
xb=dxb*xxb[,1]+(1-dxb)*xxb[,2]
save(pm,file="pm.mat")

eta1=qnorm(d*pm[,1]+(1-d)*0.5)
etam1=qnorm((1-d1)*d*pm[,2]+(1-d)*0.5+d1*0.5)
pt1=pnorm2d(xb,eta1,rho)
pt2=pnorm2d(xb,etam1,rho)
pt3=1-pnorm(xb)

ddd=((pt1 - pt2) < 1e-15)
pt2=(1-ddd)*pt2 + ddd*(pt2 - 1e-15)
dpt1=(pt1 < 1e-15)
pt1=(1-dpt1)*pt1 + dpt1*(1e-15)
dpt2=(pt2 < 1e-15)
pt2=(1-dpt2)*pt2 + dpt2*(1e-15)

li=d*(pt1-(1-d1)*pt2) + (1-d)*pt3
logl=log(li)
return(logl)
}

##GRADIENT lines 180-373
gr=function(b){
gr1=array(0,dim=c(n,k))
gr2=array(0,dim=c(n,k))
gr3=array(0,dim=c(n,k))
grt=array(0,dim=c(n,k))
grt1a=array(0,dim=c(n,k))
grt1b=array(0,dim=c(n,k))
grt2a=array(0,dim=c(n,k))
grt2b=array(0,dim=c(n,k))
grt3=array(0,dim=c(n,k))
save(b,file="par.vec")
bv=b[1:m1]
cv=b[(m1+1):(m1+m2)]
sig=b[k-1]
rho=b[k]
rho=max(-0.999,rho)
rho=min(0.999,rho)
d=yxmat[,1]
co=yxmat[,2]
xv=yxmat[,3:(m1+2)]
zv=yxmat[,(m1+3):(m1+m2+2)]
dm = (co >= maxc)
co=dm*maxc + (1-dm)*co
zc=zv%*%cv
xb=xv%*%bv

load("pm.mat")

eta1=qnorm(d*pm[,1]+(1-d)*0.5)
etam1=qnorm((1-d1)*d*pm[,2]+(1-d)*0.5+d1*0.5)
pt1=pnorm2d(xb,eta1,rho)
pt2=pnorm2d(xb,etam1,rho)
pt3=1-pnorm(xb)

ddd=((pt1 - pt2) < 1e-15)
pt2=(1-ddd)*pt2 + ddd*(pt2 - 1e-15)
dpt1=(pt1 < 1e-15)
pt1=(1-dpt1)*pt1 + dpt1*(1e-15)
dpt2=(pt2 < 1e-15)
pt2=(1-dpt2)*pt2 + dpt2*(1e-15)

li=d*(pt1-(1-d1)*pt2) + (1-d)*pt3

grb=xv
gret1=array(0,dim=c(n,1))
gret1m=array(0,dim=c(n,1))
grs=array(0,dim=c(n,1))
grsm=array(0,dim=c(n,1))

#DERIVATIVES WITH RESPECT TO GAMMA
gc=function(x){
grp=0
pp1=exp(-exp(zc[i] + x))*(exp(zc[i] + x))/((1 - exp(-exp(zc[i] + x))))
for (j in 1:tel){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^j)/((1 - exp(-exp(zc[i] + x)))*factorial(j))
grp=grp+pp*(j-exp(zc[i] + x)-pp1)
}
gpp=grp*dnorm(x/sig)/sig
return(gpp)
}
for (i in 1:n){
if (co[i] > 0){
tel=co[i]
int1=integrate(gc,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
gret1[i]=int1$value
if (co[i] >= maxc){gret1[i]=0}}
if (co[i] > 1){
tel=co[i]-1
int1=integrate(gc,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
gret1m[i]=int1$value
}
}
##DERIVATIVE WITH RESPECT TO SIG
ps=function(x){
pp=0
for (j in 1:tel){
pp=pp+exp(-exp(zc[i] + x))*(exp(zc[i] + x)^j)/((1 - exp(-exp(zc[i] + x)))*factorial(j))
}
pg=(((x/sig)^2) - 1)*dnorm(x/sig)/(sig^2)
pgg=pp*pg
return(pgg)
}
for (i in 1:n){
if (co[i] > 0){
tel=co[i]
int2=integrate(ps,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
grs[i]=int2$value
if (co[i] >= maxc){grs[i]=0}}
if (co[i] > 1){
tel=co[i]-1
int2=integrate(ps,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
grsm[i]=int2$value
}
}

grt1a[,1:m1]=grb
grt2a[,1:m1]=grb
dd1=dnorm(eta1)
dd2=dnorm(etam1)
for (j in 1:m2){
grt1b[,m1+j]=gret1*zv[,j]/dd1
grt1b[,k-1]=grs/dd1
grt2b[,m1+j]=gret1m*zv[,j]/dd2
grt2b[,k-1]=grsm/dd2
}
grt3[,1:m1]=xv

##FIRST PART OF LI
gpi1=array(0,dim=c(n,3))
arg1=xb
arg2=eta1
arg3=rho
c=1/(1 - (arg3^2))
cmin=sqrt(1/c)
cs=sqrt(c)
arg11=(arg2 - arg3*arg1)/cmin
gpi1[,1]=pnorm(arg11)*dnorm(arg1)
arg22=(arg1 - arg3*arg2)/cmin
gpi1[,2]=pnorm(arg22)*dnorm(arg2)
narg1=-arg1
narg2=-arg2
lp=pnorm2d(narg1,narg2,arg3)-pnorm(narg1)-pnorm(narg2)+1
hlp=cbind(1e-15,lp)
dlp=(hlp[,1]>hlp[,2])
lp=(1-dlp)*lp + dlp*(1e-15)
zh=dnorm(narg1)
zk=dnorm(narg2)
arga=(narg2-arg3*(narg1^2))*cs
argb=(narg1-arg3*(narg2^2))*cs
phia=1-pnorm(arga)
phib=1-pnorm(argb)
zhk=exp(-0.5*((narg1^2)-(2*narg1*narg2*arg3)+(narg2^2))*c)
zhk=zhk*cs/(2*pi)
m20=(narg1*zh*phia)+((arg3^2)*zk*narg2*phib)+(arg3*zhk/c)
m20=(m20/lp)+1
m02=(narg2*zk*phib)+((arg3^2)*zh*narg1*phia)+(arg3*zhk/c)
m02=(m02/lp)+1
m11=(((narg1*zh*phia)+(narg2*zk*phib)+lp)*arg3) + (zhk/c)
m11=m11/lp
intg=(arg3*m20)-(((arg3^2)+1)*m11)+(arg3*m02)
intg=intg*pt1
gpi1[,3]=arg3*c*pt1-((c^2)*intg)

##SECOND PART OF LI
gpi2=array(0,dim=c(n,3))
arg1=xb
arg2=etam1
arg3=rho
c=1/(1 - (arg3^2))
cmin=sqrt(1/c)
cs=sqrt(c)
arg11=(arg2 - arg3*arg1)/cmin
gpi2[,1]=pnorm(arg11)*dnorm(arg1)
arg22=(arg1 - arg3*arg2)/cmin
gpi2[,2]=pnorm(arg22)*dnorm(arg2)
narg1=-arg1
narg2=-arg2
lp=pnorm2d(narg1,narg2,arg3)-pnorm(narg1)-pnorm(narg2)+1
hlp=cbind(1e-15,lp)
dlp=(hlp[,1]>hlp[,2])
lp=(1-dlp)*lp + dlp*(1e-15)
zh=dnorm(narg1)
zk=dnorm(narg2)
arga=(narg2-arg3*(narg1^2))*cs
argb=(narg1-arg3*(narg2^2))*cs
phia=1-pnorm(arga)
phib=1-pnorm(argb)
zhk=exp(-0.5*((narg1^2)-(2*narg1*narg2*arg3)+(narg2^2))*c)
zhk=zhk*cs/(2*pi)
m20=(narg1*zh*phia)+((arg3^2)*zk*narg2*phib)+(arg3*zhk/c)
m20=(m20/lp)+1
m02=(narg2*zk*phib)+((arg3^2)*zh*narg1*phia)+(arg3*zhk/c)
m02=(m02/lp)+1
m11=(((narg1*zh*phia)+(narg2*zk*phib)+lp)*arg3) + (zhk/c)
m11=m11/lp
intg=(arg3*m20)-(((arg3^2)+1)*m11)+(arg3*m02)
intg=intg*pt2
gpi2[,3]=arg3*c*pt2-((c^2)*intg)

##THIRD PART OF LI
gpi3=-dnorm(xb)

grho=array(0,dim=c(1,k))
grho[,k]=1
for (j in 1:k){
gr1[,j]=gpi1[,1]*grt1a[,j] + gpi1[,2]*grt1b[,j] + gpi1[,3]*grho[,j]
gr2[,j]=gpi2[,1]*grt2a[,j] + gpi2[,2]*grt2b[,j] + gpi2[,3]*grho[,j]
gr3[,j]=gpi3*grt3[,j]
grt[,j]=(d*(gr1[,j]-(1-d1)*gr2[,j])+(1-d)*gr3[,j])/li
}
grad=grt 
return(grad)
}

##MAIN PROGRAM lines 375-387
##maximization loglik
##sink("poisoutMLu.txt",a=T)
##load("par.vec")
##vstart=b
##compareDerivatives(mlf,gr,hess=NULL,vstart,print=TRUE)
mles = maxLik(mlf,grad=gr,hess=NULL,vstart,method="BFGS",print.level=2)
sink("poisoutMLu.txt",a=T)
summary(mles)
sink()
par=mles$estimate
b=par
save(b,file="btable5.par")
hesst5=mles$hessian
save(hesst5,file="hesst5.par")
if (5*par[(k-1)] > imax) {
sink("poissonMLu.txt",a=T)
"INCREASE LIMMAX AND LIMMIN, INTEGRATION NOT PRECISE ENOUGH" 
sink()}

vstart=b
mles = maxLik(mlf,grad=gr,hess=NULL,vstart,method="BHHH",print.level=2)
sink("poisoutMLu.txt",a=T)
summary(mles)
sink()













