## Prog1.r
## ML-ESTIMATION OF CORRELATED PROBIT AND POISSON MODEL 
## Winkelmann (2004) version
## directory h:\research\hurdlecount
## Hans van Ophem
## January 2009

##INITIALIZATION lines 8-19
##LOAD pscl-PACKAGE (HURDLE)
library(pscl)
library(maxLik)
##NUMBER OF OBSERVATIONS
n=6231

##NUMBER OF EXPLANATORY VARIABLES PROBIT
m1=18   ##17
##NUMBER OF EXPLANATORY VARIABLES POISSON
m2=18   ##13
k=m1+m2+2

##DATA MANIPULATIONS 21-54
dat=load("win99.rdat")
id=win99[1:n,1]
year=win99[1:n,2]
doctco=win99[1:n,3]
age=win99[1:n,4]
male=win99[1:n,5]
educ=win99[1:n,6]
married=win99[1:n,7]
hsize=win99[1:n,8]
sport=win99[1:n,9]
goodh=win99[1:n,10]
badh=win99[1:n,11]
sozh=win99[1:n,12]
loginc=win99[1:n,13]
ft=win99[1:n,14]
pt=win99[1:n,15]
unemp=win99[1:n,16]
winter=win99[1:n,17]
spring=win99[1:n,18]
fall=win99[1:n,19]
age2=(age*age/100) - mean(age/10)
age2=(age/10- mean(age/10))^2
d3140=((age > 30)&(age <= 40))
d4150=((age > 40)&(age <= 50))
d5160=((age > 50)&(age <= 60))
dg55=(age > 55)
yd =(doctco > 0)
count=doctco
const=yd*0+1

##OLD VERSION
##yxmat=cbind(yd,count,const,age,married,male,sport,winter,spring,fall,age2,loginc,hsize,goodh,badh,sozh,unemp,pt,ft,
##const,hsize,sport,male,educ,goodh,badh,sozh,ft,pt,unemp,age,age2)

##NEW VERSION
yxmat=cbind(yd,count,const,age,married,male,sport,winter,spring,fall,age2,loginc,hsize,goodh,badh,sozh,unemp,pt,ft,educ,
const,hsize,sport,male,educ,goodh,badh,sozh,ft,pt,unemp,age,age2,married,loginc,spring,fall,winter)

##ADDITIONAL INITIALIZATION lines 56-59
d1=(count==1)
imax=10
imin=-imax

##START VALUES lines 60-66
tdata=data.frame(age=age,age2=age2,male=male,educ=educ,married=married,sport=sport,hsize=hsize,loginc=loginc,goodh=goodh,badh=badh,sozh=sozh,ft=ft,pt=pt,
unemp=unemp,winter=winter,spring=spring,fall=fall,d3140=d3140,d4150-d4150,d5160=d5160,yd=yd,count=count)
##HURDLE - take notice of the order: first poisson then probit
hr1=hurdle(count~1+hsize+sport+male+educ+goodh+badh+sozh+ft+pt+unemp+age+age2+married+loginc+spring+fall+winter|1+age+married+
male+sport+winter+spring+fall+age2+loginc+hsize+goodh+badh+sozh+unemp+pt+ft+educ,tdata,dist="poisson",zero.dist="binomial", link = "probit")
summary(hr1)
start=hr1$coefficients
vstart=c(start$zero,start$count,0.9,-0.3)

##PRINT INFORMATION TO REPLICATE lines 69-84
sink("Winkelmann.txt")
"Winkelmann.r"
"starting values"
vstart
"zeros-ones selection"
table(yd)
print("count")
table(count)
"Lower limit integrals"
imin
"Upper limit integrals"
imax
"Results hurdle estimation:"
summary(hr1)
sink()

## ML: SPECIFY LOGLIKELIHOOD lines 86-119
mlf=function(b){
bv=b[1:m1]
cv=b[(m1+1):(m1+m2)]
sig=b[(k-1)]
rho=b[k]
d=yxmat[,1]
co=yxmat[,2]
xv=yxmat[,3:(m1+2)]
zv=yxmat[,(m1+3):(m1+m2+2)]
zc=zv%*%cv
xb=xv%*%bv

#TRUNCATED COUNT (COUNT = 1,2,....)
pc=function(x){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])/((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
pd=pnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))
ppp=pd*pp*dnorm(x/sig)/sig
dpp=is.infinite(ppp)
dpq=is.na(ppp)
for (j in 1:21) {if ((dpp[j] == TRUE)|(dpq[j] == TRUE)) {ppp[j]=1e+99}}
return(ppp)
}
p1=matrix(0,nrow=n)
for (i in 1:n){
int0=integrate(pc,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
p1[i]=int0$value
if (int0$value > 1) p1[i]=1e-20 
}
p0=1 - pnorm(xb)
li=(1-d)*p0 + d*p1
logl=log(li)
return(logl)
}

##GRADIENT lines 121-204
gr=function(b){
bv=b[1:m1]
cv=b[(m1+1):(m1+m2)]
sig=b[(k-1)]
rho=b[k]
d=yxmat[,1]
co=yxmat[,2]
xv=yxmat[,3:(m1+2)]
zv=yxmat[,(m1+3):(m1+m2+2)]
zc=zv%*%cv
xb=xv%*%bv
save(b,file="par.vec")
#TRUNCATED COUNT (COUNT = 1,2,....)
pcg=function(x){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])/((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
pd=pnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))
ppp=pd*pp*dnorm(x/sig)/sig
dpp=is.infinite(ppp)
dpq=is.na(ppp)
for (j in 1:21) {if ((dpp[j] == TRUE)|(dpq[j] == TRUE)) {ppp[j]=1e+99}}
return(ppp)
}
p1=matrix(0,nrow=n)
for (i in 1:n){
int0=integrate(pcg,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
p1[i]=int0$value
if (int0$value > 1) p1[i]=1e-20 
}
p0=1 - pnorm(xb)

gr0=array(0,dim=c(n,k))
gr1=array(0,dim=c(n,k))

gr0[,1:m1]=matrix(-dnorm(xb),ncol=m1,nrow=n)*xv

##gradient p1 with respect to xb (output: n-vector)
prg1=function(x){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])/((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
pd=dnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))/sqrt(1-(rho^2))
ppp=pd*pp*dnorm(x/sig)/sig
return(ppp)
}
##gradient p1 with respect to zc (output: n-vector)
prg2=function(x){
f1=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])
f2=((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
df1=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])*(co[i] - exp(zc[i] + x))
df2=exp(zc[i] + x - exp(zc[i] + x))*factorial(co[i])
pp=(df1*f2 - df2*f1)/(f2^2)
pd=pnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))
ppp=pd*pp*dnorm(x/sig)/sig
return(ppp)
}
##gradient p1 with respect to sig (output: n-vector)
prg3=function(x){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])/((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
pd=pnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))
pdg=-dnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))*rho*x/(sqrt(1-(rho^2))*(sig^2))
pg=(((x/sig)^2) - 1)*dnorm(x/sig)/(sig^2)
ppp=pp*((pdg*dnorm(x/sig)/sig) + pd*pg)
return(ppp)
}
##gradient p1 with respect to rho (output: n-vector)
prg4=function(x){
pp=exp(-exp(zc[i] + x))*(exp(zc[i] + x)^co[i])/((1 - exp(-exp(zc[i] + x)))*factorial(co[i]))
pd=dnorm((xb[i] + (rho*x/sig))/(sqrt(1-(rho^2))))*((x/sig) + (rho*(xb[i] + rho*(x/sig))/(1-(rho^2))))/sqrt(1-(rho^2))
ppp=pd*pp*dnorm(x/sig)/sig
return(ppp)
}

for (i in 1:n){
int1=integrate(prg1,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
int2=integrate(prg2,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
int3=integrate(prg3,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
int4=integrate(prg4,imin,imax,rel.tol=1e-13,abs.tol=1e-13,stop.on.error = FALSE)
gr1[i,1:m1]=matrix(int1$value,ncol=m1,nrow=1)*xv[i,]
gr1[i,(m1+1):(m1+m2)]=matrix(int2$value,ncol=m2,nrow=1)*zv[i,]
gr1[i,(k-1)]=int3$value
gr1[i,k]=int4$value}

grad=matrix((1-d),ncol=k,nrow=n)*gr0/matrix(p0,ncol=k,nrow=n) + matrix(d,ncol=k,nrow=n)*gr1/matrix(p1,ncol=k,nrow=n)
return(grad)
}

##MAIN PROGRAM lines 206-220
##maximization loglik
##sink("Winkelmann.txt",a=T)
##compareDerivatives(mlf,gr,hess=NULL,vstart,print=TRUE)
##sink()
mles = maxLik(mlf,grad=gr,hess=NULL,vstart,method="BFGS",print.level=2)
sink("Winkelmann.txt",a=T)
summary(mles)
sink()
par=mles$estimate
b=par
save(b,file="btable4.par")
hesst4=mles$hessian
save(hesst4,file="hesst4.par")

if (5*par[(k-1)] > imax) {
sink("Winkelmann.txt",a=T)
"INCREASE LIMMAX AND LIMMIN, INTEGRATION NOT PRECISE ENOUGH" 
sink()
}













