## $Id: vc_discrete.R,v 1.1 2011/06/07 14:34:23 jracine Exp jracine $

## This file replicates the illustration in the paperd "Categorical
## Semiparametric Varying Coefficient Models", written with Qi Li and
## Desheng Ouyang. It uses the R package `np' that contains the data
## illustration, the np functions npscoefbw() and npscoef(). This file
## was created by Jeffrey S. Racine (racinej@mcmaster.ca).

rm(list=ls())

## Note - you must have the R packages `np' and `xtable' installed on
## your system.

library(np)
library(xtable)
## You can uncomment this line if you prefer to not have progress
## reported in the output file
#options(np.messages=FALSE)
data(wage1)

## Note: nonwhite, female, married already factors, cast numdep as an
## ordered factor

wage1$numdep <- ordered(wage1$numdep)

## Bandwidth selection and smooth coefficient model. Note that, to
## avoid the presence of local minima, we restart bandwidth search 100
## times (very aggressive and time-consuming)

bw <- npscoefbw(lwage~educ+exper+tenure+tenursq+expersq|
                nonwhite+female+married+numdep,
                data=wage1,
                ukertype="liracine",
                okertype="liracine",
                nmulti=100)

## Since we compare married/unmarried, consider two cases, 1) numdep=0
## and 2) numdep=2 (i.e. no kids, two kids). We create `evaluation'
## data sets for a number of cases in order to retrieve the
## coefficients for these cases (e.g. Male/Married/0 kids).

wage1.eval.male.married <- data.frame(educ=uocquantile(wage1$educ,0.5),
                                      exper=uocquantile(wage1$exper,0.5),
                                      tenure=uocquantile(wage1$tenure,0.5),
                                      tenursq=uocquantile(wage1$tenursq,0.5),
                                      expersq=uocquantile(wage1$expersq,0.5),
                                      nonwhite=uocquantile(wage1$nonwhite,0.5),
                                      female=factor("Male",levels=levels(wage1$female)),
                                      married=factor("Married",levels=levels(wage1$married)),
                                      numdep=ordered(0,levels=levels(wage1$numdep)))

wage1.eval.male.notmarried <- data.frame(educ=uocquantile(wage1$educ,0.5),
                                         exper=uocquantile(wage1$exper,0.5),
                                         tenure=uocquantile(wage1$tenure,0.5),
                                         tenursq=uocquantile(wage1$tenursq,0.5),
                                         expersq=uocquantile(wage1$expersq,0.5),
                                         nonwhite=uocquantile(wage1$nonwhite,0.5),
                                         female=factor("Male",levels=levels(wage1$female)),
                                         married=factor("Notmarried",levels=levels(wage1$married)),
                                         numdep=ordered(0,levels=levels(wage1$numdep)))

wage1.eval.female.married <- data.frame(educ=uocquantile(wage1$educ,0.5),
                                        exper=uocquantile(wage1$exper,0.5),
                                        tenure=uocquantile(wage1$tenure,0.5),
                                        tenursq=uocquantile(wage1$tenursq,0.5),
                                        expersq=uocquantile(wage1$expersq,0.5),
                                        nonwhite=uocquantile(wage1$nonwhite,0.5),
                                        female=factor("Female",levels=levels(wage1$female)),
                                        married=factor("Married",levels=levels(wage1$married)),
                                        numdep=ordered(0,levels=levels(wage1$numdep)))

wage1.eval.female.notmarried <- data.frame(educ=uocquantile(wage1$educ,0.5),
                                           exper=uocquantile(wage1$exper,0.5),
                                           tenure=uocquantile(wage1$tenure,0.5),
                                           tenursq=uocquantile(wage1$tenursq,0.5),
                                           expersq=uocquantile(wage1$expersq,0.5),
                                           nonwhite=uocquantile(wage1$nonwhite,0.5),
                                           female=factor("Female",levels=levels(wage1$female)),
                                           married=factor("Notmarried",levels=levels(wage1$married)),
                                           numdep=ordered(0,levels=levels(wage1$numdep)))

## Fit the model

model <- npscoef(bws=bw,betas=TRUE)

## Now grab the coefficients in table form for the following setting
## numdep=0.

factor.fit <- c("Male/Married","Female/Married","Male/Notmarried","Female/Notmarried")

model.fit <- c(
predict(model,newdata=wage1.eval.male.married),
predict(model,newdata=wage1.eval.female.married),
predict(model,newdata=wage1.eval.male.notmarried),
predict(model,newdata=wage1.eval.female.notmarried))

## Coefficients for subsets... note nonwhite is smoothed out... don't care about intercept

model.coef <- rbind(
coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==0),][1,-1],
coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==0),][1,-1],
coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==0),][1,-1],
coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==0),][1,-1])

xtable(cbind(factor.fit,
      formatC(model.fit,digits=4,format="f"),     
      formatC(model.coef,digits=4,format="f")))

## Now grab the coefficients in table form for the following setting
## numdep=2.

## Since we compare married/unmarried, makes sense to have numdep=2

wage1.eval.male.married <- data.frame(educ=uocquantile(wage1$educ,0.5),
                         exper=uocquantile(wage1$exper,0.5),
                         tenure=uocquantile(wage1$tenure,0.5),
                         tenursq=uocquantile(wage1$tenursq,0.5),
                         expersq=uocquantile(wage1$expersq,0.5),
                         nonwhite=uocquantile(wage1$nonwhite,0.5),
                         female=factor("Male",levels=levels(wage1$female)),
                         married=factor("Married",levels=levels(wage1$married)),
                         numdep=ordered(2,levels=levels(wage1$numdep)))

wage1.eval.male.notmarried <- data.frame(educ=uocquantile(wage1$educ,0.5),
                         exper=uocquantile(wage1$exper,0.5),
                         tenure=uocquantile(wage1$tenure,0.5),
                         tenursq=uocquantile(wage1$tenursq,0.5),
                         expersq=uocquantile(wage1$expersq,0.5),
                         nonwhite=uocquantile(wage1$nonwhite,0.5),
                         female=factor("Male",levels=levels(wage1$female)),
                         married=factor("Notmarried",levels=levels(wage1$married)),
                         numdep=ordered(2,levels=levels(wage1$numdep)))

wage1.eval.female.married <- data.frame(educ=uocquantile(wage1$educ,0.5),
                         exper=uocquantile(wage1$exper,0.5),
                         tenure=uocquantile(wage1$tenure,0.5),
                         tenursq=uocquantile(wage1$tenursq,0.5),
                         expersq=uocquantile(wage1$expersq,0.5),
                         nonwhite=uocquantile(wage1$nonwhite,0.5),
                         female=factor("Female",levels=levels(wage1$female)),
                         married=factor("Married",levels=levels(wage1$married)),
                         numdep=ordered(2,levels=levels(wage1$numdep)))

wage1.eval.female.notmarried <- data.frame(educ=uocquantile(wage1$educ,0.5),
                         exper=uocquantile(wage1$exper,0.5),
                         tenure=uocquantile(wage1$tenure,0.5),
                         tenursq=uocquantile(wage1$tenursq,0.5),
                         expersq=uocquantile(wage1$expersq,0.5),
                         nonwhite=uocquantile(wage1$nonwhite,0.5),
                         female=factor("Female",levels=levels(wage1$female)),
                         married=factor("Notmarried",levels=levels(wage1$married)),
                         numdep=ordered(2,levels=levels(wage1$numdep)))

model <- npscoef(bws=bw,betas=TRUE)

factor.fit <- c("Male/Married","Female/Married","Male/Notmarried","Female/Notmarried")

model.fit <- c(
predict(model,newdata=wage1.eval.male.married),
predict(model,newdata=wage1.eval.female.married),
predict(model,newdata=wage1.eval.male.notmarried),
predict(model,newdata=wage1.eval.female.notmarried))

## Coefficients for subsets... note nonwhite is smoothed out... don't care about intercept

model.coef <- rbind(
coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==2),][1,-1],
coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==2),][1,-1],
coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==2),][1,-1],
coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==2),][1,-1])

xtable(cbind(factor.fit,
      formatC(model.fit,digits=4,format="f"),     
      formatC(model.coef,digits=4,format="f")))

## Now bootstrap standard errors...

## We bootstrap the standard errors of the beta coefficients in the
## random coefficient model for specific combinations of interest.

set.seed(42)

num.boot <- 500

model <- npscoef(bws=bw,betas=TRUE)
k <- length(coef(model)[1,-1])
coef.mat.numdep.0.row.1 <- matrix(NA,num.boot,k)
coef.mat.numdep.2.row.1 <- matrix(NA,num.boot,k)

coef.mat.numdep.0.row.2 <- matrix(NA,num.boot,k)
coef.mat.numdep.2.row.2 <- matrix(NA,num.boot,k)

coef.mat.numdep.0.row.3 <- matrix(NA,num.boot,k)
coef.mat.numdep.2.row.3 <- matrix(NA,num.boot,k)

coef.mat.numdep.0.row.4 <- matrix(NA,num.boot,k)
coef.mat.numdep.2.row.4 <- matrix(NA,num.boot,k)

## Coefficients for subsets... note nonwhite is smoothed out... don't
## care about intercept

## numdep=0

wage1.orig <- wage1

for(j in 1:num.boot) {

  print(j)

  ## Easiest way to resample is realize the data is taken from the
  ## environment (the data is not `attached' hence taken from the data
  ## frame wage1) so if we replace wage1 with a resampled version it
  ## will have the desired effect - we naturally first save the
  ## original data then draw iid resamples from this object.
  
  wage1 <- wage1.orig[sample(1:nrow(wage1.orig),replace=T),]
  
  model <- npscoef(bws=bw,betas=TRUE)
  
  model.coef <- rbind(
                      coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==0),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==0),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==0),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==0),,drop=FALSE][1,-1,drop=FALSE])
  
  coef.mat.numdep.0.row.1[j,] <- model.coef[1,]
  coef.mat.numdep.0.row.2[j,] <- model.coef[2,]
  coef.mat.numdep.0.row.3[j,] <- model.coef[3,]
  coef.mat.numdep.0.row.4[j,] <- model.coef[4,]
  
  ## Coefficients for subsets... note nonwhite is smoothed out... don't
  ## care about intercept
  
  ## numdep=2
  
  model.coef <- rbind(
                      coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==2),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Married" & wage1$numdep==2),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Male" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==2),,drop=FALSE][1,-1,drop=FALSE],
                      coef(model)[which(wage1$female=="Female" & wage1$nonwhite=="White" &  wage1$married=="Notmarried" & wage1$numdep==2),,drop=FALSE][1,-1,drop=FALSE])
  
  coef.mat.numdep.2.row.1[j,] <- model.coef[1,]
  coef.mat.numdep.2.row.2[j,] <- model.coef[2,]
  coef.mat.numdep.2.row.3[j,] <- model.coef[3,]  
  coef.mat.numdep.2.row.4[j,] <- model.coef[4,]  
  
}

## Compute standard deviations

colsd <- function(data) {
  colsd <- numeric(ncol(data))
  for(i in 1:ncol(data)) {
    colsd[i] <- sd(data[,i])
  }
  return(colsd)
}

## numdep=0

colsd.dat <- rbind(colsd(coef.mat.numdep.0.row.1),
                   colsd(coef.mat.numdep.0.row.2),
                   colsd(coef.mat.numdep.0.row.3),
                   colsd(coef.mat.numdep.0.row.4))

xtable(formatC(colsd.dat,digits=4,format="f"))

## numdep=2

colsd.dat <- rbind(colsd(coef.mat.numdep.2.row.1),
                   colsd(coef.mat.numdep.2.row.2),
                   colsd(coef.mat.numdep.2.row.3),
                   colsd(coef.mat.numdep.2.row.4))

xtable(formatC(colsd.dat,digits=4,format="f"))

