## Delgado and Parmeter
## 7/27/2013
## delgado2@purdue.edu

####################################################################
## Runs the Monte Carlo simulation for Delgado and Parmeter.      ##
##                                                                ##
##    Calculations on Linux cluster                               ##
##    Uses `snow' package                                         ##
##    Uses `harvestr' package to generate L'Ecuyer seeds          ##
##                                                                ##
##    Monte Carlo setup:                                          ##
##        Monte Carlo reps: 1000                                  ##
##        Sample size: panel - 1000 by 3 (id x time)              ##
##                                                                ##
##    Number of cores controlled by `core.number'                 ##
##        Current configuration: 4 cores                          ##
####################################################################


## -------------------------------------------------------------- ##
## Startup - clear memory, load packages, and set parameters      ##
## -------------------------------------------------------------- ##

## Clear memory
rm(list=ls())

## Load packages
library(snow)         # For parallel implementation
library(harvestr)     # For L'Ecuyer seeds

## Basic parameters
M <- 1000             # Monte Carlo replications
n <- 1000             # Number of cross-sections
T <- 3                # Number of time periods
nT <- n*T             # Total pooled sample size

## Number of cores to use in parallel
core.number <- 4


## -------------------------------------------------------------- ##
## Define seeds - uses adapted L'Ecuyer method discussed in text. ##
##                                                                ##
## Specifically, generates 1000 sets of L'Ecuyer seeds to be      ##
## called during Monte Carlo loop to control random numbers for   ##
## each independent calculation.                                  ##
## -------------------------------------------------------------- ##

seed.temp <- gather(M,seed=123)          # Uses 123 as initial seed
Seed <- matrix(nrow=M,ncol=6)            # Empty matrix to hold seeds
for(i in 1:M){                           # Loop to fill seed matrix

  Seed[i,] <- seed.temp[[i]][2:7]

}

## -------------------------------------------------------------- ##
## Define Monte Carlo function.                                   ##
##                                                                ##
## Function implements ONE single Monte Carlo run and returns     ##
## the p-value for that run. Function can be called M times.      ##
##                                                                ##
## This Monte Carlo tests the size of a standard Hausman test     ##
## following Hausman (1978) and Amini et al. (2012).              ##
## -------------------------------------------------------------- ##

mc <- function(M){
  
  seed.run <- Seed[M,]                  # Call row M from seed matrix
  set.seed(seed.run, "L'Ecuyer-CMRG")   # Set seed for run; L'Ecuyer type
  
  ## Library
  library(plm)                          # Package for panel models
  
  ## Generate data
  tid <- rep(c(1:T),n)                                        # Time
  nid <- matrix(t(matrix(rep(c(1:n),T),n,T)),nT,1,byrow=TRUE) # ID
  xit <- runif(nT,-1,1)                                       # Variable
  mui <- runif(n,-1,1)                                        # Fixed effect
  vi <- matrix(t(matrix(mui,n,T)),nT,1,byrow=TRUE)            # Residual
  yit <- 2*xit+vi+rnorm(nT)                                   # Outcome
  
  ## Set data as `panel'  (see `plm' package)
  pdata <- data.frame(y=yit,x=xit,nid=nid,tid=tid)
  pdata <- pdata.frame(pdata,c("nid","tid"))
  
  ## Fixed effects estimation
  model.fe <- plm(y~x,data=pdata,model="within",effect="individual")
  
  ## Random effects estimation
  model.re <- plm(y~x,data=pdata,model="random",effect="individual")
  
  ## Compute Hausman test
  test <- phtest(model.fe,model.re)       # Hausman test
  pval <- test$p.value                    # Pull out p-value
  
  return(pval)                            # Function return p-value
  
}


## -------------------------------------------------------------- ##
## Parallel implementation - uses `parLapply' in `snow'           ##
##                                                                ##
##   Sets up cluster using previously defined number of cores     ##
##   Exports objects to all cores                                 ##
##   Runs parallel computation                                    ##
##   Shuts down cluster when finished                             ##
##                                                                ##
## Uses the parallel version of `lapply' from the `snow'          ##
## package to repeat the Monte Carlo function M times, saving     ##
## the returned p-value from each function evaluation.            ##
## -------------------------------------------------------------- ##

## Initialize cluster; names the parallel environment `cluster'
cluster <- makeCluster(core.number,type="SOCK")   

## Export objects
clusterExport(cluster,c("n","T","nT","Seed"))

## Run Monte Carlo in parallel; record computation time
time <- system.time(solution <- parLapply(cluster,1:M,mc))

## If in sequential mode (core.number=1), use:
##   time <- system.time(solution <- lapply(1:M,mc))

## Shut down parallel process
stopCluster(cluster)

## Check time
time


## -------------------------------------------------------------- ##
## Not run: check Monte Carlo results or check p-values for       ##
## replicability across different computational setups.           ##
## -------------------------------------------------------------- ##

## Put returned p-values into single list
#result <- unlist(solution)

## Calculate the mean p-value
#pval.mean <- mean(result)



## Save workspace
save.image("snow_mc.RData")


## End program.