## Delgado and Parmeter
## 7/27/2013
## delgado2@purdue.edu

####################################################################
## Runs the standard error bootstrap for Delgado and Parmeter.    ##
##                                                                ##
##    Calculations on Linux cluster                               ##
##    Uses `snowfall' package                                     ##
##    Uses `harvestr' package to generate L'Ecuyer seeds          ##
##                                                                ##
##    Bootstrap setup:                                            ##
##        Bootstrap reps: 100,000                                 ##
##        Sample size: 30,000 observations                        ##
##        Bootstrap type: wild bootstrap                          ##
##        Errors: heteroskedastic                                 ##
##                                                                ##
##    Number of cores controlled by `core.number'                 ##
##        Current configuration: 4 cores                          ##
####################################################################


## -------------------------------------------------------------- ##
## Startup - clear memory, load packages, and set parameters      ##
## -------------------------------------------------------------- ##

## Clear memory
rm(list=ls())
set.seed(123)	    # Seed for data generation

## Load packages
library(snowfall)     # For parallel implementation
library(harvestr)     # For L'Ecuyer seeds

## Basic parameters
B <- 100000           # Bootstrap replications
n <- 30000            # Number of observations

## Number of cores to use in parallel
core.number <- 4

## -------------------------------------------------------------- ##
## Define seeds - uses adapted L'Ecuyer method discussed in text. ##
##                                                                ##
## Specifically, generates 1000 sets of L'Ecuyer seeds to be      ##
## called during bootstrap loop to control random numbers for     ##
## each independent calculation.                                  ##
## -------------------------------------------------------------- ##

seed.temp <- gather(B,seed=123)          # Uses 123 as initial seed
Seed <- matrix(nrow=B,ncol=6)            # Empty matrix to hold seeds
for(i in 1:B){                           # Loop to fill seed matrix

  Seed[i,] <- seed.temp[[i]][2:7]

}

## -------------------------------------------------------------- ##
## Generate data:                                                 ##
##                                                                ##
##    5 variables: uniformly distributed over variable ranges     ##
##    Parameter vector: (12,3.6,4,-0.7,1.1,-1)                    ##
##    Standard deviation of errors depends on x                   ##
## -------------------------------------------------------------- ##

## Regressors
x <- cbind(runif(n,2,8),
           runif(n,2,3),
           runif(n,-3,3),
           runif(n,1,2),
           runif(n,2,6))
           
## Linear outcome with heteroskedasticity
y <- 12+3.6*x[,1]+4*x[,2]-0.7*x[,3]+1.1*x[,4]-1*x[,5]+rnorm(n,0,x[,1])

## -------------------------------------------------------------- ##
## Estimate model: regression, residuals, and fitted values       ##
## -------------------------------------------------------------- ##

## Linear regression
lm <- lm(y~x)

## Pull out residuals and fitted values
resid <- residuals(lm)            # Residuals
fit <- fitted(lm)                 # Fitted values


## -------------------------------------------------------------- ##
## Define bootstrap function.                                     ##
##                                                                ##
## Function implements ONE single bootstrap run and returns       ##
## the coefficients for that bootstrap sample run. Function can   ##
## be called B times.                                             ##
##                                                                ##
## This procedure bootstraps standard errors for a standard       ##
## linear regression with heteroskedasticity.                     ##
## -------------------------------------------------------------- ##

boot <- function(B){
  
  seed.run <- Seed[B,]                  # Call row B from seed matrix
  set.seed(seed.run, "L'Ecuyer-CMRG")   # Set seed for run; L'Ecuyer type
  
  ## Generate wild bootstrap residuals
  a <- -0.6180339887499     
  P.a <- 0.72360679774998
  b <- 1.6180339887499

  resid.star <- resid*ifelse(rbinom(length(resid),1,P.a)==1,a,b)
  
  ## Construct bootstrap y
  y.star <- fit + resid.star
  
  ## Run bootstrap regression
  lm.boot <- lm(y.star~x)
  
  ## Return bootstrap coefficients
  return(coefficients(lm.boot))
  
}

## -------------------------------------------------------------- ##
## Parallel implementation - uses `sfLapply' in `snowfall'        ##
##                                                                ##
##   Sets up cluster using previously defined number of cores     ##
##   Exports objects to all cores                                 ##
##   Runs parallel computation                                    ##
##   Shuts down cluster when finished                             ##
##                                                                ##
## Uses the parallel version of `lapply' from the `snowfall'      ##
## package to repeat the bootstrap procedure B times, saving      ##
## the returned bootstrap coefficient estimates from each rep.    ##
## -------------------------------------------------------------- ##

## Initialize cluster
sfInit(parallel=TRUE,cpus=core.number)   

## Export objects
sfExport("n","resid","fit","x","Seed")

## Run bootstrap in parallel; record computation time
time <- system.time(solution <- sfLapply(1:B,boot))

## If in sequential mode (core.number=1), use:
##   time <- system.time(solution <- lapply(1:B,boot))

## Shut down parallel process
sfStop()

## Check time
time


## -------------------------------------------------------------- ##
## Not run: check bootstrap results or check for replicability    ##
## across different computational setups.                         ##
## -------------------------------------------------------------- ##

## Put returned coefficient estimates into single matrix
#result <- matrix(ncol=length(coefficients(lm)),nrow=B)
#for(i in 1:B){

#  result[i,] <- solution[[i]]

#}

## Calculate the standard errors
#se <- apply(result,2,sd)

## Compare estimates and standard errors
#cbind(coefficients(lm),se)



## Save workspace
save.image("snowfall_boot.RData")


## End program.