## Delgado and Parmeter
## 7/27/2013
## delgado2@purdue.edu

####################################################################
## Runs nonlinear least squares for Delgado and Parmeter.         ##
##                                                                ##
##    Calculations on Linux cluster                               ##
##    Uses `snowfall' package                                     ##
##    Uses `harvestr' package to generate L'Ecuyer seeds          ##
##                                                                ##
##    NLS setup:                                                  ##
##        Model setup: Cobb-Douglas                               ##
##        Number of multistarts: 100                              ##
##        Sample size: 2500 observations                          ##
##                                                                ##
##    Number of cores controlled by `core.number'                 ##
##        Current configuration: 4 cores                          ##
####################################################################


## -------------------------------------------------------------- ##
## Startup - clear memory, load packages, and set parameters      ##
## -------------------------------------------------------------- ##

## Clear memory
rm(list=ls())
set.seed(123)	    # Seed for data generation

## Load packages
library(snowfall)     # For parallel implementation
library(harvestr)     # For L'Ecuyer seeds

## Basic parameters
n <- 2500             # Sample size
nmulti <- 100         # Number of multistarts

## Number of cores to use in parallel
core.number <- 4

## -------------------------------------------------------------- ##
## Define seeds - uses adapted L'Ecuyer method discussed in text. ##
##                                                                ##
## Specifically, generates 100 sets of L'Ecuyer seeds to be       ##
## called during optimization loop to control random numbers for  ##
## each independent calculation.                                  ##
## -------------------------------------------------------------- ##

seed.temp <- gather(nmulti,seed=123)       # Uses 123 as initial seed
Seed <- matrix(nrow=nmulti,ncol=6)         # Empty matrix to hold seeds
for(i in 1:nmulti){                        # Loop to fill seed matrix

  Seed[i,] <- seed.temp[[i]][2:7]

}

## -------------------------------------------------------------- ##
## Generate data:                                                 ##
##                                                                ##
##    3 variables: uniformly distributed over variable ranges     ##
##    Parameter vector: (0.8,1.1,0.7,0.2)                         ##
##    Standard normal errors                                      ##
## -------------------------------------------------------------- ##

## Regressors
x <- cbind(runif(n,4,12),
           runif(n,2,5),
           runif(n,6,7))

## Coefficients
b <- c(0.8,1.1,0.7,0.2)     

## Outcome
y <- b[1]*x[,1]^b[2]*x[,2]^b[3]*x[,3]^b[4]+rnorm(n)

## -------------------------------------------------------------- ##
## Define NLS function.                                           ##
##                                                                ##
## Function implements ONE single multistart and returns the      ##
## optimal function value, convergence classification, and        ##
## coefficient estimates for that optimization run. Function can  ##
## be called `nmulti' times.                                      ##
## -------------------------------------------------------------- ##

## Construct function to send to processors
nls <- function(nmulti){

  seed.run <- Seed[nmulti,]             # Call row `nmulti' from seed matrix
  set.seed(seed.run, "L'Ecuyer-CMRG")   # Set seed for run; L'Ecuyer type
  
  ## Define nls function
  model <- function(a){
    
    (1/n)*sum((y - a[1]*x[,1]^a[2]*x[,2]^a[3]*x[,3]^a[4])^2)
    
  }
  
  ## Construct solutions vector
  solve <- numeric(length(b)+2)
  
  ## Optimize and store results
  optim <- optim(runif(length(b)),model,method="Nelder-Mead")
  solve[1] <- optim$value
  solve[2] <- optim$convergence
  solve[3:(length(b)+2)] <- optim$par
  
  ## Return solution
  return(solve)
  
}

## -------------------------------------------------------------- ##
## Parallel implementation - uses `sfLapply' in `snowfall'        ##
##                                                                ##
## Uses previously defined cores for cluster, and shuts down      ##
## cluster when finished.                                         ##
##                                                                ##
## Uses the parallel version of `lapply' from the `snowfall'      ##
## package to repeat the NLS procedure `nmulti' times, saving     ##
## the returned optimization results from each rep.               ##
## -------------------------------------------------------------- ##

## Initialize cluster
sfInit(parallel=TRUE,cpus=core.number)   

## Export objects
sfExport("n","x","y","b","Seed")

## Run NLS in parallel; record computation time
time <- system.time(solution <- sfLapply(1:nmulti,nls))

## If in sequential mode (core.number=1), use:
##   time <- system.time(solution <- lapply(1:nmulti,nls))

## Shut down parallel process
sfStop()

## Check time
time


## -------------------------------------------------------------- ##
## Not run: check NLS results at lowest function value to see     ##
## if the optimization procedure was successful.                  ##
## -------------------------------------------------------------- ##

## Obtain optimal solution with lowest function value
#Results <- sapply(1:nmulti,function(i){cbind(solution[[i]])})
#min <- min(Results[1,])
#b.hat <- Results[3:(length(b)+2),Results[1,]==min]

## Compare actual and estimated coefficients
#cbind(b,b.hat)
 

## Save workspace
save.image("snowfall_nls.RData")


## End program.
