## Accompanying materials to the Online Appendix to
## "Private returns to R&D in the presence of spillovers, revisited"
## Forthcoming, Journal of Applied Econometrics
## Giovanni Millo, August 1st 2018

## The dineof() function is part of the 'sinkr' package
## "A collection of functions with emphasis on multivariate methods and handling of
## geographic datasets" (https://github.com/marchtaylor/sinkr)
##
## One version is added to the materials because, unlike other software referenced in the
## paper, it is not available from the standard source CRAN. It can be installed according
## to the instructions below:
##
## To install (using devtools):
## library(devtools)
## install_github("marchtaylor/sinkr")
## To cite:
## citation("sinkr")
##
## See also:
## http://menugget.blogspot.it/2012/10/dineof-data-interpolating-empirical.html

## Fill the missing entries of a matrix by iterated truncated-SVD ("EOF")
## reconstruction.
##
## Starting from a zero fill, the NA cells and a held-out set of observed
## reference cells are repeatedly replaced by their rank-n.eof reconstruction.
## The root-mean-square error on the reference cells (RMS) is the score: the
## rank is kept while the RMS improves by more than delta.rms, then increased
## by one, until the RMS stops improving or n.max is reached.
##
## Arguments:
##   Xo         numeric matrix; NA marks the cells to be interpolated.
##   n.max      largest number of EOFs to try (default: number of columns of Xo).
##   ref.pos    linear indices of observed cells of Xo to hold out for scoring.
##              By default a random sample of max(30, 1% of the observed cells)
##              is drawn, so call set.seed() beforehand for reproducible output.
##   delta.rms  minimum decrease in RMS required to keep iterating.
##
## Value: a list with
##   Xa     Xo with the NA cells replaced by the reconstruction that achieved
##          the lowest RMS; the reference cells are restored to their observed
##          values,
##   n.eof  number of EOFs used for that reconstruction,
##   RMS    RMS of the initial zero fill followed by the RMS after every pass,
##   NEOF   number of EOFs in use at each entry of RMS.
##
## Needs the 'irlba' package (called as irlba::irlba, nothing is attached).
## One progress line is printed per pass.
dineof <- function(Xo, n.max = NULL, ref.pos = NULL, delta.rms = 1e-5) {

  if (is.null(n.max)) {
    n.max <- dim(Xo)[2]
  }

  na.true <- which(is.na(Xo))
  na.false <- which(!is.na(Xo))

  if (is.null(ref.pos)) {
    min.ref <- 30
    # sample() cannot draw min.ref cells from a smaller population (and would
    # treat a single index as 1:index), so fail early with a clear message.
    if (length(na.false) < min.ref) {
      stop("dineof() needs at least ", min.ref, " non-missing values to draw ",
           "the default reference sample (found ", length(na.false), "); ",
           "supply 'ref.pos' explicitly", call. = FALSE)
    }
    ref.pos <- sample(na.false, max(min.ref, 0.01 * length(na.false)))
  }

  # Cells that get overwritten by the reconstruction on every pass.
  fill.pos <- c(ref.pos, na.true)

  # Rank-k reconstruction of X from its k leading singular triplets.
  # drop = FALSE keeps u and v as matrices when k == 1.
  low.rank <- function(X, k) {
    idx <- seq_len(k)
    SVDi <- irlba::irlba(X, nu = k, nv = k)
    SVDi$u[, idx, drop = FALSE] %*%
      diag(SVDi$d[idx], k, k) %*%
      t(SVDi$v[, idx, drop = FALSE])
  }

  # Root-mean-square error of X on the held-out reference cells.
  ref.rms <- function(X) {
    sqrt(mean((X[ref.pos] - Xo[ref.pos])^2))
  }

  Xa <- replace(Xo, fill.pos, 0)
  rms.prev <- Inf
  rms.now <- ref.rms(Xa)
  n.eof <- 1
  RMS <- rms.now
  NEOF <- n.eof
  Xa.best <- Xa
  n.eof.best <- n.eof

  # Outer loop: increase the number of EOFs while the RMS keeps improving.
  while (rms.prev - rms.now > delta.rms && n.max > n.eof) {

    # Inner loop: repeat the replacement with the current number of EOFs
    # until the RMS stops improving.
    while (rms.prev - rms.now > delta.rms) {
      rms.prev <- rms.now
      Xa[fill.pos] <- low.rank(Xa, n.eof)[fill.pos]
      rms.now <- ref.rms(Xa)
      print(paste(n.eof, "EOF", "; RMS =", round(rms.now, 8)))
      RMS <- c(RMS, rms.now)
      NEOF <- c(NEOF, n.eof)
      gc()
      # rms.now is itself an element of RMS, so exact equality is intended.
      if (rms.now == min(RMS)) {
        Xa.best <- Xa
        n.eof.best <- n.eof
      }
    }

    # Add one EOF and do a single pass with it; the outer condition then
    # decides whether the extra EOF was worth it.
    n.eof <- n.eof + 1
    rms.prev <- rms.now
    Xa[fill.pos] <- low.rank(Xa, n.eof)[fill.pos]
    rms.now <- ref.rms(Xa)
    print(paste(n.eof, "EOF", "; RMS =", round(rms.now, 8)))
    RMS <- c(RMS, rms.now)
    NEOF <- c(NEOF, n.eof)
    gc()
    if (rms.now == min(RMS)) {
      Xa.best <- Xa
      n.eof.best <- n.eof
    }
  }

  # Return the best reconstruction, with the held-out cells put back.
  Xa <- Xa.best
  Xa[ref.pos] <- Xo[ref.pos]

  list(Xa = Xa, n.eof = n.eof.best, RMS = RMS, NEOF = NEOF)
}
