# ------------------------------------------------------------------------------
# Zichen Deng
# Date: 30 Sep 2021

# Clear everything--------------------------------------------------------------
rm(list=ls())

# Set Working Directory---------------------------------------------------------
setwd("dl-files/Simulation")

# Load Packages-----------------------------------------------------------------
library("plyr")
library("MatchIt")
library("truncnorm")
library("trust")
library("xtable")

# Load estimators---------------------------------------------------------------
tsiv <- function(data){
  
  fs <- lm(D ~ Z , data=subset(data, R==0))
  rd <- lm(Y ~ Z, data=subset(data, R==1))
  coef <- rd$coefficients[2][[1]]/fs$coefficients[2][[1]]
  
  return(coef)
}

tstsls <- function(data){
  
  fs <- lm(D ~ Z, data=subset(data, R==0))
  data$Dhat <- predict(fs, data)
  rd <- lm(Y ~ Dhat - 1, data=subset(data, R==1))
  coef <- rd$coefficients[1][[1]]
  
  return(coef)
  
}

# Borrow from XXX
source("pscore.cal.R")
source("loss.ps.IPT.R")
source("loss.ps.cal.R")

ipt <- function(data){
  
  # D as vector
  D <- as.vector(data$R)
  # Sample size
  n <- length(D)
  # Add constant to covariate vector
  int.cov <- as.matrix(rep(1,n))
  if (!is.null(covariates)){
    if(all(as.matrix(covariates)[,1]==rep(1,n))){
      int.cov <- as.matrix(covariates)
    } else {
      int.cov <- as.matrix(cbind(1, covariates))
    }
  }
  
  # Weights
  i.weights <- as.vector(rep(1, n))
  pscore.ipt <- pscore.cal(D, int.cov, i.weights = i.weights, n = n)
  
  w1 <- pscore.ipt$pscore
  w2 <- data$X
  
  n1 <- size
  n0 <- size
  n <- n1 + n0
  t=c(rep(1,n1),rep(0,n0))
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  
  mu_1 = apply(as.matrix(Y[t==1]*Z[t==1]),2,sum)/n*(n/n1)
  mu_3 = apply(as.matrix(w1[t==0]*D[t==0]*Z[t==0]/((1-w1)[t==0])),2,sum)/sum(w1[t==0]/((1-w1)[t==0]))
  coef = mu_1/mu_3
  
  return(coef)
  
}

twostep <- function(data){
  
  m.out <- matchit(R~X, method="nearest", replace=TRUE, data = data, caliper = .2)
  m.data <- match.data(m.out)
  
  fs <- lm(D ~ Z, data=subset(m.data, R==0))
  rd <- lm(Y ~ Z, data=subset(m.data, R==1))
  coef <- rd$coefficients[2][[1]]/fs$coefficients[2][[1]]
  
  return(coef)
  
}

# Load function to simulate data------------------------------------------------
simu_data <- function(theta, omega){
  
  #-----------------------------------------------------------------------------
  size <- 2500
  
  # primary population
  X <- rtruncnorm(size, 0, 2, 1.5, omega)
  
  U <- rnorm(size, 0, 1)
  Z <- rnorm(size, 0, 1)
  D <- 0.5*Z+theta*Z*X+U+rnorm(size, 0, 1)
  
  M <- 0.5*D+U
  Y <- rnorm(size, 0, 1) + M
  
  T1 <- X
  R <- rep(1, size)
  p <- as.data.frame(cbind(Y,D,Z,X,R,T1))
  
  # auxiliary population
  X <- rtruncnorm(size, 0, 2, 0.5, omega)
  
  U <- rnorm(size, 0, 1)
  Z <- rnorm(size, 0, 1)
  D <- 0.5*Z+theta*Z*X+U+rnorm(size, 0, 1)
  
  M <- 0.5*D+U
  Y <- M + rnorm(size, 0, 1)
  
  T1 <- X
  R <- rep(0, size)
  a <- as.data.frame(cbind(Y,D,Z,X,R,T1))
  
  data <- rbind(p, a)
  
  data$XX <- data$X*data$X
  
  
  return(data)
  
}

# Simulation-----------------------------------------------------------------
set.seed(2021)
M <- 1000

est1 <- c(1:M)
est2 <- c(1:M)
est3 <- c(1:M)
est4 <- c(1:M)

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0, 1)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates1 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates1) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0.3, 1)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates2 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates2) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0, 0.35)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates3 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates3) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0.3, 0.35)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates4 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates4) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0.1, 0.35)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates5 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates5) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

for (i in c(1:M)) {
  
  size <- 2500
  data <- simu_data(0.2, 0.35)
  
  X <- data$X
  Y <- data$Y
  D <- data$D
  Z <- data$Z
  covariates<- as.matrix(cbind(data$X))
  
  est1[i] <- ipt(data)
  est2[i] <- tsiv(data)
  est3[i] <- tstsls(data)
  est4[i] <- twostep(data)
  
}
estimates6 <- as.data.frame(cbind(est2, est3, est1, est4))
names(estimates6) <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")

# summary descriptive
summarycols <- c("TSIV","TSTSLS","IPT","TWO-STEP-TSTSLS")
N <- function(x) length(x)
Bias <- function(x) unname(mean(x-0.5))
SD <- function(x) unname(sd(x))
RMSE <- function(x) unname(sqrt(mean((x-0.5)^2)))

(summarystats1 <- t(sapply(estimates1[,summarycols], each(N,Bias,SD,RMSE))))
(summarystats2 <- t(sapply(estimates2[,summarycols], each(N,Bias,SD,RMSE))))
(summarystats3 <- t(sapply(estimates3[,summarycols], each(N,Bias,SD,RMSE))))
(summarystats4 <- t(sapply(estimates4[,summarycols], each(N,Bias,SD,RMSE))))
(summarystats5 <- t(sapply(estimates5[,summarycols], each(N,Bias,SD,RMSE))))
(summarystats6 <- t(sapply(estimates6[,summarycols], each(N,Bias,SD,RMSE))))

# Produce Table A1 A2-----------------------------------------------------------
# Table A1 - Scenario 1
summarytable1 <- xtable(summarystats1,
                       digits=3,
                       align=c("Y",rep("r",4)),
                       display=c("s","d","f","f","f"))

# Table A1 - Scenario 2
summarytable2 <- xtable(summarystats2,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A1 - Scenario 3
summarytable3 <- xtable(summarystats3,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A1 - Scenario 4
summarytable4 <- xtable(summarystats4,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A2 - Scenario 1
summarytable5 <- xtable(summarystats3,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A2 - Scenario 2
summarytable6 <- xtable(summarystats5,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A2 - Scenario 3
summarytable7 <- xtable(summarystats6,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

# Table A2 - Scenario 4
summarytable8 <- xtable(summarystats4,
                        digits=3,
                        align=c("Y",rep("r",4)),
                        display=c("s","d","f","f","f"))

print(summarytable1,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable2,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable3,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable4,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable5,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable6,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable7,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)

print(summarytable8,
      tabular.environment="tabularx",
      only.contents=T,
      comment=F,
      booktabs=T,
      include.colnames=F,
      hline.after=NULL)
      

