

source("rrp.dist.R")
source("mahala.dist.R") # calculates the mahalanobis distance
source("nnk.att.R") # calculates NN1, NN4, NN16 estimators with adjusted versions
source("rrp.att.R") # calculates the simple and the weighted RRP ATT estimator
source("optm.att.R") # calculates the optimal match estimator

# uncomment to test DW data
load("all.rda")

tsubjects <- which(ST$treated==1) # change dataset here
csubjects <- which(ST$treated==0) #
dati <- ST[,-c(1,9)]              #
dati$u74 <- factor(dati$re74==0)
dati$u75 <- factor(dati$re75==0)
outcome <- ST$re78                #
fname <- "STuu"                     #

MySplit <- 5
MyCheck <- FALSE
MyRep <- 1500
MyCut <- 20

nt <- length(tsubjects)
nc <- length(csubjects)
n <- nt+nc
treated <- logical(n)
treated[tsubjects] <- TRUE

# we move treated first and controls last for efficiency reasons
dati <- dati[c(tsubjects,csubjects),]
treated <- treated[c(tsubjects,csubjects)]
outcome <- outcome[c(tsubjects,csubjects)]


tsubjects.orig <- tsubjects
csubjects.orig <- csubjects
tsubjects <- 1:nt
csubjects <- (nt+1):n

dati$black <- factor(dati$black)
dati$hispanic <- factor(dati$hispanic)
dati$nodegree <- factor(dati$nodegree)
dati$married <- factor(dati$married)


dati.num <- dati
for(i in 1:dim(dati)[2])
 dati.num[,i] <- as.numeric(dati[,i])
  
MAH <- mahala.dist(dati.num)



pdf(file=paste(fname,".pdf",sep=""))
resume <- NULL

set.seed(123)
  D <- rrp.dist(dati, treated, msplit=MySplit,Rep=MyRep, cut.in=MyCut, check.bal=MyCheck)
save(D, file=paste(fname,"_D.rda",sep=""))

	lambda <- seq(0,1,length=20)
	p <- 1-as.matrix(D)
	nl <- length(lambda)
	xxx <- numeric(nl)
	for(l in 1:nl)
		xxx[l] <- sum(apply(p[tsubjects,csubjects],1,function(x) (length(which(x>=lambda[l]))>0))) 

	
P2 <- 1-D
P2[which(P2>0)] <- 1
p2 <- as.matrix(P2)
p2 <- p2[tsubjects,]



par(mar=c(2,2,1,1))
image(t(p2),col=c("white","black"),axes=F)
axis(1, c(0.5*nt/n,nt/n+0.5*nc/n), c("treated","controls"))
axis(2, 0.5, "treated")
abline(v=nt/n,lwd=1,col="gray")
box()
par(mar=c(5,4,1,1))
plot(lambda,xxx/nt,type="l",xlab=expression(lambda), ylab=expression(S(lambda)),ylim=c(0,1))
dev.off()
  cat("\n")
  
1+min(which(abs(xxx[3:20]/xxx[2:19]-1)<0.001)) -> li

thr <- lambda[li]
tsubjects[which(apply(p[tsubjects,csubjects],1,function(x) length(which(x>=thr)))>0)] -> idxT
csubjects[unique(unlist(apply(p[tsubjects,csubjects],1,function(x) which(x>=thr))))] -> idxC
cat(paste("empirical threshold lambda=",thr,"\n"))
cat(paste("length(idxT)=",length(idxT),"\n"))
cat(paste("length(idxC)=",length(idxC),"\n"))
  
print("Best: Treated vs Controls")
print(summary(dati[idxT,]))
print(summary(dati[idxC,]))

tsubjects[ - match(idxT,tsubjects)] -> idxt
csubjects[ - match(idxC,csubjects)] -> idxc

print("Less favorable: Treated vs Controls")
print(summary(dati[idxt,]))
print(summary(dati[idxc,]))


  
RAW <- mean(outcome[tsubjects]) - mean(outcome[csubjects])  

tmp.dati <- cbind(outcome, dati)
adj.model <- rpart(outcome~., data=tmp.dati[-treated,], method="anova")             
pred.ocome <- as.vector(predict(adj.model, dati))  # this is the estimator of mu_0^C(X) = E(Y^C|X)

tmp <- NULL
which(D==1) -> prohib
A <- D
A[prohib] <- Inf

opt.match(A, treated, outcome, pred.ocome) -> rrp.out
RRP.Delta <- rrp.out[[1]]
RRP.ATT <- rrp.out[[2]]
RRP.ATT.ADJ <- rrp.out[[3]]
rrp.alpha <- rrp.out[[4]]
rrp.beta <- rrp.out[[5]]

opt.match(MAH, treated, outcome, pred.ocome) -> mah.out
Full.Delta <- mah.out[[1]]
Full.ATT <- mah.out[[2]]
Full.ATT.ADJ <- mah.out[[3]]
mah.alpha <- mah.out[[4]]
mah.beta <- mah.out[[5]]

B <- MAH
B[prohib] <- Inf
opt.match(B, treated, outcome, pred.ocome) -> mah2.out
Full2.Delta <- mah2.out[[1]]
Full2.ATT <- mah2.out[[2]]
Full2.ATT.ADJ <- mah2.out[[3]]
mah2.alpha <- mah2.out[[4]]
mah2.beta <- mah2.out[[5]]

rrp.att(D, treated, outcome, pred.ocome) -> atts
att <- atts[[1]]
att.adj <- atts[[2]]
w.att <- atts[[3]]
w.att.adj <- atts[[4]]

sel <- c(idxT,idxC)
if(length(sel)>0){
	nt.sel <- length(idxT)
	nc.sel <- length(idxC)
	t.sel <- logical(nt.sel+nc.sel)
	t.sel[1:nt.sel] <- T
	rrp.att(as.matrix(D)[sel,sel], t.sel, outcome[sel], pred.ocome[sel]) -> atts.sel
	att.sel <- atts.sel[[1]]
	att.adj.sel <- atts.sel[[2]]
	w.att.sel <- atts.sel[[3]]
	w.att.adj.sel <- atts.sel[[4]]
} else {
	att.sel <- NA
	att.adj.sel <- NA
	w.att.sel <- NA
	w.att.adj.sel <- NA
}

nn.att(D, treated, outcome, pred.ocome) -> yy
att.nn1 <- yy[[1]]
att.nn4 <- yy[[2]]
att.nn16 <- yy[[3]]
att.nn1.adj <- yy[[4]]
att.nn4.adj <- yy[[5]]
att.nn16.adj <- yy[[6]]

# mahalanobis NN1/4/16
nn.att(MAH, treated, outcome, pred.ocome) -> yy
att3.nn1 <- yy[[1]]
att3.nn4 <- yy[[2]]
att3.nn16 <- yy[[3]]
att3.nn1.adj <- yy[[4]]
att3.nn4.adj <- yy[[5]]
att3.nn16.adj <- yy[[6]]

	
raw.adj <- mean(outcome[tsubjects]-pred.ocome[tsubjects])-mean(outcome[csubjects]-pred.ocome[csubjects])

   
tmp <- rbind(tmp, c(RAW, raw.adj, att, att.adj, w.att, w.att.adj, RRP.ATT, RRP.ATT.ADJ, Full.ATT, 
Full.ATT.ADJ, Full2.ATT, Full2.ATT.ADJ,att.nn1, att.nn4, att.nn16, att3.nn1, att3.nn4, att3.nn16,
att.nn1.adj, att.nn4.adj, att.nn16.adj, att3.nn1.adj, att3.nn4.adj, att3.nn16.adj, att.sel, att.adj.sel,
w.att.sel,w.att.adj.sel))




 


colnames(tmp) <-c("RAW","RAW.ADJ", "RRP:ATT","RRP:ATT.ADJ", "W.RRP:ATT", "W.RRP:ATT.ADJ", "RRP:FULL", "RRP:FULL.ADJ",
"MAH:FULL","MAH:FULL.ADJ","MAH+RRP:FULL","MAH+RRP:FULL.ADJ", "RRP:NN1","RRP:NN4","RRP:NN16",
"MAH:NN1","MAH:NN4","MAH:NN16","RRP:NN1.ADJ","RRP:NN4.ADJ","RRP:NN16.ADJ","MAH:NN1.ADJ","MAH:NN4.ADJ","MAH:NN16.ADJ",
"RRP.SEL:ATT","RRP.SEL:ATT.ADJ","W.RRP.SEL:ATT","W.RRP.SEL:ATT.ADJ")
print(apply(tmp,2,mean))

save.image(file=paste(fname,".rda",sep=""))
#dev.off()
