source("rrp.dist.R")
source("mahala.dist.R") # calculates the mahalanobis distance
source("nnk.att.R") # calculates NN1, NN4, NN16 estimators with adjusted versions
source("rrp.att.R") # calculates the simple and the weighted RRP ATT estimator
source("optm.att.R") # calculates the optimal match estimator


# uncomment to test DW data
load("att.rda")

tsubjects <- which(DW$treated==1) # change dataset here
csubjects <- which(DW$treated==0) #
dati <- DW[,-c(1,9)]              #
outcome <- DW$re78                #

MyRep <- 500
MySplit <- 20
MyCut <- 20
MyCheck <- FALSE
MCSim <- 1000

dname <- "DW"
basename <- sprintf("%s_MC5.5%d_R%3.3d_S%2.2d_C%2.2d_B=%1.1d",dname,MCSim,MyRep, MySplit, MyCut, MyCheck)

pdfname <- paste(basename,"pdf",sep=".")
outname <- paste(basename,"out",sep=".")

ofile <- file(outname, open="wt")

nt <- length(tsubjects)
nc <- length(csubjects)
n <- nt+nc
treated <- logical(n)
treated[tsubjects] <- TRUE

# we move treated first and controls last for efficiency reasons
dati <- dati[c(tsubjects,csubjects),]
treated <- treated[c(tsubjects,csubjects)]
outcome <- outcome[c(tsubjects,csubjects)]


tsubjects.orig <- tsubjects
csubjects.orig <- csubjects
tsubjects <- 1:nt
csubjects <- (nt+1):n

dati$black <- factor(dati$black)
dati$hispanic <- factor(dati$hispanic)
dati$nodegree <- factor(dati$nodegree)
dati$married <- factor(dati$married)


dati.num <- dati
for(i in 1:dim(dati)[2])
 dati.num[,i] <- as.numeric(dati[,i])


propensity  <- glm(treated~ I(age^2) + I(education^2) + black +
                   hispanic + married + nodegree + I(re74^2) + I(re75^2) +
                   I(re74==0) + I(re75==0), family=binomial, data=dati)

M <- cbind(rep(1, n),
           propensity$linear.pred,
           I(log(dati$age)^2),
           I(log(dati$education)^2),
           I(log(dati$re74+0.01)^2),
           I(log(dati$re75+0.01)^2))

propensity.coeffs <- propensity$coeff

#let's create some arbritrary weights
propensity.coeffs <- as.matrix(c(
                                 1+00,  #(Intercept)        
                                 .5,    #linear.pred
                                 .01, #age
                                 -.3,  #educ
                                 -0.01, #I(re74^2)          
                                 0.01   #I(re75^2)          
                                 ))

mu = M %*% propensity.coeffs
Tr.pred <- exp(mu)/(1+exp(mu))
#print(summary(Tr.pred))

TreatmentEffect <- 1000
TreatmentReal <- matrix(nrow=n, ncol=1)

cat(paste("data :",dname,"\n"),file=ofile)


cat(sprintf("Replications:%3.3d\nMin split:%2.2d\nSupport Cut:2.2d\nBalance Check:%1.1d\n",MyRep, MySplit, MyCut, MyCheck),file=ofile)

MAH <- mahala.dist(dati.num)

#pdf(file=pdfname)
resume <- NULL

set.seed(2810192) # same seed used in GenMatch Exp 2
tmp <- NULL

for(MC in 1:MCSim){

	for(i in 1:n)
		TreatmentReal[i] = sample(0:1, 1, prob=c(1-Tr.pred[i],Tr.pred[i]))

    outcome <- I(TreatmentEffect*TreatmentReal) + .1*exp(.7*log(dati$re74+0.01) + .7*log(dati$re75+0.01)) + rnorm(n, 0, 10)
    treated <- TreatmentReal
	tsubjects <- which(TreatmentReal==1)
	csubjects <- which(TreatmentReal==0)
	nt <- length(tsubjects)
	nc <- length(csubjects)

	D <- rrp.dist(dati, treated, msplit=MySplit,Rep=MyRep, cut.in=MyCut, check.bal=MyCheck)
	cat(sprintf("\n%5.5d/%5.5d\n",MC,MCSim))
	lambda <- seq(0,1,length=20)
	p <- 1-as.matrix(D)
	nl <- length(lambda)
	xxx <- numeric(nl)
	for(l in 1:nl)
		xxx[l] <- sum(apply(p[tsubjects,csubjects],1,function(x) (length(which(x>=lambda[l]))>0))) 
		
#	P2 <- 1-D
#	P2[which(P2>0)] <- 1
#	p2 <- as.matrix(P2)
#	p2 <- p2[tsubjects,]
  
	1+min(which(abs(xxx[3:20]/xxx[2:19]-1)<0.001)) -> li

	thr <- lambda[li]
	tsubjects[which(apply(p[tsubjects,csubjects],1,function(x) length(which(x>=thr)))>0)] -> idxT
	csubjects[unique(unlist(apply(p[tsubjects,csubjects],1,function(x) which(x>=thr))))] -> idxC
	#cat(paste("empirical threshold lambda=",thr,"\n"))
	#cat(paste("length(idxT)=",length(idxT),"\n"))
	#cat(paste("length(idxC)=",length(idxC),"\n"))

	RAW <- mean(outcome[tsubjects]) - mean(outcome[csubjects])  

	tmp.dati <- cbind(outcome, dati)
	adj.model <- rpart(outcome~., data=tmp.dati[-treated,], method="anova")             
	pred.ocome <- as.vector(predict(adj.model, dati))  # this is the estimator of mu_0^C(X) = E(Y^C|X)

	which(D==1) -> prohib
	A <- D
	A[prohib] <- Inf

	opt.match(A, treated, outcome, pred.ocome) -> rrp.out
	RRP.Delta <- rrp.out[[1]]
	RRP.ATT <- rrp.out[[2]]
	RRP.ATT.ADJ <- rrp.out[[3]]
	rrp.alpha <- rrp.out[[4]]
	rrp.beta <- rrp.out[[5]]

	opt.match(MAH, treated, outcome, pred.ocome) -> mah.out
	Full.Delta <- mah.out[[1]]
	Full.ATT <- mah.out[[2]]
	Full.ATT.ADJ <- mah.out[[3]]
	mah.alpha <- mah.out[[4]]
	mah.beta <- mah.out[[5]]

	B <- MAH
	B[prohib] <- Inf
	opt.match(B, treated, outcome, pred.ocome) -> mah2.out
	Full2.Delta <- mah2.out[[1]]
	Full2.ATT <- mah2.out[[2]]
	Full2.ATT.ADJ <- mah2.out[[3]]
	mah2.alpha <- mah2.out[[4]]
	mah2.beta <- mah2.out[[5]]

	rrp.att(as.matrix(D), treated, outcome, pred.ocome) -> atts
	att <- atts[[1]]
	att.adj <- atts[[2]]
	w.att <- atts[[3]]
	w.att.adj <- atts[[4]]

	sel <- c(idxT,idxC)
	if(length(sel)>0){
		nt.sel <- length(idxT)
		nc.sel <- length(idxC)
		t.sel <- logical(nt.sel+nc.sel)
		t.sel[1:nt.sel] <- T
		rrp.att(as.matrix(D)[sel,sel], t.sel, outcome[sel], pred.ocome[sel]) -> atts.sel
		att.sel <- atts.sel[[1]]
		att.adj.sel <- atts.sel[[2]]
		w.att.sel <- atts.sel[[3]]
		w.att.adj.sel <- atts.sel[[4]]
	} else {
		att.sel <- NA
		att.adj.sel <- NA
		w.att.sel <- NA
		w.att.adj.sel <- NA
	}

	nn.att(as.matrix(D), treated, outcome, pred.ocome) -> yy
	att.nn1 <- yy[[1]]
	att.nn4 <- yy[[2]]
	att.nn16 <- yy[[3]]
	att.nn1.adj <- yy[[4]]
	att.nn4.adj <- yy[[5]]
	att.nn16.adj <- yy[[6]]

# mahalanobis NN1/4/16
	nn.att(as.matrix(MAH), treated, outcome, pred.ocome) -> yy
	att3.nn1 <- yy[[1]]
	att3.nn4 <- yy[[2]]
	att3.nn16 <- yy[[3]]
	att3.nn1.adj <- yy[[4]]
	att3.nn4.adj <- yy[[5]]
	att3.nn16.adj <- yy[[6]]

	
	raw.adj <- mean(outcome[tsubjects]-pred.ocome[tsubjects])-mean(outcome[csubjects]-pred.ocome[csubjects])

    tmp1 <- c(RAW, raw.adj, att, att.adj, w.att, w.att.adj, RRP.ATT, RRP.ATT.ADJ, Full.ATT, 
		Full.ATT.ADJ, Full2.ATT, Full2.ATT.ADJ,att.nn1, att.nn4, att.nn16, att3.nn1, att3.nn4, att3.nn16,
		att.nn1.adj, att.nn4.adj, att.nn16.adj, att3.nn1.adj, att3.nn4.adj, att3.nn16.adj, att.sel, att.adj.sel,
		w.att.sel,w.att.adj.sel) 
	tmp <- rbind(tmp, tmp1)



	colnames(tmp) <-c("RAW","RAW.ADJ", "RRP:ATT","RRP:ATT.ADJ", "W.RRP:ATT", "W.RRP:ATT.ADJ", "RRP:FULL", "RRP:FULL.ADJ",
		"MAH:FULL","MAH:FULL.ADJ","MAH+RRP:FULL","MAH+RRP:FULL.ADJ", "RRP:NN1","RRP:NN4","RRP:NN16",
		"MAH:NN1","MAH:NN4","MAH:NN16","RRP:NN1.ADJ","RRP:NN4.ADJ","RRP:NN16.ADJ","MAH:NN1.ADJ","MAH:NN4.ADJ","MAH:NN16.ADJ",
		"RRP.SEL:ATT","RRP.SEL:ATT.ADJ","W.RRP.SEL:ATT","W.RRP.SEL:ATT.ADJ")
#	print(tmp1)
	cat(apply(tmp,2,mean),file=ofile)
	cat(apply(tmp,2,function(x) sqrt(mean(x^2))),file=ofile)
	cat("\n",file=ofile)
}
rdaname <- paste(basename,"rda",sep=".")
save.image(file=rdaname)
pdf(pdfname)
for(i in 1:dim(tmp)[2]){
 plot(tmp[,i],type="l", main=colnames(tmp)[i])
 abline(h=TreatmentEffect,lty=3,col="red")
 plot((tmp[,i]-TreatmentEffect),type="l", main=paste(colnames(tmp)[i],"sqrt(mse)"))
}
dev.off()
close(ofile)
#dev.off()
