# Comparison: Prediction
# Mar 29, 2016
# Ref: ~/Projects/DP/baseball_compare
# adds on results for iteration method (WLVmix)
 
source("data_select2012.R")

source("profl_fun.R")
source("uvTweedie_WGLVmix.R") # Bayes rule (Tweedie formula) for u or v (marginal out v or u)
source("uvTweedie_WTLVmix.R") # Bayes rule for u or v (independence prior)
source("uTweedie_Bmix.R") # Bayes rule (Tweedie formula) for u or v (marginal out v or u)

# Restore the saved objects in joint_np_it.Rda into the workspace and keep
# the restored f_it under a second name, fi.
# NOTE(review): presumably fi exists so that later load() calls cannot
# clobber this fit if their files also contain an object called f_it -- confirm.
load("joint_np_it.Rda")
fi = f_it

# Model with age and lag covariates.
# pl2, bs2, rhos, C, ha, age, id and w are not defined in this file; they are
# presumably restored by this load() (or by data_select2012.R) -- verify.
load("joint_it_with_age_lag.Rda")

# Locate the maximiser of the 3-way array pl2 and read off the matching
# coefficients: dimension 1 indexes bs2[,1], dimension 2 indexes bs2[,2],
# dimension 3 indexes rhos.
# NOTE(review): pl2 looks like a profile (log-)likelihood grid -- confirm.
whichmax <- arrayInd(which.max(pl2),dim(pl2))
b1l <- bs2[,1][whichmax[1]]
b2l <- bs2[,2][whichmax[2]]
rho <- rhos[whichmax[3]]
lag = C$lag
# Response with the lag effect and the quadratic age effect (age centred at 30,
# scaled by 100 and 100^2) removed.
y_lag = ha - rho * lag - b1l * (age - 30)/100 - b2l*(age-30)^2/10000
# Fit the mixing distribution to the adjusted response.
# NOTE(review): WLVmix is not defined here; presumably from REBayes or one of
# the sourced files -- confirm where it is attached.
f_age_lag <- WLVmix(y_lag,id,w,rtol = 1e-10,maxit = 5)

# Model with age covariates only.
# This load() may replace pl2 and bs2 (and any other same-named object) with
# the versions saved in joint_it_with_age.Rda.
load("joint_it_with_age.Rda")

# Maximiser of pl2; only the first two indices are used here (age coefficients).
whichmax <- arrayInd(which.max(pl2),dim(pl2))
b1 <- bs2[,1][whichmax[1]]
b2 <- bs2[,2][whichmax[2]]
# Statement order matters: b1/b2 are taken from the age-only file above, then
# the age+lag file is reloaded so ha, age, id and w come from that data set.
load("joint_it_with_age_lag.Rda")  # to get rid of 2002 data so that length of ha agrees with joint_with_age_lag
# Response with the quadratic age effect removed (age centred at 30).
yage = ha - b1 * (age - 30)/100 - b2*(age-30)^2/10000
fage <- WLVmix(yage,id,w,rtol = 1e-10,maxit = 5)   # with age covariates, independence prior


# Realized 2012 batting average on the arcsine-square-root scale,
# asin(sqrt((H + 0.25) / (AB + 0.5))).
bballt2012$HA <- asin(sqrt(
  (bballt2012$H + 0.25) / (bballt2012$AB + 0.5)
))
# Membership flags between the estimation sample and the 2012 sample.
sub1 <- is.element(bballt$id, bballt2012$id)  # TRUE for estimation-sample rows whose player appears in 2012
sub2 <- is.element(bballt2012$id, bballt$id)  # TRUE for 2012 rows whose player appears in the estimation sample







# Posterior (Tweedie) predictions for the three fitted models. Each call
# returns a list from which the u and v components are extracted.

# WLVmix model without covariates.
# Use `fi`, the copy of f_it taken immediately after load("joint_np_it.Rda").
# The later load() calls restore whole saved workspaces and overwrite any
# same-named object, so `f_it` is not guaranteed to still be the no-covariate
# fit at this point; `fi` is identical to it when nothing was overwritten.
LVp <- uvTweedie_WTLVmix(fi, ha, id, w)
LVpu <- LVp$u
LVpv <- LVp$v

# WLVmix model with age (predictions are on the age-adjusted scale of yage)
LVpage <- uvTweedie_WTLVmix(fage, yage, id, w)
LVpageu <- LVpage$u
LVpagev <- LVpage$v

# WLVmix model with age and lag (predictions are on the adjusted scale of y_lag)
LVplag <- uvTweedie_WTLVmix(f_age_lag, y_lag, id, w)
LVplagu <- LVplag$u
LVplagv <- LVplag$v




# Merge 2012 player set with those that are involved in estimation


# Player-level predictions from the three models, keyed by id.
# Built with data.frame() instead of as.data.frame(cbind(...)): cbind() of a
# character vector with numeric vectors coerces every column to character
# (factor on R < 4.0), which silently turns the predictions into strings.
# Column names and order are unchanged.
T_it <- data.frame(
  name = bballt$name,
  id = bballt$id,
  LVpu = LVpu,
  LVpv = LVpv,
  LVxpu = LVpageu,
  LVxpv = LVpagev,
  LVlagpu = LVplagu,
  LVlagpv = LVplagv,
  stringsAsFactors = FALSE
)

# Per-player 2011 summaries: mean HA, total AB and total H.
#
# tapply() returns its result in sorted factor-level order of id, whereas
# unique(id) is in order of first appearance, so pairing the two positionally
# mislabels players whenever C11 is not sorted by id. Likewise unique(name)
# can have a different length than the id set when two players share a name.
# Here each aggregate is looked up by id key, and the name is taken from the
# same first row as the id, so all columns are aligned by construction.
C11 <- C[C$year == 2011, ]
C2011 <- C11[!duplicated(C11$id), c("id", "name")]
C2011$HA2011 <- as.vector(tapply(C11$HA, C11$id, mean)[as.character(C2011$id)])
C2011$AB2011 <- as.vector(tapply(C11$AB, C11$id, sum)[as.character(C2011$id)])
C2011$H2011 <- as.vector(tapply(C11$H, C11$id, sum)[as.character(C2011$id)])
# Keep the original column order (id, HA2011, AB2011, H2011, name): the merge
# further down renames columns by position.
C2011 <- C2011[, c("id", "HA2011", "AB2011", "H2011", "name")]
rownames(C2011) <- NULL
# Join the 2011 summaries with the model predictions on id. merge() with its
# default arguments keeps only ids present in both tables. Both inputs carry a
# "name" column, so the result has name.x (from C2011) and name.y (from T_it);
# name.y is dropped.
T1 = merge(C2011, T_it, by.x = "id", by.y = "id")
T1 = subset(T1, select = -c(name.y))
# Positional rename: relies on the merge output order (key, remaining C2011
# columns, remaining T_it columns).
names(T1) <- c("id","HA2011","AB2011","H2011","name", "LVpu","LVpv","LVxpu", "LVxpv", "LVlagpu","LVlagpv")


# Join the 2012 realizations with T1 on id; again only ids present in both are
# kept, and T1's duplicate name column (name.y) is dropped.
final <- merge(bballt2012,T1,by.x = "id", by.y = "id")
final <- subset(final, select = -c(name.y))
# Positional rename of all 17 columns.
# NOTE(review): this assumes bballt2012's columns are, in order,
# id, AB, H, walks, pitcher, age, name, HA -- verify against data_select2012.R.
names(final) <- c("id","AB2012","H2012","walks2012","pitcher","age","name","HA","HA2011","AB2011","H2011", "LVpu","LVpv","LVxpu", "LVxpv", "LVlagpu","LVlagpv")



# Build the 2012 predictions on the transformed scale and on the binomial p
# scale (sin(x)^2), with and without the covariate effects added back.
# (!! the LV* columns of final may be factor/character, so they are converted
# via their character representation before as.numeric().)

# WLVmix without covariates
final$LVpu <- as.numeric(as.character(final$LVpu))
final$LVpub <- sin(final$LVpu)^2   # p scale

# WLVmix with age: add the quadratic age effect (age centred at 30) back in
final$LVxpu <- as.numeric(as.character(final$LVxpu)) +
  b1 * (final$age - 30) / 100 +
  b2 * (final$age - 30)^2 / 10000
final$LVxpub <- sin(final$LVxpu)^2   # p scale

# WLVmix with age and lag: add the lag effect and the age effect back in.
# NOTE(review): lag2011 is a single number (the mean of all 2011 HA rows in C),
# so the lag term shifts every player by the same amount -- confirm this is
# intended rather than each player's own 2011 value.
lag2011 <- mean(C$HA[C$year == 2011])
final$LVlagpu <- as.numeric(as.character(final$LVlagpu)) +
  rho * lag2011 +
  b1l * (final$age - 30) / 100 +
  b2l * (final$age - 30)^2 / 10000
final$LVlagpub <- sin(final$LVlagpu)^2   # p scale

# 2012 realizations: raw batting average and its arcsine-sqrt transform
final$p2012 <- final$H2012 / final$AB2012
final$HA2012 <- asin(sqrt((final$H2012 + 0.25) / (final$AB2012 + 0.5)))


# Weighted sum of squared errors: each squared difference between a and b is
# weighted by 4 * AB.
#   a, b : numeric vectors (prediction and realization)
#   AB   : numeric vector of at-bats used as weights
# Returns a single number.
SSE <- function(a, b, AB) {
  sq_err <- (a - b)^2
  weight <- 4 * AB
  sum(sq_err * weight)
}

# Total squared error on the transformed scale: the squared difference between
# a and b, less the term 1 / (4 * AB), summed over all elements.
#   a, b : numeric vectors (prediction and realization)
#   AB   : numeric vector of at-bats
# Returns a single number.
TSE <- function(a, b, AB) {
  sq_err <- (a - b)^2
  correction <- 1 / (4 * AB)
  sum(sq_err - correction)
}

# TSE of each model's transformed-scale prediction against the 2012
# transformed realization; printed as a named vector.
LSModel_TSE <- c(
  WLV = TSE(final$LVpu, final$HA2012, final$AB2012),
  WLVage = TSE(final$LVxpu, final$HA2012, final$AB2012),
  WLVlag = TSE(final$LVlagpu, final$HA2012, final$AB2012)
)

print(LSModel_TSE)


# Total squared error on the p scale: the squared difference between a and b,
# less the term b * (1 - b) / n, summed over all elements.
#   a : numeric vector of predicted proportions
#   b : numeric vector of realized proportions
#   n : numeric vector of trial counts
# Returns a single number.
TSEp <- function(a, b, n) {
  sq_err <- (a - b)^2
  correction <- b * (1 - b) / n
  sum(sq_err - correction)
}

# Pooled 2011 hit rate (total H over total AB across all 2011 rows of C).
# NOTE(review): lagp2011 is not used anywhere in the visible part of the file.
lagp2011 <- sum(C$H[C$year == 2011]) / sum(C$AB[C$year == 2011])

# TSEp of each model's p-scale prediction against the realized 2012 average.
BModel_TSEp <- c(
  WLV = TSEp(final$LVpub, final$p2012, final$AB2012),
  WLVage = TSEp(final$LVxpub, final$p2012, final$AB2012),
  WLVlag = TSEp(final$LVlagpub, final$p2012, final$AB2012)
)
print(BModel_TSEp)


# SSE of each model's transformed-scale prediction against the 2012
# transformed realization; printed as a named vector.
LSModel_SSE <- c(
  WLV = SSE(final$LVpu, final$HA2012, final$AB2012),
  WLVage = SSE(final$LVxpu, final$HA2012, final$AB2012),
  WLVlag = SSE(final$LVlagpu, final$HA2012, final$AB2012)
)
print(LSModel_SSE)





