# This file generates two sets of simulations studied in "Estimating Bayesian Decision Problems with Heterogeneous Expertise".
# The first does not include interaction terms between case and time covariates, and the second does.
# Running this file will produce a set of .Rdata files written to the working directory, 
# where each corresponds to a particular expertise difference between two voter types A and B.


MM <- function(data,inputs){

	# This function takes voting data (data is a dataframe) and lists of individual and time covariates (contained in inputs),
	# and returns a list containing first-stage parameters estimates and second-stage structural parameters both with and without interaction terms.
	
	vars <- c(inputs$time,inputs$individual)
	data <- subset(data, select = c(vars,"case","vote"))
	
	num_tvars <- length(inputs$time)
	num_tint <- sum(inputs$time_interactions)
	num_ivars <- length(inputs$individual)
	num_iint <- sum(inputs$individual_interactions)
	
	interactions <- matrix(NA, nrow = nrow(data), ncol = num_tint*num_iint)
	
	index <- 1

	for(i in 1:num_tvars){
		if(inputs$time_interactions[i] == 1){
			for(j in 1:num_ivars){
				if(inputs$individual_interactions[j] == 1){
						interactions[,index] <- data[,inputs$time[i]]*data[,inputs$individual[j]]
						index <- index + 1
					}
			} 
			
		}
	}
	
	votes <- as.matrix(data[,"vote"])
	
	kvars0 <- as.matrix(cbind(1,data[,vars]))
	kvars1 <- as.matrix(cbind(kvars0,interactions))
	
	tvars <- unique(cbind(data[,c(inputs$time,"case")]))
	qvars <- as.matrix(cbind(1,tvars[,inputs$time]))
	
	num_vars0 <- ncol(qvars) + 2*ncol(kvars0)
	args0 <- list("votes" = votes, "qvars" = qvars, "kvars" = kvars0)
	x0 <- tryCatch(optim(c(rep(0,num_vars0)), LLF, gr = NULL, args0, method = "BFGS", control = list(fnscale=-1)), error = function(err) paste("MY_ERROR:  ",err))
	
	if(length(x0) <= 1){
		print("error in optim without interactions")
		return(0)
	} else if(x0$convergence != 0){
		print("no convergence without interactions")
		return(0)
	} else output0 <- second_stage(x0$par,qvars,kvars0)
	
	args1 <- list("votes" = votes, "qvars" = qvars, "kvars" = kvars1)
	start1 <- c(x0$par[1:(ncol(qvars) + ncol(kvars0))],rep(0,ncol(interactions)),x0$par[(ncol(qvars) + ncol(kvars0) + 1):num_vars0],rep(0,ncol(interactions)))

	x1 <- tryCatch(optim(start1, LLF, gr = NULL, args1, method = "BFGS", control = list(fnscale=-1)), error = function(err) paste("MY_ERROR:  ",err))

	if(length(x1) <= 1){
		print("error in optim with interactions")
		return(0)
	} else if(x1$convergence != 0){
		print("no convergence with interactions")
		return(0)
	} else output1 <- second_stage(x1$par,qvars,kvars1)
	
	output <- list("first_stage_no_int" = x0$par, "params_no_int" = output0, "first_stage_yes_int" = x1$par, "params_yes_int" = output1)
	
}

LLF <- function(par,args){

	# The first stage likelihood function evaluated at "par" given data contained in "args"
	
	mem_votes <- args$votes
	qvars <- args$qvars
	kvars <- args$kvars
	
	num_qvars <- ncol(qvars)
	num_kvars <- ncol(kvars)
	num_meetings <- nrow(qvars)
	
	q.par <- as.matrix(par[1:num_qvars])
	k0.par <- as.matrix(par[(1+num_qvars):(num_qvars+num_kvars)])
	k1.par <- as.matrix(par[(num_qvars+num_kvars+1):(num_qvars+2*num_kvars)])
	
	q.est <- exp(qvars %*% q.par) / (1 + exp(qvars %*% q.par))
	k0.est <- exp(kvars %*% k0.par) / (1 +  exp(kvars %*% k0.par))
	k1.est <- (k0.est + exp(kvars %*% k1.par)) / (1 + exp(kvars %*% k1.par))
	
	p0 <- NULL
	p1 <- NULL
	sum.p0.p1 <- NULL
	
	start_index = 1
	
	for(t in 1:num_meetings){
				
		p0[t] <- (1 - q.est[t]) * exp(sum(mem_votes[(start_index):(start_index + 8)] * log(k0.est[(start_index):(start_index + 8)]) + (1 - mem_votes[(start_index):(start_index + 8)]) * log(1 - k0.est[(start_index):(start_index + 8)])))
				
		p1[t] <- q.est[t] * exp(sum(mem_votes[(start_index):(start_index + 8)] * log(k1.est[(start_index):(start_index + 8)]) + (1 - mem_votes[(start_index):(start_index + 8)]) * log(1 - k1.est[(start_index):(start_index + 8)])))
				
		sum.p0.p1[t] <- log(p0[t] + p1[t])
		
		start_index = start_index + 9
				
	}
			
	return(sum(sum.p0.p1))
			
}

second_stage <- function(first_stage_estimates,qvars,kvars){
	
	## This extracts structural parameters for each voter in each time period given first stage estimates
	
	q_params <- first_stage_estimates[1:ncol(qvars)]
	k0_params <- first_stage_estimates[(ncol(qvars) + 1):(ncol(qvars) + ncol(kvars))]
	k1_params <- first_stage_estimates[(ncol(qvars) + ncol(kvars) + 1):(ncol(qvars) + 2*ncol(kvars))]
	
	num_meetings <- nrow(qvars)
	
	q.temp <- exp(qvars %*% q_params) / (1 + exp(qvars %*% q_params))
	q <- c()

	for(t in 1:num_meetings){
		temp <- rep(q.temp[t],9)
		q <- c(q,temp)
	}
	
	k0 <- exp(kvars %*% k0_params) / (1 +  exp(kvars %*% k0_params))
	k1 <- (k0 + exp(kvars %*% k1_params)) / (1 +  exp(kvars %*% k1_params))

	sigma_est <- 1 / (qnorm(1 - k0) - qnorm(1 - k1))
	cutoff_est <- qnorm(1 - k0) / (qnorm(1 - k0) + qnorm(k1))
	
	theta_est_sin <- (1-2*cutoff_est)/(2*(sigma_est)^2) - log(q/(1-q))
	theta_est_sin <- exp(theta_est_sin) / (1 + exp(theta_est_sin))
	
	parameters <- as.data.frame(cbind(q,cutoff_est,theta_est_sin,sigma_est))
	colnames(parameters) <- c("Q","Cutoff","TN","S")
	return(parameters)
	
}


simulations <- function(thetaA,thetaB,sigmaA,sigmaB,sims,ql,qh,num_meetings,numA){

	# Runs "sims" simluations for nine-person commitee that takes "num_meetings" different decisions.
	# "numA" members are type A and have preference parameter "thetaA" and expertise parameter "sigmaA".
	# 9 - "numB" members are type B and have preference parameter "thetaB" and expertise parameter "sigmaB".
	# For each decision, the prior distribution on the state is drawn uniformly from [ql,qh] where 0 <= ql <= qh <= 1.

	logodds_theta_A <- log(thetaA/(1-thetaA))
	logodds_theta_B <- log(thetaB/(1-thetaB))
	
	true_Q <- matrix(NA, nrow = num_meetings, ncol = sims)
	
	theta_A_est_N <- matrix(NA, nrow = num_meetings, ncol = sims)
	sigma_A_est_N <- matrix(NA, nrow = num_meetings, ncol = sims)
	theta_B_est_N <- matrix(NA, nrow = num_meetings, ncol = sims)
	sigma_B_est_N <- matrix(NA, nrow = num_meetings, ncol = sims)
	
	theta_A_est_Y <- matrix(NA, nrow = num_meetings, ncol = sims)
	sigma_A_est_Y <- matrix(NA, nrow = num_meetings, ncol = sims)
	theta_B_est_Y <- matrix(NA, nrow = num_meetings, ncol = sims)
	sigma_B_est_Y <- matrix(NA, nrow = num_meetings, ncol = sims)
	
	counter <- 1
	
	while(counter < sims + 1){
		
		priors <- runif(num_meetings,ql,qh)
		log_odds_priors <- log(priors / (1-priors))
		
		cutoff_A <- 0.5 - sigmaA^2*(logodds_theta_A + log_odds_priors)
		cutoff_B <- 0.5 - sigmaB^2*(logodds_theta_B + log_odds_priors)
	
		shocks <- c()
		for(j in 1:num_meetings) shocks[j] <- rbinom(1, 1, priors[j])
		
		data <- as.data.frame(matrix(NA,9*num_meetings,6))
		colnames(data) <- c("Constant", "vote", "A", "Prior", "Interaction", "case")
	
		for(j in 1:num_meetings){
			index1 <- 9*(j-1) + 1
			index2 <- 9*j	
	
			pa <- 1 - pnorm((cutoff_A[j] - shocks[j])/sigmaA)
			pb <- 1 - pnorm((cutoff_B[j] - shocks[j])/sigmaB)
				
			votes <- c(rbinom(numA,1,pa),rbinom(9-numA,1,pb))
			gpA <- c(rep(1,numA),rep(0,9-numA))
	
			data[index1:index2,1] <- 1
			data[index1:index2,2] <- votes
			data[index1:index2,3] <- gpA
			data[index1:index2,4] <- priors[j]
			data[index1:index2,5] <- priors[j]*gpA
			data[index1:index2,6] <- j
		}
		
		time <- c("Prior")
		time_interactions <- c(1)
		ind <- c("A")
		ind_interactions <- c(1)
		inputs <- list("time" = time, "time_interactions" = time_interactions, "individual" = ind, "individual_interactions" = ind_interactions)

		output <- MM(data,inputs)
		
		if(length(output) > 1){
			
			augmented_N <- cbind(output$params_no_int,data)
			augmented_Y <- cbind(output$params_yes_int,data)
			
			true_Q[,counter] <- priors 
			
			num_row1 <- dim(as.matrix(unique(subset(augmented_N, subset = (A == 1), select = TN))))[1]
			num_row2 <- dim(as.matrix(unique(subset(augmented_N, subset = (A == 1), select = S))))[1]
			num_row3 <- dim(as.matrix(unique(subset(augmented_N, subset = (A == 0), select = TN))))[1]
			num_row4 <- dim(as.matrix(unique(subset(augmented_N, subset = (A == 0), select = S))))[1]
			
			num_row5 <- dim(as.matrix(unique(subset(augmented_Y, subset = (A == 1), select = TN))))[1]
			num_row6 <- dim(as.matrix(unique(subset(augmented_Y, subset = (A == 1), select = S))))[1]
			num_row7 <- dim(as.matrix(unique(subset(augmented_Y, subset = (A == 0), select = TN))))[1]
			num_row8 <- dim(as.matrix(unique(subset(augmented_Y, subset = (A == 0), select = S))))[1]
			
			if(counter == 1) first_stage_N <- matrix(output$first_stage_no_int, nrow = 1)
			else first_stage_N <- rbind(first_stage_N,output$first_stage_no_int)
			
			theta_A_est_N[1:num_row1,counter] <- as.matrix(unique(subset(augmented_N, subset = (A == 1), select = TN)))
			sigma_A_est_N[1:num_row2,counter] <- as.matrix(unique(subset(augmented_N, subset = (A == 1), select = S)))
			theta_B_est_N[1:num_row3,counter] <- as.matrix(unique(subset(augmented_N, subset = (A == 0), select = TN)))
			sigma_B_est_N[1:num_row4,counter] <- as.matrix(unique(subset(augmented_N, subset = (A == 0), select = S)))
			
			if(counter == 1) first_stage_Y <- matrix(output$first_stage_yes_int, nrow = 1)
			else first_stage_Y <- rbind(first_stage_Y,output$first_stage_yes_int)
			
			theta_A_est_Y[1:num_row5,counter] <- as.matrix(unique(subset(augmented_Y, subset = (A == 1), select = TN)))
			sigma_A_est_Y[1:num_row6,counter] <- as.matrix(unique(subset(augmented_Y, subset = (A == 1), select = S)))
			theta_B_est_Y[1:num_row7,counter] <- as.matrix(unique(subset(augmented_Y, subset = (A == 0), select = TN)))
			sigma_B_est_Y[1:num_row8,counter] <- as.matrix(unique(subset(augmented_Y, subset = (A == 0), select = S)))
			
			cat("sim = ", counter, "\n")
			counter <- counter + 1
		}
		
	}
	
	all_specs <- list("Prior" = true_Q, "FirstStageN" = first_stage_N, "FirstStageY" = first_stage_Y, "TA" = theta_A_est_N, "TA_int" = theta_A_est_Y, "TB" = theta_B_est_N, "TB_int" = theta_B_est_Y, "SA" = sigma_A_est_N, "SA_int" = sigma_A_est_Y, "SB" = sigma_B_est_N, "SB_int" = sigma_B_est_Y)
		
}

counter <- 1

# This loop generates simulation files for fixed values of A and B preference parameters (thetaA and thetaB), while varying expertise parameters.

for(x in seq(0,0.50,0.05)){
	
	results <- simulations(thetaA = 1/3, thetaB = 2/3, sigmaA = 1 + x, sigmaB = 1 - x, sims = 1000, ql = 0.2, qh = 0.8, num_meetings = 150, numA = 5)
	save(results, file = paste("continuous_sincere_unbiased_",counter,".Rdata", sep=""))
	
	counter <- counter + 1
	
}