rm(list = ls())
setwd("[enter directory where results_2021-03-31.RData is stored]")

# Packages
library(dplyr)
library(zoo)
library(reshape2)
library(ggplot2)
library(knitr)
library(lmtest)
library(sandwich)

# Seed
set.seed(20200503)

# Load functions
source("procs_rev.R")

# Destination folder for outputs
out_fold <- "outputs/" 

# Data type for UCSV/RW model
type_rw <- "first_release"

# Choice of evaluation sample
short_sample <- FALSE
if (short_sample){
  start_date <- 1987.5
  add <- "_short"
} else {
  start_date <- 1976.25
  add <- ""
}

# Parameters for plots (pdf figures)
plot_width <- 5
plot_height <- 5

# Load results
load("results_2021-03-31.RData")
pred_all <- results$pred_all %>% 
  filter(origin_date >= (start_date + target_date - origin_date))
comb_all <- results$comb_all %>% 
  filter(origin_date >= (start_date + .25*(h-1)))
# note that first date differs across horizons to accomodate horizon specific
# estimation of combination parameters

# Time series plot of forecasts (Figure 3 in paper)
for (hh in 1:5){
  for (type_sel in c("mean", "v")){
    yl <- if_else(type_sel == "mean", "Mean", "Variance")
    plot_tmp <- pred_all %>% 
      filter((target_date - origin_date)*4 == (hh-1), type == type_sel) %>%
      mutate(method = toupper(method), 
             method = gsub("RW", "UCSV", method)) %>%
      ggplot(aes(x = target_date, y = value, color = method)) + geom_line(size = 1) + 
      theme_minimal(base_size = 18) + 
      scale_color_viridis_d(name = "") + xlab("Year") + 
      ylab(yl) + 
      theme(legend.position = "top") + xlim(c(1976, 2021))
    ggsave(paste0(out_fold, "/forecasts_h", hh, "_", type_rw,
                  "_", type_sel, add, ".pdf"), 
           width = plot_width, height = plot_height)
  }
}

# Preparations for tables and figures on combinations
# SE and D at each time point
comb_all2 <- comb_all %>% filter(method %in% c("LP", "CLP"), !w %in% c(0, 1)) %>%
  group_by(origin_date, h, w) %>% 
  summarise(e2 = e2[method == "LP"], av = variance[method == "CLP"], 
            d = variance[method == "LP"] - variance[method == "CLP"]) %>%
  ungroup

# Correlation btw variance and SE
comb_all3 <- comb_all2 %>% group_by(h, w) %>% 
  summarise(cor_lp = cor(av + d, e2),
            cor_clp = cor(av, e2)) %>%
  ungroup %>% melt(id.vars = c("h", "w"))

# Compute MSFE-optimal weight for plots 
w_opt <- pred_all %>% mutate(h = (target_date-origin_date)*4+1) %>%
  filter(type == "e") %>% dcast(target_date+h~method, value.var = "value") %>%
  group_by(h) %>% summarise(w = get_weight(cmm, rw)) %>%
  ungroup

# Plot of correlation against weight (2nd row of Figure 4 in paper)
cls <- c("#000000", "#E69F00", "#56B4E9") # colors
line_width <- 1.2 # line width
for (hh in 1:5){
  comb_all3 %>% filter(h == hh) %>% 
    ggplot(aes(x = w, y = value, color = variable, linetype = variable)) + 
    geom_line(size = line_width) + theme_minimal(base_size = 18) + 
    scale_color_manual(name = "", breaks = c("cor_lp", "cor_clp"), 
                       labels = c("LP", "CLP"), values = cls[1:2]) + 
    scale_linetype_manual(breaks = c("cor_lp", "cor_clp"), 
                          values = 1:2) + 
    guides(linetype = FALSE) +
    geom_vline(xintercept = w_opt$w[hh], color = cls[3], 
               linetype = 3, size = I(2*line_width)) +
    xlab(expression("Combination Weight"~omega[1])) +
    ylab("Cor(V,S)") + 
    theme(legend.position = "top")
  ggsave(paste0(out_fold, "/corvs_h", hh, "_", type_rw, add, ".pdf"), 
         width = plot_width, height = plot_height)
}

# Compute average scores and variances
comb_all4 <- comb_all %>% filter(method %in% c("LP", "CLP")) %>%
  group_by(h, method, w) %>% 
  summarise(variance = mean(variance), se = mean(e2),
            dss = mean(dss), logs = mean(logs), .groups = "keep") %>%
  ungroup %>% mutate(method = factor(method, levels = c("LP", "CLP")))

# Score and variance plots
for (hh in 1:5){
  # Variance plot (1st row of Figure 4)
  comb_all4 %>% filter(h == hh) %>% 
    select(-h) %>%
    melt(id.vars = c("method", "w")) %>%
    filter(!(method == "CLP" & variable == "se"), 
           variable %in% c("se", "variance")) %>%
    mutate(type = factor(paste0(method, variable))) %>%
    ggplot(aes(x = w, y = value, color = type, linetype = type)) + 
    geom_line(size = line_width) + 
    scale_color_manual(name = "", 
                       breaks = c("LPvariance", 
                                  "CLPvariance", 
                                  "LPse"),
                       labels = c("LP", "CLP", "MSFE"), 
                       values = cls) + 
    scale_linetype_manual(name = "",
                          breaks = c("LPvariance", 
                                     "CLPvariance", 
                                     "LPse"),
                          values = 1:3) + 
    guides(linetype = FALSE) +
    theme_minimal(base_size = 18) + 
    xlab(expression("Combination Weight"~omega[1])) + 
    ylab("Variance") + theme(legend.position = "top") +
    geom_vline(xintercept = w_opt$w[hh], color = cls[3], 
               linetype = 3, size = I(2*line_width))
  ggsave(paste0(out_fold, "/variance_h", hh, "_", type_rw, add, ".pdf"), 
         width = plot_width, height = plot_height)
  
  # DSS plot (1st row of Figure 5)
  comb_all4 %>% filter(h == hh) %>% 
    ggplot(aes(x = w, y = dss, color = method, linetype = method)) + 
    geom_line(size = line_width) + 
    scale_color_manual(name = "", 
                       breaks = c("LP", "CLP"),
                       labels = c("LP", "CLP"), 
                       values = cls[1:2]) + 
    scale_linetype_manual(name = "", 
                          breaks = c("LP", "CLP"),
                          values = 1:2) + 
    theme_minimal(base_size = 18) + 
    xlab(expression("Combination Weight"~omega[1])) +
    ylab("DSS") + 
    geom_vline(xintercept = w_opt$w[hh], col = cls[3], size = I(2*line_width),
               linetype = 3) + 
    theme(legend.position = "top") + guides(linetype = FALSE)
  ggsave(paste0(out_fold, "/dss_h", hh, "_", type_rw, add, ".pdf"), 
         width = plot_width, height = plot_height)
  
  # LogS plot (2nd row of Figure 5)
  comb_all4 %>% filter(h == hh) %>% 
    ggplot(aes(x = w, y = -logs, color = method, linetype = method)) + 
    geom_line(size = line_width) + 
    scale_color_manual(name = "", 
                       breaks = c("LP", "CLP"),
                       labels = c("LP", "CLP"), 
                       values = cls[1:2]) + 
    scale_linetype_manual(name = "", 
                          breaks = c("LP", "CLP"),
                          values = 1:2) + 
    theme_minimal(base_size = 18) + 
    xlab(expression("Combination Weight"~omega[1])) +
    ylab("LogS") + geom_vline(xintercept = w_opt$w[hh], 
                              color = cls[3], size = I(2*line_width),
                              linetype = 3) + 
    theme(legend.position = "top") + guides(linetype = FALSE)
  ggsave(paste0(out_fold, "/logs_h", hh, "_", type_rw, add, ".pdf"), 
         width = plot_width, height = plot_height)
}

# Comparison of combinations (Table 2 in paper)
df0_t <- df_t <- data.frame()
comb_meths <- c("LP", "CLP", "SLP", "BLP", "SCLP", "BCLP")
pairs <- c("LP-CLP", "SLP-CLP", "BLP-CLP", "SLP-SCLP", "BLP-BCLP")
f_v <- kernHAC # function for variance computation (either kernHAC or NeweyWest)
for (h_sel in 1:5){
  for (score in c("logs", "dss")){
    vals <- rep(NA, 5)
    df_score <- data.frame(comb_all[, c("origin_date", "method", "h", "w")], 
                           score = comb_all[,score]) %>%
      filter(w == .5) %>%
      mutate(origin_date = as.numeric(origin_date)) %>%
      filter(h == h_sel) %>% select(origin_date, method, score) %>%
      dcast(origin_date~method, value.var = "score") %>% na.omit
    # Make tables on average scores and Diebold-Mariano tests
    # Change sign for log score (s.t. positive stat means that CLP is better)
    const <- ifelse(score == "logs", -1, 1)
    vals0 <- const*colMeans(df_score[, comb_meths])
    # Average scores
    df0_t <- rbind(df0_t, 
                   data.frame(m = comb_meths, h = h_sel, 
                              score = score, value = vals0, 
                              m_nr = 1:6))
    # Diebold-Mariano statistics
    vals[1] <- (lm(const*(LP-CLP)~1, data = df_score) %>% 
                  coeftest(vcov. = f_v))[1,3]
    vals[2] <- (lm(const*(SLP-CLP)~1, data = df_score) %>% 
                  coeftest(vcov. = f_v))[1,3]
    vals[3] <- (lm(const*(BLP-CLP)~1, data = df_score) %>% 
                  coeftest(vcov. = f_v))[1,3]
    vals[4] <- (lm(const*(SLP-SCLP)~1, data = df_score) %>% 
                  coeftest(vcov. = f_v))[1,3]
    vals[5] <- (lm(const*(BLP-BCLP)~1, data = df_score) %>% 
                  coeftest(vcov. = f_v))[1,3]
    df_t <- rbind(df_t, 
                  data.frame(pair = pairs, h = h_sel, 
                             score = score, value = vals, 
                             pair_nr = 1:5))
  }
}

# Write tex code for Table 2
dcast(m+m_nr~score+h, data = df0_t) %>% 
  arrange(m_nr) %>% select(-m_nr) %>%
  table_helper %>%
  writeLines2(paste0(out_fold, "/tab_scores_", type_rw, add, ".tex"))
# Write tex code for Table 3
dcast(pair+pair_nr~score+h, data = df_t) %>% 
  arrange(pair_nr) %>% select(-pair_nr) %>%
  table_helper(digits = 2, bold = FALSE) %>%
  writeLines2(paste0(out_fold, "/tab_tstats_", type_rw, add, ".tex"))

# Summary info on evaluation sample
pred_all %>% group_by(method) %>% 
  summarise(min_origin = min(origin_date),
            min_target = min(target_date), 
            max_origin = max(origin_date), 
            max_target = max(target_date)) %>%
  kable(digits = 2)
comb_all %>% mutate(target_date = origin_date + .25*(h-1)) %>%
  group_by(method) %>% 
  summarise(min_origin = min(origin_date),
            min_target = min(target_date), 
            max_origin = max(origin_date), 
            max_target = max(target_date)) %>%
  kable(digits = 2)