if(.Platform$OS.type=="windows") {
  quartz <- function(title, width, height) windows(width, height)
}
  
gfilter <- function(x, kk, s = 2) { 
  # gaussian smoother, corrected to zero minimum 12-12-14
  #  reflecting, kk < 0 negative reflecting, tails to sqrt(10.), k points on each side 
  if(kk == 0) return(x)
  k <- abs(kk)
  n <- length(x)
  k <- min(k, n - 1)
  y <- NULL
  if(kk > 0){
    y[1:k] <- x[seq(k+1 , 2, -1)]
  } else {
    y[1:k] <- 2*x[1] - x[seq(k+1 , 2, -1)]
  }
  y[k + 1:n] <- x
  if(kk > 0){
    y[n + k + 1:k] <- x[seq(n-1, n - k, -1)]
  } else {
    y[n + k + 1:k] <- 2*x[n] - x[seq(n-1, n - k, -1)]
  }
  gf <- exp( -((-k:k)^2)/(0.2 * k^2))
  gf <- gf - min(gf)
  gf <- gf/sum(gf)
  return(filter(y, gf,sides=s)[k+1:n])
}

rP <- function(a, s) {
  N <- length(a)
  aSums <- cumsum(a[1:(N-1)])
  if(max(aSums) > 0) {
    stop("Positive alphas sums")
  }
  relativePrice <- array(1, N)
  for(i in 1:(N-1)) {
    relativePrice[i + 1] <- relativePrice[i]*exp( (s[i])/(4*aSums[i]) )
  }
  return(relativePrice/mean(relativePrice))
}

estimateModel <- function(dat, ords, nSmooths, fRange, len, nObs, stShift, stPVD) {
  lt <- matrix(0, nrow=len-1, ncol=nObs-stShift+1)
  for(t in stShift:(nObs-1)) {
    w0 <- cumsum(dat[ords[1:(len-1), t], t])
    w1 <- cumsum(dat[ords[1:(len-1), t+1], t+1])
    w2 <- cumsum(dat[ords[1:(len-1), t], t+1])
    w3 <- dat[ords[1:(len-1), t], t]
    lt[, (t+1) + (1-stShift)] <- lt[, t + (1-stShift)] + 2*w0*log(w1/w2)/w3
  }
  
  ltFinal <- lt[, nObs-stShift+1]/(nObs-stShift+1)
  ltFinal <- gfilter(ltFinal, -fRange)
  alphas <- array(0, len)
  alphas[1] <- -0.5*ltFinal[1]
  alphas[2:(len-1)] <- -0.5*diff(ltFinal)
  for(i in 1:nSmooths[1]) alphas[1:(len-1)] <- gfilter(alphas[1:(len-1)], fRange)
  alphas[len] <- alphas[len] - sum(alphas)
  
  sig <- array(0, len-1)
  for(t in stShift:(nObs-1)) {
    sig <- sig + log((dat[ords[1:(len-1), t], t+1]/dat[ords[1:(len-1)+1, t], t+1])/(dat[ords[1:(len-1), t], t]/dat[ords[1:(len-1)+1, t], t]))^2
  }
  sig <- sig/(nObs - stShift)
  sigmas <- sig
  for(i in 1:nSmooths[2]) sigmas <- gfilter(sigmas, fRange)
  
  rpTemp <- matrix(1, nrow=len, ncol=nObs-stPVD+1)
  for(t in 1:(nObs-stPVD+1)) {
    rpTemp[, t] <- dat[ords[, t+(stPVD-1)], t+(stPVD-1)]/mean(dat[, t+(stPVD-1)])
  }
  avgRP <- rowMeans(rpTemp)
  
  aSums <- array(0, len-1)
  aSums[1] <- alphas[1]
  for(i in 2:(len-1)) {
    aSums[i] <- aSums[i - 1] + alphas[i]
  }
  rPr <- array(1, len)
  for(i in 1:(len-1)) {
    rPr[i + 1] <- rPr[i]*exp((sigmas[i])/(4*aSums[i]))
  }
  rPr <- rPr/mean(rPr)
  
  return( c(sum(abs(rPr - avgRP)), sum((rPr - avgRP)^2)) )
}


install.packages("shape")
library(shape)


########

nObservations <- 36*12
nCommodities <- 22
linesToSkip <- 11

temp <- scan("aluminum.txt", n = nObservations*2, skip = linesToSkip, what = character())
aluminum <- temp[seq(2, nObservations*2, 2)]
aluminum <- as.numeric(aluminum)
  
temp <- scan("bananas.txt", n = nObservations*2, skip = linesToSkip, what = character())
bananas <- temp[seq(2, nObservations*2, 2)]
bananas <- as.numeric(bananas)
  
temp <- scan("barley.txt", n = nObservations*2, skip = linesToSkip, what = character())
barley <- temp[seq(2, nObservations*2, 2)]
barley <- as.numeric(barley)
  
temp <- scan("beef.txt", n = nObservations*2, skip = linesToSkip, what = character())
beef <- temp[seq(2, nObservations*2, 2)]
beef <- as.numeric(beef)
  
temp <- scan("brentCrude.txt", n = nObservations*2, skip = linesToSkip, what = character())
brentCrude <- temp[seq(2, nObservations*2, 2)]
brentCrude <- as.numeric(brentCrude)
  
temp <- scan("cocoa.txt", n = nObservations*2, skip = linesToSkip, what = character())
cocoa <- temp[seq(2, nObservations*2, 2)]
cocoa <- as.numeric(cocoa)
  
temp <- scan("copper.txt", n = nObservations*2, skip = linesToSkip, what = character())
copper <- temp[seq(2, nObservations*2, 2)]
copper <- as.numeric(copper)
  
temp <- scan("corn.txt", n = nObservations*2, skip = linesToSkip, what = character())
corn <- temp[seq(2, nObservations*2, 2)]
corn <- as.numeric(corn)
  
temp <- scan("cotton.txt", n = nObservations*2, skip = linesToSkip, what = character())
cotton <- temp[seq(2, nObservations*2, 2)]
cotton <- as.numeric(cotton)
  
temp <- scan("iron.txt", n = nObservations*2, skip = linesToSkip, what = character())
iron <- temp[seq(2, nObservations*2, 2)]
iron <- as.numeric(iron)

temp <- scan("lamb.txt", n = nObservations*2, skip = linesToSkip, what = character())
lamb <- temp[seq(2, nObservations*2, 2)]
lamb <- as.numeric(lamb)

temp <- scan("lead.txt", n = nObservations*2, skip = linesToSkip, what = character())
lead <- temp[seq(2, nObservations*2, 2)]
lead <- as.numeric(lead)

temp <- scan("nickel.txt", n = nObservations*2, skip = linesToSkip, what = character())
nickel <- temp[seq(2, nObservations*2, 2)]
nickel <- as.numeric(nickel)

temp <- scan("orange.txt", n = nObservations*2, skip = linesToSkip, what = character())
orange <- temp[seq(2, nObservations*2, 2)]
orange <- as.numeric(orange)

temp <- scan("poultry.txt", n = nObservations*2, skip = linesToSkip, what = character())
poultry <- temp[seq(2, nObservations*2, 2)]
poultry <- as.numeric(poultry)

temp <- scan("rubber.txt", n = nObservations*2, skip = linesToSkip, what = character())
rubber <- temp[seq(2, nObservations*2, 2)]
rubber <- as.numeric(rubber)

temp <- scan("soybeans.txt", n = nObservations*2, skip = linesToSkip, what = character())
soybeans <- temp[seq(2, nObservations*2, 2)]
soybeans <- as.numeric(soybeans)

temp <- scan("sugar.txt", n = nObservations*2, skip = linesToSkip, what = character())
sugar <- temp[seq(2, nObservations*2, 2)]
sugar <- as.numeric(sugar)

temp <- scan("tin.txt", n = nObservations*2, skip = linesToSkip, what = character())
tin <- temp[seq(2, nObservations*2, 2)]
tin <- as.numeric(tin)

temp <- scan("wheat.txt", n = nObservations*2, skip = linesToSkip, what = character())
wheat <- temp[seq(2, nObservations*2, 2)]
wheat <- as.numeric(wheat)

temp <- scan("woolFine.txt", n = nObservations*2, skip = linesToSkip, what = character())
wool <- temp[seq(2, nObservations*2, 2)]
wool <- as.numeric(wool)

temp <- scan("zinc.txt", n = nObservations*2, skip = linesToSkip, what = character())
zinc <- temp[seq(2, nObservations*2, 2)]
zinc <- as.numeric(zinc)

rm(temp)


## Create commodity prices matrix
commodityPrices <- matrix(0, nrow=nCommodities, ncol=nObservations)
commodityPrices[1, ] <- aluminum
commodityPrices[2, ] <- bananas
commodityPrices[3, ] <- barley
commodityPrices[4, ] <- beef
commodityPrices[5, ] <- brentCrude
commodityPrices[6, ] <- cocoa
commodityPrices[7, ] <- copper
commodityPrices[8, ] <- corn
commodityPrices[9, ] <- cotton
commodityPrices[10, ] <- iron
commodityPrices[11, ] <- lamb
commodityPrices[12, ] <- lead
commodityPrices[13, ] <- nickel
commodityPrices[14, ] <- orange
commodityPrices[15, ] <- poultry
commodityPrices[16, ] <- rubber
commodityPrices[17, ] <- soybeans
commodityPrices[18, ] <- sugar
commodityPrices[19, ] <- tin
commodityPrices[20, ] <- wheat
commodityPrices[21, ] <- wool
commodityPrices[22, ] <- zinc


## Relative prices by name and rank
fullData <- matrix(0, nrow=nCommodities, ncol=nObservations)
fullData[, 1] <- array(100, nCommodities)
for(t in 2:nObservations) {
  fullData[, t] <- fullData[, t-1]*commodityPrices[, t]/commodityPrices[, t-1]
}
relPName <- matrix(0, nrow=nCommodities, ncol=nObservations)
relPRank <- matrix(0, nrow=nCommodities, ncol=nObservations)
orders <- matrix(0, nrow=nCommodities, ncol=nObservations)
for(t in 1:nObservations) {
  relPName[, t] <- fullData[, t]/mean(fullData[, t])
  orders[, t] <- order(relPName[, t], decreasing=TRUE)
  relPRank[, t] <- relPName[orders[1:nCommodities, t], t]
}
pSharesName <- relPName/nCommodities
rm(commodityPrices)


## Plot relative prices, ranked relative prices, and log-log ranked relative prices
plotPrices <- F

startShift <- 1


if(plotPrices) {
  quartz('1', width=8, height=6)
  plot(x=1:nObservations, y=log(relPName[1, ]), type = 'l', xlab="Year", ylab="Price Relative to Average (log)", xaxt = 'n', ylim = range(log(relPName)) )
  axis(side=1, at = c(1, 61, 121, 181, 241, 301, 361, 421), label = expression(1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015))
  for(i in 2:nCommodities) lines(x=1:nObservations, y=log(relPName[i, ]), col = i)
  
  quartz('2', width=8, height=6)
  plot(x=1:nObservations, y=log(relPRank[1, ]), type = 'l', xlab="Year", ylab="Price Relative to Average (log)", xaxt = 'n', ylim = range(log(relPRank)) )
  axis(side=1, at = c(1, 61, 121, 181, 241, 301, 361, 421), label = expression(1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015))
  for(i in 2:nCommodities) lines(x=1:nObservations, y=log(relPRank[i, ]), col = i)

  quartz('3', width=8, height=6)
  plot(relPRank[, startShift], type = 'l', xlab="Rank", ylab="Price Relative to Average", log = 'xy', xaxt = 'n', yaxt = 'n', ylim = c(0.5, 2.5))
  axis(side=1, at = c(1, 2, 3, 4, 5, 10, 15, 22))
  axis(side=2, at = c(0.5, 1, 2), las = 1)
  for(t in (startShift+1):nObservations) if(t%/%12 == 1) lines(relPRank[, t], col = t)
}


## Local times and alphas
filterRange <- ceiling(nCommodities*0.4)
n1Alpha <- 1
n1Sigma <- 33

plotLocalTimes <- F
plotAlphas <- F
plotSigmas <- F
plotOutcome <- F


lt1 <- matrix(0, nrow=nCommodities-1, ncol=nObservations-startShift+1)
for(t in startShift:(nObservations-1)) {
  w0 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t])
  w1 <- cumsum(pSharesName[orders[1:(nCommodities-1), t+1], t+1])
  w2 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t+1])
  w3 <- pSharesName[orders[1:(nCommodities-1), t], t]
  lt1[, (t+1) + (1-startShift)] <- lt1[, t + (1-startShift)] + 2*w0*log(w1/w2)/w3
}

if(plotLocalTimes) {
  quartz('4', width=8, height=6)
  plot(x=startShift:nObservations, y=lt1[1, ], type = 'l', xlab="Year", ylab="Local Time", xaxt = 'n', ylim=range(lt1))
  axis(side=1, at = c(1, 61, 121, 181, 241, 301, 361, 421), label = expression(1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015))
  for(i in 2:(nCommodities-1)) lines(x=startShift:nObservations, y=lt1[i, ], col=i) 
}

ltFinal1 <- lt1[, nObservations-startShift+1]/(nObservations-startShift+1)
alphasMA <- matrix(0, nrow=nCommodities, ncol=nObservations-startShift)
for(t in 1:(nObservations-startShift)) {
  tempLT <- gfilter(lt1[ , t+1] - lt1[ , t], -filterRange)
  alphasMA[1, t] <- -0.5*tempLT[1]  
  alphasMA[2:(nCommodities-1), t] <- -0.5*diff(tempLT)
  for(i in 1:n1Alpha) alphasMA[1:(nCommodities-1), t] <- gfilter(alphasMA[1:(nCommodities-1), t], filterRange) # Smooths alphasMA, can be commented out for more speed
  alphasMA[nCommodities, t] <- alphasMA[nCommodities, t] - sum(alphasMA[, t])
}
rm(tempLT)
ltFinal1 <- gfilter(ltFinal1, -filterRange)
alphas1 <- array(0, nCommodities)
alphas1[1] <- -0.5*ltFinal1[1]
alphas1[2:(nCommodities-1)] <- -0.5*diff(ltFinal1)
for(i in 1:n1Alpha) alphas1[1:(nCommodities-1)] <- gfilter(alphas1[1:(nCommodities-1)], filterRange)
alphas1[nCommodities] <- alphas1[nCommodities] - sum(alphas1)

if(plotAlphas) {
  quartz('5', width=8, height=6)
  # Multiplying by 12 annualizes the Alphas
  plot(12*100*alphas1[1:(nCommodities-1)], type = 'l', xlab = "Rank", ylab = "Alpha (%)", las = 1, ylim = 12*100*range(alphas1[1:(nCommodities-1)]))
  Arrows(x0 = 1, y0 = 3.8, x1 = 1, y1 = 1.4, lwd = 2)
  text(x = 4, y = 2.7, "More Mean Reversion")
}


## Sigmas
sig1 <- array(0, nCommodities-1)
sigmasMA <- matrix(0, nrow=nCommodities-1, ncol=nObservations-startShift)
for(t in startShift:(nObservations-1)) {
  tempSigma <- sig1
  sig1 <- sig1 + log( (pSharesName[orders[1:(nCommodities-1), t], t+1]/pSharesName[orders[1:(nCommodities-1)+1, t], t+1])/(pSharesName[orders[1:(nCommodities-1), t], t]/pSharesName[orders[1:(nCommodities-1)+1, t], t]) )^2
  sigmasMA[, t + (1-startShift)] <- sig1 - tempSigma
  for(i in 1:n1Sigma) sigmasMA[, t + (1-startShift)] <- gfilter(sigmasMA[, t + (1-startShift)], filterRange) # Smooths sigmasMA, can be commented out for more speed
}
sig1 <- sig1/(nObservations-startShift)
sigmas1 <- sig1
for(i in 1:n1Sigma) sigmas1 <- gfilter(sigmas1, filterRange)
rm(tempSigma)

if(plotSigmas) {
  quartz('6', width=8, height=6)
  # Multiplying by 12^0.5 annualizes the Sigmas
  plot(12^0.5*100*sigmas1^0.5, type = 'l', xlab = "Rank", ylab = "Sigma (%)", las = 1, ylim = 12^0.5*100*range(sigmas1^0.5))
  Arrows(x0 = 21, y0 = 33.89, x1 = 21, y1 = 33.98, lwd = 2)
  text(x = 19, y = 33.94, "More Volatility")
}  


## Prediction vs data
startPVD <- 10

avgRelP <- array(1, nCommodities)
for(i in 1:nCommodities) {
  avgRelP[i] <- mean(relPRank[i, startPVD:nObservations])
}

minRelP <- array(0, nCommodities)
maxRelP <- array(0, nCommodities)
for(i in 1:nCommodities) {
  minRelP[i] <- min(relPRank[i, startPVD:nObservations])
  maxRelP[i] <- max(relPRank[i, startPVD:nObservations])
}

relP <- rP(alphas1, sigmas1)
if(plotOutcome) {
  quartz('9', width=8, height=6) 
  plot(relP, type = 'l', xlab="Rank", ylab="Price Relative to Average", log = 'xy', yaxt = 'n', ylim = range(minRelP, maxRelP))
  lines(avgRelP, lty = 2, col = 2)
  lines(minRelP, lty = 3, col = 3)
  lines(maxRelP, lty = 3, col = 3)
  axis(side=2, at = c(0.3, 0.6, 1.2, 2.4, 4.8), label = expression(0.3, 0.6, 1.2, 2.4, 4.8), las = 1)
  legend(1, 0.4, c("Predicted", "Average for 1981 - 2015", "Maximum/Minimum for 1981 - 2015"), bty = "n", lty = c(1, 2, 3), col = c(1, 2, 3))
}


## Boostrap for Standard Errors
nSims <- 10000
significance <- 95
periods <- (nObservations-startShift+1) - 1
alphasBS <- matrix(nrow = nSims, ncol = nCommodities)
sigmasBS <- matrix(nrow = nSims, ncol = nCommodities-1)


for(i in 1:nSims) {
  obs <- sample(1:periods, periods, replace = T)
  
  ltBS <- matrix(0, nrow=nCommodities-1, ncol=periods+1)
  for(t in 1:periods) {
    index <- t + (startShift-1)
    t <- obs[t] + (startShift-1)
    w0 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t])
    w1 <- cumsum(pSharesName[orders[1:(nCommodities-1), t+1], t+1])
    w2 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t+1])
    w3 <- pSharesName[orders[1:(nCommodities-1), t], t]
    ltBS[, (index+1) + (1-startShift)] <- ltBS[, index + (1-startShift)] + 2*w0*log(w1/w2)/w3
  }
  
  ltFinalBS <- ltBS[, periods+1]/(periods+1)
  ltFinalBS <- gfilter(ltFinalBS, -filterRange)
  alphas <- array(0, nCommodities)
  alphas[1] <- -0.5*ltFinalBS[1]
  alphas[2:(nCommodities-1)] <- -0.5*diff(ltFinalBS)
  for(j in 1:n1Alpha) alphas[1:(nCommodities-1)] <- gfilter(alphas[1:(nCommodities-1)], filterRange)
  alphas[nCommodities] <- alphas[nCommodities] - sum(alphas)
  
  sig <- array(0, nCommodities-1)
  for(t in 1:periods) {
    t <- obs[t] + (startShift-1)
    sig <- sig + log( (pSharesName[orders[1:(nCommodities-1), t], t+1]/pSharesName[orders[1:(nCommodities-1)+1, t], t+1])/(pSharesName[orders[1:(nCommodities-1), t], t]/pSharesName[orders[1:(nCommodities-1)+1, t], t]) )^2
  }
  sig <- sig/periods
  sigmas <- sig
  for(j in 1:n1Sigma) sigmas <- gfilter(sigmas, filterRange)
  
  alphasBS[i, ] <- alphas
  sigmasBS[i, ] <- sigmas
}
rm(alphas, sigmas, sig, ltBS, ltFinalBS)


cIntervalAlphas <- matrix(nrow = 2, ncol = nCommodities-1)
cIntervalSigmas <- matrix(nrow = 2, ncol = nCommodities-1)
for(i in 1:(nCommodities-1)) {
  cIntervalAlphas[, i] <- quantile(alphasBS[1:nSims, i], probs = c(0.5*(1 - significance/100), 1 - 0.5*(1 - significance/100)))
  cIntervalSigmas[, i] <- quantile(sigmasBS[1:nSims, i], probs = c(0.5*(1 - significance/100), 1 - 0.5*(1 - significance/100)))
}

# Multiplying by 12 annualizes the Alphas  
quartz('10', width=8, height=6)
plot(12*100*alphas1[1:(nCommodities-1)], type = 'l', xlab = "Rank", ylab = "Alpha (%)", las = 1, ylim = range(c(12*100*cIntervalAlphas, 12*100*alphas1[1:(nCommodities-1)])) )
lines(12*100*cIntervalAlphas[1, ], col = 2, lty = 2)
lines(12*100*cIntervalAlphas[2, ], col = 2, lty = 2)
Arrows(x0 = 1, y0 = 5, x1 = 1, y1 = 2.2, lwd = 2)
text(x = 4, y = 3.6, "More Mean Reversion")


# Multiplying by 12^0.5 annualizes the Sigmas
quartz('11', width=8, height=6)
plot(12^0.5*100*sigmas1^0.5, type = 'l', xlab = "Rank", ylab = "Sigma (%)", las = 1, ylim = c(30.5, 36.5) )
lines(12^0.5*100*cIntervalSigmas[1, ]^0.5, col = 2, lty = 2)
lines(12^0.5*100*cIntervalSigmas[2, ]^0.5, col = 2, lty = 2)
Arrows(x0 = 21, y0 = 35.2, x1 = 21, y1 = 36.4, lwd = 2)
text(x = 19, y = 35.8, "More Volatility")


## Calculate growth rates for top 1 vs bottom N-1, top 2 vs bottom N-2, etc... 
startMonth <- 5
rankGroups <- matrix(100, nrow=(nCommodities-1)*2, ncol=nObservations-(startMonth-1))

for(t in 2:(nObservations-(startMonth-1)) ) {
  for(i in seq(1, (nCommodities-1)*2, 2)) {
    rankGroups[i, t] <- rankGroups[i, t-1]*( sum(fullData[orders[1:ceiling(i/2), t+(startMonth-1)-1], t+(startMonth-1)])/sum(fullData[orders[1:ceiling(i/2), t+(startMonth-1)-1], t+(startMonth-1)-1]) )
    rankGroups[i+1, t] <- rankGroups[i+1, t-1]*( sum(fullData[orders[(ceiling(i/2)+1):nCommodities, t+(startMonth-1)-1], t+(startMonth-1)])/sum(fullData[orders[(ceiling(i/2)+1):nCommodities, t+(startMonth-1)-1], t+(startMonth-1)-1]) )
  }
}
rankGroupsLog <- log(rankGroups)

# Monthly growth rates
rankGrowth <- matrix(100, nrow=(nCommodities-1)*2, ncol=nObservations-(startMonth-1)-1)
rankGrowthLog <- matrix(100, nrow=(nCommodities-1)*2, ncol=nObservations-(startMonth-1)-1)
rankGrowthDiff <- matrix(100, nrow=(nCommodities-1)*2, ncol=nObservations-(startMonth-1)-1)
for(i in 1:((nCommodities-1)*2) ) {
  rankGrowth[i, ] <- diff(rankGroups[i, ])/rankGroups[i, 1:(nObservations-(startMonth-1)-1)]
  rankGrowthLog[i, ] <- diff(rankGroupsLog[i, ])
}
for(i in seq(1, (nCommodities-1)*2, 2)) {
  rankGrowthDiff[i, ] <- rankGrowth[i, ] - rankGrowth[i+1, ]
  rankGrowthDiff[i+1, ] <- rankGrowthLog[i, ] - rankGrowthLog[i+1, ]
}
for(i in seq(1, (nCommodities-1)*2, 2)) {
  print( 100*c((i+1)/200, mean(rankGrowthDiff[i, ]), mean(rankGrowthDiff[i+1, ])) )
  print( 100*c( var(rankGrowthDiff[i, ])^0.5, var(rankGrowthDiff[i+1, ])^0.5) )
  print( c( mean(rankGrowthDiff[i, ])/(var(rankGrowthDiff[i, ])/(nObservations-(startMonth-1)-1))^0.5, mean(rankGrowthDiff[i+1, ])/(var(rankGrowthDiff[i+1, ])/(nObservations-(startMonth-1)-1))^0.5) )
}


## Out-of-Sample 1-month Forecasts of Ranked Commodity Groups Using Alphas (Estimate Alphas up to startMonth and Forecast Afterwards)
startShift <- 1
startMonth <- 120

# Estimate Alphas using Data from startShift to startMonth
filterRangeForecast <- ceiling(nCommodities*0.4)
nAlphaForecast <- 1

ltForecast <- matrix(0, nrow=nCommodities-1, ncol=startMonth-startShift+1)
for(t in startShift:(startMonth-1)) {
  w0 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t])
  w1 <- cumsum(pSharesName[orders[1:(nCommodities-1), t+1], t+1])
  w2 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t+1])
  w3 <- pSharesName[orders[1:(nCommodities-1), t], t]
  ltForecast[, (t+1) + (1-startShift)] <- ltForecast[, t + (1-startShift)] + 2*w0*log(w1/w2)/w3
}

ltFinalForecast <- ltForecast[, startMonth-startShift+1]/(startMonth-startShift+1)
ltFinalForecast <- gfilter(ltFinalForecast, -filterRangeForecast)
alphasForecast <- array(0, nCommodities)
alphasForecast[1] <- -0.5*ltFinalForecast[1]
alphasForecast[2:(nCommodities-1)] <- -0.5*diff(ltFinalForecast)
for(i in 1:nAlphaForecast) alphasForecast[1:(nCommodities-1)] <- gfilter(alphasForecast[1:(nCommodities-1)], filterRangeForecast)
alphasForecast[nCommodities] <- alphasForecast[nCommodities] - sum(alphasForecast)

# Generate Forecasts for Data after startMonth using Estimates of Alphas from before startMonth
forecast <- matrix(100, nrow=nCommodities, ncol=nObservations-(startMonth-1)-1)
errors <- matrix(0, nrow=nCommodities, ncol=nObservations-(startMonth-1)-1)
errorsRW <- matrix(0, nrow=nCommodities, ncol=nObservations-(startMonth-1)-1)

for( t in 1:(nObservations-(startMonth-1)-1) ) {
  forecast[, t] <- log( cumsum(relPName[orders[, t+(startMonth-1)], t+(startMonth-1)]) ) + cumsum(alphasForecast)*relPName[orders[, t+(startMonth-1)], t+(startMonth-1)]/cumsum(relPName[orders[, t+(startMonth-1)], t+(startMonth-1)])
  errors[, t] <- log( cumsum(relPName[orders[, t+(startMonth-1)], t+startMonth]) ) - forecast[, t]
  errorsRW[, t] <- log( cumsum(relPName[orders[, t+(startMonth-1)], t+startMonth]) ) - log( cumsum(relPName[orders[, t+(startMonth-1)], t+(startMonth-1)]) )
}

errorSums <- rowSums(errors^2)
errorSums <- errorSums^0.5
errorSumsRW <- rowSums(errorsRW^2)
errorSumsRW <- errorSumsRW^0.5
print(c(errorSums/errorSumsRW, sum(errorSums)/sum(errorSumsRW)))


## Out-of-Sample 1-month Forecasts of Ranked Commodity Groups Using Alphas (Rolling Estimates of Alphas while Forecasting)
startShift <- 1
monthStart <- 120

filterRangeForecast <- ceiling(nCommodities*0.4)
nAlphaForecast <- 1

forecast <- matrix(100, nrow=nCommodities, ncol=nObservations-(monthStart-1)-1)
errors <- matrix(0, nrow=nCommodities, ncol=nObservations-(monthStart-1)-1)
errorsRW <- matrix(0, nrow=nCommodities, ncol=nObservations-(monthStart-1)-1)

for(rollingStartMonth in monthStart:(nObservations-1)) {
  # Estimate Alphas using Data from startShift to rollingStartMonth, which ranges from monthStart to nObservations-1
  ltForecast <- matrix(0, nrow=nCommodities-1, ncol=rollingStartMonth-startShift+1)
  for(t in startShift:(rollingStartMonth-1)) {
    w0 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t])
    w1 <- cumsum(pSharesName[orders[1:(nCommodities-1), t+1], t+1])
    w2 <- cumsum(pSharesName[orders[1:(nCommodities-1), t], t+1])
    w3 <- pSharesName[orders[1:(nCommodities-1), t], t]
    ltForecast[, (t+1) + (1-startShift)] <- ltForecast[, t + (1-startShift)] + 2*w0*log(w1/w2)/w3
  }
  
  ltFinalForecast <- ltForecast[, rollingStartMonth-startShift+1]/(rollingStartMonth-startShift+1)
  ltFinalForecast <- gfilter(ltFinalForecast, -filterRangeForecast)
  alphasForecast <- array(0, nCommodities)
  alphasForecast[1] <- -0.5*ltFinalForecast[1]
  alphasForecast[2:(nCommodities-1)] <- -0.5*diff(ltFinalForecast)
  for(i in 1:nAlphaForecast) alphasForecast[1:(nCommodities-1)] <- gfilter(alphasForecast[1:(nCommodities-1)], filterRangeForecast)
  alphasForecast[nCommodities] <- alphasForecast[nCommodities] - sum(alphasForecast)
  
  # Generate Forecasts for Data in Month just after rollingStartMonth using Estimates of Alphas from before rollingStartMonth
  forecast[, rollingStartMonth-monthStart+1] <- log( cumsum(relPName[orders[, rollingStartMonth], rollingStartMonth]) ) + cumsum(alphasForecast)*relPName[orders[, rollingStartMonth], rollingStartMonth]/cumsum(relPName[orders[, rollingStartMonth], rollingStartMonth])
  errors[, rollingStartMonth-monthStart+1] <- log( cumsum(relPName[orders[, rollingStartMonth], rollingStartMonth+1]) ) - forecast[, rollingStartMonth-monthStart+1]
  errorsRW[, rollingStartMonth-monthStart+1] <- log( cumsum(relPName[orders[, rollingStartMonth], rollingStartMonth+1]) ) - log( cumsum(relPName[orders[, rollingStartMonth], rollingStartMonth]) )
}

errorSums <- rowSums(errors^2)
errorSums <- errorSums^0.5
errorSumsRW <- rowSums(errorsRW^2)
errorSumsRW <- errorSumsRW^0.5
print(c(errorSums/errorSumsRW, sum(errorSums)/sum(errorSumsRW)))


## Optimal smoothings
estimateModel(dat=pSharesName, ords=orders, nSmooths=c(n1Alpha, n1Sigma), fRange=filterRange, len=nCommodities, nObs=nObservations, stShift=startShift, stPVD=startPVD)

numSmoothsAlpha <- 10
ests <- matrix(0, nrow=numSmoothsAlpha, ncol=2)
for(i in 1:numSmoothsAlpha) {
  nSmths <- c(i, n1Sigma)
  ests[i, ] <- estimateModel(dat=pSharesName, ords=orders, nSmooths=nSmths,fRange=filterRange, len=nCommodities, nObs=nObservations, stShift=startShift, stPVD=startPVD)
}
c(which(ests[, 1] == min(ests[, 1])), min(ests[, 1]), which(ests[, 2] == min(ests[, 2])), min(ests[, 2]))
ests

n1Alpha <- 1
numSmoothsSigma <- 100
ests <- matrix(0, nrow=numSmoothsSigma, ncol=2)
for(i in 1:numSmoothsSigma) {
  nSmths <- c(n1Alpha, i)  
  ests[i, ] <- estimateModel(dat=pSharesName, ords=orders, nSmooths=nSmths,fRange=filterRange, len=nCommodities, nObs=nObservations, stShift=startShift, stPVD=startPVD)
}
c(which(ests[, 1] == min(ests[, 1])), min(ests[, 1]), which(ests[, 2] == min(ests[, 2])), min(ests[, 2]))
ests


## Simulations to Test Efficiency of Local-Time-Based Estimators
nAgents <- 300
cutoff <- 100
nSimulations <- 1000
nPeriods <- 12*50
periodLength <- 10
filterRangeSims <- ceiling(0.4*nAgents)

commonAlpha <- -0.02/12
commonSigma <- (0.04/12)^0.5 # Together with alpha = -0.01, this gives you Zipf's law (i.e. slope of -1 in the log-log plot of shares vs. rank)
#commonAlpha <- -0.2/12
#commonSigma <- (0.4/12)^0.5 # Together with alpha = -0.02, this gives you Zipf's law (i.e. slope of -1 in the log-log plot of shares vs. rank)
alphasSim <- array(commonAlpha, nAgents) # Use alphasSim for simulation; these alphas do not add up to zero, and in this case are all equal to commonAlpha
alphas <- alphasSim
alphas[nAgents] <- -sum(alphas[1:(nAgents-1)])  # In contrast to alphasSim, alphas are the fully correct alphas and add up to zero
aSums <- cumsum(alphas)
sigmas <- array(commonSigma, nAgents)

startShares <- array(1, nAgents)
for(i in 2:nAgents) {
  startShares[i] <- startShares[i-1] + 0.25*(sigmas[i]^2 + sigmas[i-1]^2)/aSums[i-1]
}
rm(aSums)
startShares <- exp(startShares)/sum(exp(startShares))
ordersTemp <- order(startShares, decreasing=T)

estimatePeriods <- 12*c(1, 2, 3, 4, 5, 10, 20, 30, 40, 50)

errorAlphas <- matrix(0, nrow=length(estimatePeriods), ncol=nSimulations)
errorAlphasDirect <- matrix(0, nrow=length(estimatePeriods), ncol=nSimulations) 

for(i in 1:nSimulations) {
  simResult <- matrix(0, nrow=nAgents, ncol=nPeriods)
  orders <- matrix(0, nrow=nAgents, ncol=nPeriods)
  shares <- startShares
  ordersTemp <- order(shares, decreasing=T)
  orders[, 1] <- ordersTemp
  simResult[, 1] <- shares
  index <- 1
  
  # Simulate economy for nPeriods, with each period being of length periodLength; 
  for(p in 1:(nPeriods-1)) {
    for(t in 1:periodLength) {
      shares <- log(shares)
      shares[ordersTemp] <- shares[ordersTemp] + alphasSim/periodLength + rnorm(nAgents, mean=0, sd=sigmas/periodLength^0.5)
      
      ordersTemp <- order(shares, decreasing=F)
      total <- 0
      for(j in 1:(nAgents-1)) {
        total <- total + shares[ordersTemp[j]]
        if( j*shares[ordersTemp[j+1]] - total > 0.5*alphas[nAgents]/periodLength ) break
      }
      pivot <- (2*total + alphas[nAgents]/periodLength)/j
      shares[ordersTemp[1:j]] <- pivot - shares[ordersTemp[1:j]]
    
      shares <- exp(shares)/sum(exp(shares)) # Turn shares into non-log shares that add up to one (need to do something like this to keep shares from getting too big)
      ordersTemp <- order(shares, decreasing=T)
    }
    orders[, p+1] <- ordersTemp
    simResult[, p+1] <- shares
    
    
    # Estimate alphas if period p is in array estimatePeriods
    if(is.element(p+1, estimatePeriods)) {
      # Estimate alphas using standard local-time-based method and then record estimates
      lt <- matrix(0, nrow=nAgents-1, ncol=p+1)
      for(t in 1:p) {
        w0 <- cumsum(simResult[orders[1:(nAgents-1), t], t])
        w1 <- cumsum(simResult[orders[1:(nAgents-1), t+1], t+1])
        w2 <- cumsum(simResult[orders[1:(nAgents-1), t], t+1])
        w3 <- simResult[orders[1:(nAgents-1), t], t]
        lt[, t+1] <- lt[, t] + 2*w0*log(w1/w2)/w3
      }
      
      ltFinal <- lt[, p+1]/(p+1)
      ltFinal <- gfilter(ltFinal, -filterRangeSims)
      alphasTemp <- array(0, nAgents)
      alphasTemp[1] <- -0.5*ltFinal[1]
      alphasTemp[2:(nAgents-1)] <- -0.5*diff(ltFinal)
      alphasTemp[1:(nAgents-1)] <- gfilter(alphasTemp[1:(nAgents-1)], filterRangeSims)
      alphasTemp[nAgents] <- alphasTemp[nAgents] - sum(alphasTemp)
      
      errorAlphas[index, i] <- sum( (alphasTemp[1:(cutoff-1)] - alphas[1:(cutoff-1)])^2 )
      
      # Estimate alphas directly and then record estimates
      alphasTemp <- array(0, nAgents)
      for(t in 1:p) {
        alphasTemp <- alphasTemp + log(simResult[orders[, t], t+1]/simResult[orders[, t], t])
      }
      alphasTemp <- alphasTemp/p
      alphasTemp <- gfilter(alphasTemp, filterRangeSims)
      alphasTemp[nAgents] <- -sum(alphasTemp[1:(nAgents-1)])
      
      errorAlphasDirect[index, i] <- sum( (alphasTemp[1:(cutoff-1)] - alphas[1:(cutoff-1)])^2 )
      
      index <- index + 1
      rm(alphasTemp, lt, ltFinal)
    }
    
  }
  rm(shares, ordersTemp, simResult, orders)
} 


avgError <- rowMeans(errorAlphas)
avgErrorDirect <- rowMeans(errorAlphasDirect)
print(100*avgError^0.5)
print(100*avgErrorDirect^0.5)

rm(alphasSim, startShares, shares, sigmas, errorAlphas, errorAlphasDirect, avgAlphas, avgAlphasDirect)

