#Replicating The Impact of Data Revisions on the Robustness of Growth Determinants 
#A Note on 'Determinants of Economic Growth. Will Data Tell?'
#Martin Feldkircher (martin.feldkircher@gzpace.net) and Stefan Zeugner (stefan.zeugner@ulb.ac.be)

#____Abstract___________________
#Ciccone and Jarocinski (2010) show that inference in Bayesian Model Averaging (BMA) can be highly sensitive to small changes in international income data. 
#In particular they demonstrate that the importance attributed to potential growth determinants varies tremendously over different revisions of growth data. 
#They conclude that 'agnostic' priors appear too sensitive for this strand of growth empirics. In response, we show that the found instability owes much to a 
#specific BMA set-up: First, comparing the same countries over data revisions improves robustness. Second, much of the remaining variation can be reduced by 
#applying an evenly 'agnostic', but flexible prior. This tutorial replicates the results provided in Feldkircher and Zeugner (2012). 
#________________________________



#We start with loading the data
  # NOTE: this wipes every object (including hidden ones) from the current
  # workspace, so the script always starts from a clean slate.
  rm(list = ls(all.names = TRUE))
  # library() stops with an error when BMS is not installed; require() would
  # merely warn and return FALSE, letting the script fail later on bms().
  library(BMS)
  #please make sure that your current working directory is set to the one of this file
  # The objects dataList and nameMatching used further below are not defined in
  # this script, so they are presumably provided by this file -- TODO confirm.
  load("pwt_replication.RData")
  


#dataList contains three vintages of PWT data: PWT 6.0, 6.1 and 6.2. We first determine the countries that are common to all three vintages:


 # Row names (countries) shared by the first three elements of dataList.
 # The right fold reproduces intersect(v3, intersect(v1, v2)), so the result
 # keeps the ordering of the third element.
 commonCountries <- Reduce(intersect, lapply(dataList[c(3, 1, 2)], rownames), right = TRUE)
 # Same three vintages, each restricted to the rows of the common countries.
 dataListC <- lapply(
   dataList[c("pwt60t88", "pwt61t84", "pwt62t79")],
   function(vintage) vintage[commonCountries, ]
 )



#We now proceed to performing the BMA estimation for the three PWT vintages and various coefficient priors.
#We have defined a rather small number of iterations for the BMA MCMC-sampler: burn=5000 (for the burn-in draws) and iter=1000 for 'counted' draws.



   # MCMC settings shared by all bms() calls in this script:
   # iterNr = number of counted draws, burnNr = number of burn-in draws.
   iterNr <- 1000
   burnNr <- 5000
bricList <- hyperList <- uipList <- list()
  
  #The B-RIC prior (g=max(K^2,N)): 
bricList[[1]] <- bms(dataList$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)
bricList[[2]] <- bms(dataList$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)
bricList[[3]] <- bms(dataList$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)

  #The hyper-g prior (g/(1+g) ~ Beta(1, a/2-1)), with the hyperparameter a
  #chosen via "hyper=BRIC" (see the BMS documentation of the g argument).
  #All three vintages must use the identical prior string: the third call
  #previously read "hhyper=BRIC", a typo that is not a documented g value.
hyperList[[1]] <- bms(dataList$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)
hyperList[[2]] <- bms(dataList$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)
hyperList[[3]] <- bms(dataList$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)

  #The Unit information prior: g=N
uipList[[1]] <- bms(dataList$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)
uipList[[2]] <- bms(dataList$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)
uipList[[3]] <- bms(dataList$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)


  # Label the results by vintage so they can be addressed as e.g. bricList$pwt60t88.
names(uipList) <- c("pwt60t88", "pwt61t84", "pwt62t79")
names(hyperList) <- names(uipList)
names(bricList) <- names(uipList)




  #The same for the common country samples
# BMA estimation on the common-country data sets (dataListC); same MCMC
# settings and model prior as for the varying samples, in the same order
# of priors: B-RIC, hyper-g, UIP.
bricListC <- hyperListC <- uipListC <- list()
bricListC[[1]] <- bms(dataListC$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)
bricListC[[2]] <- bms(dataListC$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)
bricListC[[3]] <- bms(dataListC$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "bric", user.int = FALSE)

# All three vintages must use the identical prior string: the third call
# previously read "hhyper=BRIC", a typo that is not a documented g value.
hyperListC[[1]] <- bms(dataListC$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)
hyperListC[[2]] <- bms(dataListC$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)
hyperListC[[3]] <- bms(dataListC$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "hyper=BRIC", user.int = FALSE)

uipListC[[1]] <- bms(dataListC$pwt60t88, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)
uipListC[[2]] <- bms(dataListC$pwt61t84, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)
uipListC[[3]] <- bms(dataListC$pwt62t79, burn = burnNr, iter = iterNr, mprior = "uniform", g = "uip", user.int = FALSE)

# Label the results by vintage so they can be addressed as e.g. bricListC$pwt60t88.
names(uipListC) <- c("pwt60t88", "pwt61t84", "pwt62t79")
names(hyperListC) <- names(uipListC)
names(bricListC) <- names(uipListC)




############## Some helper functions ###########################
  pipsfromlist <- function(bmalist) {
    # Returns the PIPs from a list of bma objects.
    #
    # bmalist: non-empty list of bma objects that share the same regressors.
    # Value:   one column per element of bmalist (a matrix when there is more
    #          than one regressor), rows in the regressor order of the first
    #          element rather than sorted by PIP.
    stopifnot(length(bmalist) > 0)
    # Take the variable names from the list that was passed in rather than from
    # a global object, so the function works for any list of bma objects.
    # The first name is dropped (presumably the intercept -- verify).
    rnames <- variable.names(bmalist[[1]])[-1]
    vapply(
      bmalist,
      function(bmaObj) coef.bma(bmaObj, order.by.pip = FALSE)[rnames, "PIP"],
      numeric(length(rnames))
    )
  }
  
  
  maxminrat <- function(bmalist) {
    # Mean max/min PIP ratios for a triplet of PWT vintages.
    # Returns four numbers: all three vintages, 1 vs 2, 1 vs 3, 2 vs 3.
    pipMat <- pipsfromlist(bmalist)
    meanRatio <- function(cols) {
      pipSub <- pipMat[, cols]
      ratios <- apply(pipSub, 1, max) / apply(pipSub, 1, min)
      # non-finite ratios (e.g. division by a zero PIP) are left out of the mean
      mean(ratios[is.finite(ratios)])
    }
    sapply(list(1:3, 1:2, c(1, 3), 2:3), meanRatio)
  }
  
  # Rounds x to dig digits and formats it with at least dig decimal places.
  fmat <- function(x, dig = 4) {
    rounded <- round(x, dig)
    format(rounded, nsmall = dig)
  }
  
  #require(xtable)
  
############## Reproducing Table 1 ###########################
  # Mean max/min PIP ratios for the varying samples:
  # one column per g-prior, one row per vintage comparison.
  pipMax_MinV <- sapply(list(bricList, uipList, hyperList), maxminrat)
  dimnames(pipMax_MinV) <- list(
    c("Overall Max / Min Ratio", "PWT 6.0 vs. PWT 6.1", "PWT 6.0 vs. PWT 6.2", "PWT 6.1 vs. PWT 6.2"),
    c("g=K^2", "UIP", "hyper-g")
  )

  # The same for the common samples; columns are marked with a trailing "*".
  pipMax_MinC <- sapply(list(bricListC, uipListC, hyperListC), maxminrat)
  dimnames(pipMax_MinC) <- list(
    rownames(pipMax_MinV),
    paste0(colnames(pipMax_MinV), "*")
  )


  #____ the rest of this section is just for display_________
  # Stack both panels into one character matrix, separated by a header row
  # for the common-sample panel.
  pipMax_Min <- rbind(
    cbind(rownames(pipMax_MinV), fmat(pipMax_MinV)),
    c("Common Sample", colnames(pipMax_MinC)),
    cbind(rownames(pipMax_MinC), fmat(pipMax_MinC))
  )
  dimnames(pipMax_Min) <- list(NULL, c("Varying Sample", colnames(pipMax_MinV)))
  print(pipMax_Min)
  
############## Reproducing Table 2 ###########################
  # First element of gprior.info$shrinkage.moments for every estimation:
  # rows = vintages, columns = g-priors (varying samples, then common samples).
  SMv <- sapply(list(bricList, uipList, hyperList), function(bmaSet) {
    sapply(bmaSet, function(fit) fit$gprior.info$shrinkage.moments[1])
  })
  SMc <- sapply(list(bricListC, uipListC, hyperListC), function(bmaSet) {
    sapply(bmaSet, function(fit) fit$gprior.info$shrinkage.moments[1])
  })
  dimnames(SMv) <- list(
    c("PWT 6.0", "PWT 6.1", "PWT 6.2"),
    c("g=K^2", "g=N", "hyper-g")
  )
  dimnames(SMc) <- list(rownames(SMv), paste0(colnames(SMv), "*"))

  #____ the rest of this section is just for display_________
  # Stack both panels into one character matrix, separated by a header row
  # for the common-sample panel.
  SM <- rbind(
    cbind(rownames(SMv), fmat(SMv)),
    c("Common Sample", colnames(SMc)),
    cbind(rownames(SMc), fmat(SMc))
  )
  dimnames(SM) <- list(NULL, c("Varying Sample", colnames(SMv)))
  print(SM)
  


############## Reproducing Table 3 ###########################
  # Row identifiers used to pick PIPs from the coefficient tables:
  # "GDPCAP0" first, then the entries of column 3 of nameMatching.
  rNames <- c("GDPCAP0", as.character(unlist(nameMatching[, 3])))
  bric <- sapply(bricList, function(x) coef.bma(x)[rNames, "PIP"]) #left panel
  bricC <- sapply(bricListC, function(x) coef.bma(x)[rNames, "PIP"]) #center panel
  hyperC <- sapply(hyperListC, function(x) coef.bma(x)[rNames, "PIP"]) #right panel



  #____ the rest of this section is just for display_________
  TableCicc <- fmat(cbind(bric, bricC, hyperC), dig = 2)
  # Replace the identifiers by display labels (column 2 of nameMatching).
  # NOTE(review): the hard-coded 67 presumably equals 1 + nrow(nameMatching) -- confirm.
  rownames(TableCicc)[1:67] <- c("GDP in 1960 (log)", as.character(nameMatching[, 2]))
  # Labels of the rows to be typeset in bold (LaTeX \bf).
  # NOTE(review): "Fertility in 1960s " carries a trailing blank; it only matches
  # if the label in nameMatching has the same blank -- verify.
  CicVars <- c("Fraction Confucius", "Population Density 1960", "Population Density Coastal in 1960s",
               "East Asian Dummy", "Fertility in 1960s ", "Investment Price", "African Dummy", "Fraction of Tropical Area")

  # Wrap each matching row label in \bf{...}. The row's OWN label is used:
  # assigning paste(..., CicVars, ...) would hand out the labels in the order
  # of CicVars instead of table order and thereby mislabel rows (and fail to
  # line up at all if one of the labels is not found).
  isCicVar <- rownames(TableCicc) %in% CicVars
  rownames(TableCicc)[isCicVar] <- paste0("\\bf{", rownames(TableCicc)[isCicVar], "}")

  # Last row: number of observations per column (hard-coded: 88/84/79 for the
  # varying samples, 79 for every common-sample column).
  TableCicc <- rbind(TableCicc, c(88, 84, 79, rep(79, 3 * 2)))
  rownames(TableCicc)[nrow(TableCicc)] <- "\\# Obs."
  colnames(TableCicc) <- rep(c("6.0", "6.1", "6.2"), 3)


############## Reproducing Figure 1 ###########################
  # Number of stored top models available under every prior (the smallest
  # count across the three common-sample estimations for pwt60t88).
  nbtmods <- min(sapply(
    list(bricListC, uipListC, hyperListC),
    function(bmaSet) length(bmaSet$pwt60t88$topmod$lik())
  ))

  # Cumulative sums over the first column of pmp.bma() for the first nbtmods
  # models; one column per prior.
  pmpGraphik <- do.call(cbind, lapply(
    list(bricListC, uipListC, hyperListC),
    function(bmaSet) cumsum(pmp.bma(bmaSet$pwt60t88)[1:nbtmods, 1])
  ))

    colnames(pmpGraphik) <- c("g=K^2*", "g=N*", "hyper-g*")

  #____ the rest of this section is just for display_________
  # NOTE(review): the plot title refers to PWT 6.1 whereas the data plotted
  # come from the pwt60t88 estimations -- confirm which one is intended.
  Colours <- c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "#666666")
  par(mar = c(6, 4, 4, 2))  #c(bottom, left, top, right)
  matplot(
    pmpGraphik[, 1:3],
    type = "l", lty = c(1, 2, 9), lwd = 2, col = Colours[c(1, 3, 2)],
    log = "y", yaxt = "n",
    main = "PMP Distribution (PWT 6.1, common sample)",
    xlab = "Model rank according to PMP",
    ylab = "Cumulative Posterior Model Prob. (in %)"
  )
  legend("bottomright", colnames(pmpGraphik)[1:3],
         lty = c(1, 2, 9), lwd = 2, col = Colours[c(1, 3, 2, 6)], bty = "n")
   grid()
  # y axis drawn by hand: tick positions scaled by 100 and shown as percentages
  Labels <- paste0(axTicks(2) * 100, "%") #Labels[1]="0.0001%"
  axis(2, at = axTicks(2), labels = Labels)
  # restore single-panel layout and the default margins
  layout(matrix(1))
  par(mar = c(5, 4, 4, 2) + 0.1)

  
  
############## Reproducing Figure 2 ###########################

  #find number of best models: this is only necessary with very few MCMC iterations
  # (the smallest count of stored top models across the three priors for pwt60t88)
  nbtmods <- min(sapply(
    list(bricList, uipList, hyperList),
    function(bmaSet) length(bmaSet$pwt60t88$topmod$lik())
  ))

  # Cumulative sums over the first column of pmp.bma() for the first nbtmods
  # models; one column per prior.
  pmpGraphik <- do.call(cbind, lapply(
    list(bricList, uipList, hyperList),
    function(bmaSet) cumsum(pmp.bma(bmaSet$pwt60t88)[1:nbtmods, 1])
  ))
  colnames(pmpGraphik) <- c("g=K^2", "g=N", "hyper-g")

  #____ the rest of this section is just for display_________
  # NOTE(review): the plot title refers to PWT 6.1 whereas the data plotted
  # come from the pwt60t88 estimations -- confirm which one is intended.
  Colours <- c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "#666666")
  par(mar = c(6, 4, 4, 2))  #c(bottom, left, top, right)
  matplot(
    pmpGraphik[, 1:3],
    type = "l", lty = c(1, 2, 9), lwd = 2, col = Colours[c(1, 3, 2, 6)],
    log = "y", yaxt = "n",
    main = "PMP Distribution (PWT 6.1, varying sample)",
    xlab = "Model rank according to PMP",
    ylab = "Cumulative Posterior Model Prob. (in %)"
  )
  legend("bottomright", colnames(pmpGraphik)[1:3],
         lty = c(1, 2, 9), lwd = 2, col = Colours[c(1, 3, 2, 6)], bty = "n")
  grid()
  # y axis drawn by hand: tick positions scaled by 100 and shown as percentages
  Labels <- paste0(axTicks(2) * 100, "%") #Labels[1]="0.0001%"
  axis(2, at = axTicks(2), labels = Labels)
  #axis(1, las=2, at = 1:nrow(PIP), label = rownames(PIP),cex.axis=0.5)
  # restore single-panel layout and the default margins
  layout(matrix(1))
  par(mar = c(5, 4, 4, 2) + 0.1)
