/*
**
**  Program MLKern.ox
**
**  Purpose:
**    Calculate the log-marginal likelihood of a model, using the 
**    LaPlace, harmonic mean and kernel method. This calculation is
**    only possible for models where the analytical (log)likelihood 
**    function is known.
**
**  Version:
**    4
**
**  Author:
**    Charles Bos
**
**  Date:
**    7 november 2000
**
*/
#include <oxstd.h> // include the Ox standard library header
#include <oxfloat.h> // M_2PI declaration
#import <maximize>
#include "include/libkern.ox"   // This library comes with the GnuDraw package
#include "include/info.ox"
#include "include/size.ox"

// Function declarations
CalcCovar(const vP, const nData);
LnPdf_Loc(const vP, const adFunc, const avScore, const amHessian);
LnPdfExPrior_Loc(const vP, const adFunc);
Initialize(const avInitVP, const ainData, const amBounds, const avFreePars);
LoadTheta(const SimFile, DataFrac);
CalcMMS2(const SimBase, const DataFrac, const avTheta_Mean, 
         const avTheta_Median, const avTheta_Mode, 
         const amTheta_S2);
CalcIntegConstant(const SimBase, const mTheta, const DataFrac);

/* Include declarations of the model  */
#include "simox.dec"

main()                // function main is the starting point
{
  decl 
    dLnPdf, vP, ir, SimBase, SimFile, nRot, nData, nDim, dDataFrac,
    vTheta_Mean, vTheta_Median, vTheta_Mode, mTheta_S2, dTime,
    mM1, mM2, mM3, mM4, mM5, mS2, mIC, mX, i, mTheta, OutFile, fh;

  /* Initialize */
  dTime= timer();
  Initialize(&g_InitVP, &g_nData, &g_Bounds, &g_FreePars);

  nRot= sizeof(g_Flex_nMH);
  SimBase= sprint(g_OutDir, "/", g_VersFile);
  OutFile= sprint(SimBase, "mlkern.out");
  fh= fopen(OutFile, "l");

  println("MLKern.OX");
  println("----------");
  print("Calculating marginal likelihood, using parameters: ");
  print(g_VarNames[g_FreePars]);
  println("Using model:");
  println(g_Vers);

  dDataFrac= 1;         // Put to e.g. .5 to use half of the sample
  nDim= columns(g_InitVP);
  CalcMMS2(SimBase, dDataFrac, &vTheta_Mean, &vTheta_Median, 
           &vTheta_Mode, &mTheta_S2);

  /* Calculate using LaPlace, first at overal mean and cov */
  println("Calculating mM1: LaPlace at sample mean and covariance");
  ir= LnPdf_Loc(vTheta_Mean, &dLnPdf, 0, 0);
  mM1= (nDim/2)*log(M_2PI)+0.5*determinant(mTheta_S2)+dLnPdf;

  /* Calculate using LaPlace, then at overal median and cov */
  println("Calculating mM2: LaPlace at sample median and numerical covariance");
  ir= LnPdf_Loc(vTheta_Median, &dLnPdf, 0, 0);
  mS2= CalcCovar(vTheta_Median, g_nData);
  mM2= (nDim/2)*log(M_2PI)+0.5*determinant(mS2)+dLnPdf;

  /* Calculate using LaPlace, then at overal mode and cov */
  println("Calculating mM3: LaPlace at sample mode and numerical covariance");
  mS2= CalcCovar(vTheta_Mode, g_nData);
  ir= LnPdf_Loc(vTheta_Mode, &dLnPdf, 0, 0);
  mM3= (nDim/2)*log(M_2PI)+0.5*determinant(mS2)+dLnPdf;

  /* Calculate using kernels, around the mean, median and mode */ 
  println("Calculating mM4: Kernel at sample mean, median and mode");
  mX= vTheta_Mean~vTheta_Median~vTheta_Mode;
  mIC= CalcIntegConstant(SimBase, mX, dDataFrac);
  mM4= zeros(1, 3);
  for (i= 0; i < 3; ++i)
    {
      ir= LnPdf_Loc(mX[][i], &dLnPdf, 0, 0);
      mM4[i]= dLnPdf-log(mIC[i]);
    }

  /* Calculate using harmonic mean */
  println("Calculating mM5: Harmonic mean");
  SimFile = sprint(SimBase, ".fmt");
  mTheta = LoadTheta(SimFile, dDataFrac);
  nData= columns(mTheta);
  mM5= 0;
  for (i= 0; i < nData; ++i)
    {
      if (imod(i, g_Flex_InfoRep*4) .== 0)
        info(i, nData);
      ir= LnPdfExPrior_Loc(mTheta[][i], &dLnPdf);
      mM5+= 1/exp(dLnPdf);
    }
  mM5= -log(mM5)+log(rows(mTheta));

  print("Result using LaPlace, mean, median and mode: ", 
          "%12.4f", mM1~mM2~mM3);
  print("Result using kernels, mean, median and mode: ", 
          "%12.4f", mM4);
  print("Result using harmonic mean:                  ", 
          "%12.4f", mM5);

  println("Time elapsed:\n ", "%10s", timespan(dTime));
}

/*
**
**  Procedure CalcCovar(const vP, const nData)
**
**  Purpose:
**    Calculate the numerical approximation to the covariance
**    matrix as the inverse of minus the hessian, taking into
**    account that LnPdfMean_Loc/Shell returns the mean loglikelihood
**    function.
**
*/
CalcCovar(const vP, const nData)
{
  decl hess, invhess, result;

  result= Num2Derivative(LnPdf_Loc, vP, &hess);
  if (!result)
    {
      println("Covar() failed in numerical second derivatives");
      return zeros(vP);
    }

  invhess= invertgen(-hess, 30);
  return (invhess);
}


/*
**
**  Function LnPdfMean_Loc
**
**  Purpose:
**    Enlarge the parameter vector with the elements
**    that are not free.
**
*/
LnPdfMean_Loc(const vP, const adFunc, const avScore, const amHessian)
{
  decl vP_Loc;

  vP_Loc= g_InitVP;
  vP_Loc[g_FreePars]= vP;
  return LnPdf_Shell(vP_Loc, adFunc, avScore, amHessian);
}

/*
**
**  Function LnPdf_Loc
**
**  Purpose:
**    Enlarge the parameter vector with the elements
**    that are not free. Return Loglikelihood, not MEAN loglikelihood
**
*/
LnPdf_Loc(const vP, const adFunc, const avScore, const amHessian)
{
  decl vP_Loc, ir;

  vP_Loc= g_InitVP;
  vP_Loc[g_FreePars]= vP;
  ir= LnPdf_Shell(vP_Loc, adFunc, avScore, amHessian);
  adFunc[0]*= g_nData;
  return ir;
}

/*
**
**  Function LnPdfExPrior_Loc
**
**  Purpose:
**    Enlarge the parameter vector with the elements
**    that are not free, calculate likelihood excluding prior.
**
*/
LnPdfExPrior_Loc(const vP, const adFunc)
{
  decl vP_Loc, dLnPdf, dLnPrior, ir;

  vP_Loc= g_InitVP;
  vP_Loc[g_FreePars]= vP;

  ir= LnPdf_Loc(vP, &dLnPdf, 0, 0);
  dLnPrior= LnPrior_Shell(vP_Loc);
  
  adFunc[0]= dLnPdf - dLnPrior;
  return ir;
}


/*
**
**  Procedure Initialize
**
**  Purpose:
**    Call the PdfInit, set the bounds free etc.
**
*/
Initialize(const avInitVP, const ainData, const amBounds, const avFreePars)
{
  decl vP, Bounds, BoundsOrg, f;

  PdfInit(avInitVP, ainData);

  f= avFreePars[0];
  if (f == 0)
    f= range(0, rows(avInitVP[0])-1)';
  avFreePars[0]= f;

  vP= avInitVP[0][f];

  /* Set other bounds free */
  Bounds= (M_INF_NEG~M_INF).*(ones(avInitVP[0]));
  BoundsOrg= amBounds[0];
  Bounds[f][]= BoundsOrg[f][];
  amBounds[0]= Bounds;

}

/*
**
**  Procedure LoadTheta
**
**  Purpose:
**    Load a file with the Theta's, taking into account the fraction
**    of the data that is needed, and memory. 
**
**  Inputs:
**    SimFile     Name of the file with the Theta's, and possibly the
**                weights
**    DataFrac    Fraction of data requested. If < 1, then a random
**                fraction is selected; if > 1, then also a random
**                part of the data is selected of approximately
**                size DataFrac.
**
**  Return value:
**    Theta       Matrix nDim x nData with Theta's in columns
**
*/
LoadTheta(const SimFile, DataFrac)
{    
    decl nData, mTheta;
    
    mTheta= loadmat(SimFile);
    nData= rows(mTheta);
    if (nData > 0)
      {
        if (DataFrac < 1)
          DataFrac *= nData;
        if (DataFrac > 1)
          {
            DataFrac= min(DataFrac, nData);
            mTheta= thinr(mTheta, DataFrac);
          }
      }
    else
      {
        mTheta= 0;
        println("Warning: Simulation file ", SimFile, " not found");
      }
    
    return mTheta';
}

/*
**
**  Procedure CalcMMS2
**
**  Purpose:
**    Calculate Mean, Median, Mode and variance (w.r.t. mean) of the posterior
**
*/
CalcMMS2(const SimBase, const DataFrac, const avTheta_Mean, 
         const avTheta_Median, const avTheta_Mode, 
         const amTheta_S2)
{
  decl mTheta, SimFile, ir, dLnPdf, mS2;

  SimFile = sprint(SimBase, ".fmt");
  println ("Calculating posterior mean, median and variance from ", SimFile);

  mTheta = LoadTheta(SimFile, DataFrac);
  avTheta_Mean[0]= meanr(mTheta);
  avTheta_Median[0]= quantiler(mTheta);
  amTheta_S2[0]= variance(mTheta');
  mTheta= <>;

  avTheta_Mode[0]= avTheta_Median[0];
  mS2= amTheta_S2[0];
  ir= MaxBFGS(LnPdf_Loc, avTheta_Mode, &dLnPdf, &mS2, TRUE);
}

/*
**
**  CalcIntegConstant
**
**  purpose : calculate the integrating constant for the posterior using a 
**            kernel method
**
**  format  : CalcIntegConstant(const SimBase, const mTheta);
**
**  input   : SimFile   String indicating the base for the posterior file name 
**            mTheta    nDim x k matrix with Theta's, used in calculating the 
**                      integrating constant.
**  output  : r.v.      1 x k vector, estimated integrating constant
**
**  authors : Charles Bos
**
**  using   : libkern.ox
**
*/
CalcIntegConstant(const SimBase, const mTheta, const DataFrac)
{
  decl ir, SimFile, mThetaPost, vIC;

  SimFile = sprint(SimBase, ".fmt");
  mThetaPost= LoadTheta(SimFile, DataFrac);

  ir= s_KernelMult(mTheta, mThetaPost, &vIC);

  return vIC;
}


