#include <oxstd.h>
#include <oxfloat.h>
#include <oxdraw.h>
#include <oxprob.h>
#import <maximize>

const decl M_LOG2PI = 1.83787706640935;	// log(2*pi)
decl M_MAXITER = 500;	// max number of iterations in Student's ECM estimation
decl M_TOLERANCE = 0.001; // max variation of the loglik tollerated for convergence
decl M_VERB = 1; // 1 for verbose, 0 for silent

// enumerator for type of test
enum{NONE, RCONST, CONST, RTREND, TREND}
// NONE		: no deterministic
// RCONST	: restricted constant
// CONST	: unrestricted constant
// RTREND	: restricted trend
// TREND	: unrestricted linear trend (may lead to deterministic quadratic trends)

johansen(mDy, mYx, const mUx, const avEval, const amAlpha, const amBeta, ...)
// additional pars are		  const cCoint, const amGamma, const amV, const amErr)
// performs Gaussian ML for ECM model with cCoint cointegration relations
// mDy: (T x K1) matrix of first difference of original series
// mYx: (T x K2>=K1) matrix of 1-lagged levels of original series ~ restricted regressors
// mUx: (T x K3) matrix of unrestricted regressors (delays of mDy and unrestricted dummies)
// avEval: (K1 x 1) vector of eigenvalues (address)
// amAlpha:	adjustment coefficient matrix (address)
// amBeta:	cointegration matrix (address)
// optional (in case cointegration rank known)
// cCoint:  scalar rank of cointegration
// amGamma:	unrestricted regressors coefficients matrix (address)
// amV: covariance matrix of errors (address)
// amErr: error series (address)
// returns: Johansen trace statistics if OK and only standard parameters are present,
//			LogLik if optional parameters are present
//			0 if problems
{
	// initial checks & declarations
	decl aArgs=va_arglist();
	decl cSizeArgs=sizeof(aArgs);
	decl cT  = rows(mDy);
	decl cK1 = columns(mDy);
	decl cK2 = columns(mYx);
	decl cK3 = columns(mUx);

	// error checks
	if (cK2 < cK1) {println("Error in johansen(): K2<K1"); return 0;}
	if (cSizeArgs && (aArgs[0] < 0 || aArgs[0] > cK1))
		{println("Error in johansen(): wrong cointegration rank"); return 0;}

	// concentrating out the unrestricted regressors
	decl mBetaYxUx, mBetaDyUx;	// "prior" regression coefficients
    if (cK3)
    {
		olsc(mYx, mUx, &mBetaYxUx);
		mYx  -= mUx * mBetaYxUx;		  /* residual mYx */

		olsc(mDy, mUx, &mBetaDyUx);
        mDy -= mUx * mBetaDyUx;          /* residual mDy */
    }
	
	// eigen-problem
	decl mBeta; // will hold the cointegrating vectors
	decl vEval;
	decl mS01 = mDy'mYx / cT;
	decl mS00 = (mDy'mDy)/cT;
	if (eigensymgen(mS01' / mS00 * mS01, mYx'mYx/cT, &vEval, &mBeta) != 0)
		vEval[] = .NaN;
	else vEval = vEval .< 0 .|| vEval .> 1 .? .NaN .: vEval;

	if (cSizeArgs==0)
	{
		amBeta[0] = mBeta[][ : cK1 - 1]; /* remove vectors with zero eigvals */
    	avEval[0] = vEval[ : cK1 - 1];   /* also remove zero eigvals */
    	amAlpha[0] = mS01 * amBeta[0];
		return -cT * reversec(cumulate(log(1-reversec(avEval[0]'))))';
	}

	// optional part executed only if optional arguments are specified
	// aArgs[0] = cCoint
	// aArgs[1]	= amGamma
	// aArgs[2] = amV
	// aArgs[3]	= mErr
	if (aArgs[0]) amBeta[0] = mBeta[][ : aArgs[0]-1]; // returns only the requested coint. vectors
	else amBeta[0] = zeros(rows(amBeta), rows(amBeta));
	avEval[0] = vEval[ : cK1 - 1];		// returns all eigenvalues
   	amAlpha[0] = mS01 * amBeta[0];

	if (cK3) // estimate unrestricted coefficients amGamma (if any)
	{
//		(aArgs[1])[0] = mBetaDyUx' - amAlpha[0]*amBeta[0]'*mBetaYxUx'; // mBetaYxUx*amBeta[0]*amAlpha[0]';
		(aArgs[1])[0] = ( mBetaDyUx - mBetaYxUx*amBeta[0]*amAlpha[0]' )'; // mBetaYxUx*amBeta[0]*amAlpha[0]';
	}
	else (aArgs[1])[0] = <>; // no unrestricted coefficients

	(aArgs[3])[0] = mDy - mYx*amBeta[0]*amAlpha[0]';	// compute reg errors amErr
	(aArgs[2])[0] = (aArgs[3])[0]'(aArgs[3])[0]/cT; // compute error cov matrix amV

	decl temp, out;
	if (aArgs[0]) out = -0.5*cT*( cK1*(1 + M_LOG2PI) +
						logdet(mS00,&temp) + sumr(log(1-vEval[ : aArgs[0]-1])) );
	else out = -0.5*cT*( cK1*(1 + M_LOG2PI) +
						logdet(mS00,&temp) );
	if (temp) return out;
	else return 0;
}

series_builder(const mY, const mX, const mU, const cP,
			   const amDy, const amYx, const amUx)
// makes the mDy and mYx series for johansen, strating
// from mY (endogenous variables) and mX (restricted regressors)
// and mU (unrestricted regressors)
// cP is the number of lags of differenced variables in the ECM
// returns 1 if OK, 0 otherwise
{
	decl mDy = diff0(mY,1);
	decl mUx = <>;
	for (decl i=1; i<=cP; ++i)
	{
		mUx ~= lag0(mDy,i);
	}
	if (rows(mU) || cP)	amUx[0] = (mUx ~ mU)[cP+1:][];
	else amUx[0] = <>;
	amDy[0] = mDy[cP+1:][];
	amYx[0] = (lag0(mY,1)~mX)[cP+1:][];

	return 1;
}

weights(const mErr, const mV, const dDF, const avW)
{
	decl mInvV = invertsym(mV);
	decl cT = rows(mErr);
	avW[0] = zeros(cT,1);
	decl dNum = dDF + columns(mErr);
	for (decl t=0; t<cT; ++t)
	{
		avW[0][t] = sqrt( dNum ./ (dDF + mErr[t][] * mInvV * mErr[t][]') );
	}
}

tlogdens(const mX, const mV, const dDF)
{
	decl cT = rows(mX);
	decl cK = columns(mX);
	decl dLogDetV;
	decl mInvV = invertsym(mV, &dLogDetV);
	decl dLogDens=0;
	for (decl t=0; t<cT; ++t)
	{
		dLogDens += log(1 + mX[t][]*mInvV*mX[t][]'/dDF);
	}
	return cT*( loggamma((dDF+cK)/2)
			  - loggamma(dDF/2) - log(M_PI*dDF)*cK/2 - dLogDetV/2 )
			  - dLogDens*(dDF+cK)/2;
}

tecm(mDy, mYx, const mUx, const dDF, const cCoint, const amAlpha, const amBeta,
     const amGamma, const amV, const amErr, const avW)
// performs Student's t ML for ECM model with cCoint cointegration relations
// using the EM algorithm (cf. Lamge et al. (1989) JASA, Little (1988) JRSS C)
// mDy: (T x K1) matrix of first difference of original series
// mYx: (T x K2>=K1) matrix of 1-lagged levels of original series ~ restricted regressors
// mUx: (T x K3) matrix of unrestricted regressors (delays of mDy and unrestricted dummies)
// dDF: (1 x 1) degree of freedom of Student's T
// cCoint:  scalar rank of cointegration
// all the following pointers are only for output
// amAlpha:	adjustment coefficient matrix (address)
// amBeta:	cointegration matrix (address)
// amGamma:	unrestricted regressors coefficients matrix (address)
// amV: covariance matrix of errors (address)
// amErr: error series (address)
// avW: (T x 1) vector with weights for WLS
// returns: LogLik,
{

	// initial estimates from normal ECM
	decl vEval; // not really used, needed for johansen()
	decl vLogLiks=zeros(M_MAXITER, 1);
	decl dNormLogLik=johansen(mDy, mYx, mUx,
		 				  	  &vEval, amAlpha, amBeta, cCoint, amGamma, amV, amErr);
	vLogLiks[0] = tlogdens(amErr[0], amV[0], dDF);
	avW[0] = ones(rows(mDy),1);

	// EM steps
	decl i;
	for (i=1; i<M_MAXITER ; ++i)
	{
		weights(amErr[0], amV[0], dDF, avW);
		johansen(avW[0].*mDy, avW[0].*mYx, avW[0].*mUx,
				 &vEval, amAlpha, amBeta, cCoint, amGamma, amV, amErr);
		vLogLiks[i] = tlogdens(amErr[0], amV[0], dDF);
		if (fabs(vLogLiks[i]-vLogLiks[i-1]) < M_TOLERANCE) break;
	}

	// messages: comment out if used in simulation experiments
	if (i==M_MAXITER)
	{
		if (M_VERB) println("tecm() warning:\nmax number of iterations reached");
		i--;
	}
	else if (M_VERB) println("...convergence after ", i, " iterations");

	// corrections of estimates from johansen procedure
	amErr[0] ./= avW[0];	// computes errors w/r to original data
	if (dDF>2) amV[0] *= dDF/(dDF-2); // if cov matrix of Student's t exists, compute
	
	return vLogLiks[i];
}

lucas_plr(const mY, const cLags, const cDetType, dDF, ...)
// Lucas' Pseudo LR test with Student's t with dDF degree of freedom
// mY (T x K) endogenous series
// cLags (1 x 1) number of lags of differenced mY
// cDetType (1 x 1) type of deterministic part:
// NONE		: no deterministic
// RCONST	: restricted constant
// CONST	: unrestricted constant
// RTREND	: restricted trend
// TREND	: unrestricted linear trend (may lead to deterministic quadratic trends)
// dDF (1 x 1) degree of freedom of t
// optional arguments: [0] restricted regressors, [1] unrestricted regressors
// if only restricted regressors needed, pass <> as first optional arguments
{

	decl mDy, mYx, mUx;
	decl mAlpha, mBeta, mGamma, mV, mErr, vW;
	decl mX = <>;
	decl mU = <>;
	decl cT = rows(mY);
	decl cK	= columns(mY);

	// build deterministic variables
	if (cDetType == RCONST) mX = ones(cT, 1);
	else if (cDetType == CONST)  mU = ones(cT, 1);
	else if (cDetType == RTREND) mX = ones(cT, 1) ~ range(1, cT)';
	else if (cDetType == TREND)	 mU = ones(cT, 1) ~ range(1, cT)';

	// add restricted and unrestricted regressors
	if (sizeof(va_arglist()) >= 1)
	{
		mX ~= va_arglist()[0];
	}
	if (sizeof(va_arglist()) >= 2)
	{
		mU ~= va_arglist()[1];		
	}

	// build serie to pass to tecm
	series_builder(mY, mX, mU, cLags, &mDy, &mYx, &mUx);

	// compute logliks
	decl vLogLik = zeros(cK+1, 1);
	for (decl i=0; i <= cK; ++i)
	{
		if (M_VERB)	println("ECM-t estimation for rank = ", i);
		vLogLik[i] = tecm(mDy, mYx, mUx, dDF, i,
						  &mAlpha, &mBeta, &mGamma,
						  &mV, &mErr, &vW);
	}

	// compute PLR tests and prints output
	decl vPLR=2*(vLogLik[cK] - vLogLik[0 : cK-1]);
	if (M_VERB)
	{
		decl vTotPars = cK*cK*cLags;
		if (cDetType == RCONST) vTotPars += (cK+1)*range(cK,0,-1)';
		else if (cDetType == CONST)  vTotPars += cK*range(cK,0,-1)' + cK;
		else if (cDetType == RTREND) vTotPars += (cK+2)*range(cK,0,-1)';
		else if (cDetType == TREND)	 vTotPars += cK*range(cK,0,-1)' + 2*cK;
		
		decl vBIC = (-2*vLogLik + vTotPars*log(cT))/cT;
		println("\nLucas' Pseudo LR test based on Student's t with ", dDF," DF");
		println("Number of endogenous variables: ", cK);
		println("Number of observations: ", cT);
		println("Number of observations after adjustments: ", rows(mDy));
		println("Number of lagged differences: ", cLags);
		decl asDet={"none", "restricted constant", "unrestricted constant",
				"restricted trend", "unrestricted trend"};
		println("Deterministic: ", asDet[cDetType]);
		println("%cf", {"%5.1g", "\t\t%6g"},
			"%c", {"H0:rank<=", "PLR", "BIC"},
			range(0,cK-1)'~vPLR~vBIC[0:cK-1]);
		println("BIC for stationary model: ", vBIC[cK]);
	}
	return vPLR;
}

sim_ecm(const mEps, const mYstart, const cCoint, const cP, const mX, const mU,
		const mAlpha, const mBeta, const mGamma, const amY)
// simulate from ECM(cP) model with cointegration rank cCoint
// using mYstart as initial values, mEps as shocks, mX as restricted regressors,
// mUx as unrestricted regressors, parameters mAlpha, mBeta and mGamma
// the simulated time series is written in amY
// for formats see series_builder()
// Notice: mYstart must be of rows cP+1
{
	decl cT = rows(mEps);
	decl cK = columns(mEps);
	decl cKr = columns(mX);		// number of restricted regressors
	decl cKu = columns(mU);		// number of unrestricted regressors
	decl mPi = ( mBeta*mAlpha' )';	// cointegration matrix
	decl mA = mEps;
	if (mX) mA += mX[cP+1:][]*mPi[][cK:]';
	if (mU) mA += mU[cP+1:][]*mGamma[][cP*cK:]';
	// VAR coefficient matrices
	decl amC = new array[cP+1];
	amC[0] = (unit(cK) + mPi[][0:cK-1])'; // lag 1 parameter matrix
	if (cP) amC[0] += mGamma[][0:cK-1]';  // = = = = = =

	for (decl i=1; i<=cP; ++i)
	{
		if (i < cP) amC[i] = mGamma[][i*cK : (i+1)*cK-1]'
				           - mGamma[][(i-1)*cK : i*cK-1]';
		else amC[i] = - mGamma[][(i-1)*cK : i*cK-1]';
	}
	// check on dimensions of mYstart
	if (rows(mYstart)!=cP+1 || columns(mYstart)!=cK)
	{
		println("Error in sim_ecm():\nwrong number of initial values");
		return 0;
	}
	// trick to fool how cumulate() works on initial values
	decl mInit = mYstart;
	for (decl t=1; t<=cP; ++t)
	{
		mInit[t][] = mYstart[t][];
		for (decl j=0; j<t; ++j)
		{
			mInit[t][] -= mYstart[t-1-j][]*amC[j];
		}
	}

	// generation of simulated series
	amY[0] = cumulate(mInit|mA, amC);
	return 1;
}

// do not use the following, use boot_plr() below instead
bootstrap_plr(const cIter, const mY, const cLags, const cDetType, dDF, ...)
// bootstrap Lucas' Pseudo LR test with Student's t with dDF degree of freedom
// mY (T x K) endogenous series
// cLags (1 x 1) number of lags of differenced mY
// cDetType (1 x 1) type of deterministic part:
// NONE		: no deterministic
// RCONST	: restricted constant
// CONST	: unrestricted constant
// RTREND	: restricted trend
// TREND	: unrestricted linear trend (may lead to deterministic quadratic trends)
// dDF (1 x 1) degree of freedom of t
// optional arguments: [0] restricted regressors, [1] unrestricted regressors
// if only restricted regressors needed, pass <> as first optional arguments
{
	decl mDy, mYx, mUx;
	decl mAlpha, mBeta, mGamma, mV, mErr, vW;
	decl mAlpha0, mBeta0, mGamma0, mV0, mErr0, vW0;
	decl mX = <>;
	decl mU = <>;
	decl cT = rows(mY);
	decl cK	= columns(mY);

	// build deterministic variables
	if (cDetType == RCONST) mX = ones(cT, 1);
	else if (cDetType == CONST)  mU = ones(cT, 1);
	else if (cDetType == RTREND) mX = ones(cT, 1) ~ range(1, cT)';
	else if (cDetType == TREND)	 mU = ones(cT, 1) ~ range(1, cT)';

	// add restricted and unrestricted regressors
	if (sizeof(va_arglist()) >= 1)
	{
		mX ~= va_arglist()[0];
	}
	if (sizeof(va_arglist()) >= 2)
	{
		mU ~= va_arglist()[1];		
	}

	// build serie to pass to tecm
	series_builder(mY, mX, mU, cLags, &mDy, &mYx, &mUx);
	decl cT0 = rows(mDy);
	
	// compute logliks
	decl vLogLik = zeros(cK+1, 1);
	decl mPLRdist = zeros(cK, cIter);
	decl bVerb = M_VERB;
	for (decl i=0; i <= cK; ++i)
	{
		if (M_VERB) println("- Simulating case with ",i," cointegration vectors");
		M_VERB = 0;
		vLogLik[i] = tecm(mDy, mYx, mUx, dDF, i,
						  &mAlpha, &mBeta, &mGamma,
						  &mV, &mErr, &vW);
		decl mShocks=vW.*mErr;
		decl mSimY;
		if (i == cK) break;
		for (decl j=0; j < cIter; ++j)
		{
			decl mDySim, mYxSim, mUxSim;
			sim_ecm(mShocks[ranu(cT0,1)*cT0][], mY[:cLags][], i, cLags, mX, mU,
			mAlpha, mBeta, mGamma, &mSimY);
			series_builder(mSimY, mX, mU, cLags, &mDySim, &mYxSim, &mUxSim);
			mPLRdist[i][j] = 2*(
			tecm(mDySim, mYxSim, mUxSim, dDF, cK, &mAlpha0, &mBeta0, &mGamma0, &mV0, &mErr0, &vW0) -
			tecm(mDySim, mYxSim, mUxSim, dDF, i, &mAlpha0, &mBeta0, &mGamma0, &mV0, &mErr0, &vW0) );
		}
		M_VERB = bVerb;
	}
	M_VERB = bVerb;

	// computes PLR tests and prints output
	decl vPLR=2*(vLogLik[cK] - vLogLik[0 : cK-1]);
	decl vPval=sumr(mPLRdist .> vPLR)/cIter;
	println("%c",{"H0:rank<=","PLR","p-value"},
			range(0,cK-1)' ~ vPLR ~ vPval);	
}

boot_plr(const cIter, const mY, const cLags, const cDetType, dDF, ...)
// bootstrap Lucas' Pseudo LR test with Student's t with dDF degree of freedom
// with the method of Swensen (2006) Econometrica
// mY (T x K) endogenous series
// cLags (1 x 1) number of lags of differenced mY
// cDetType (1 x 1) type of deterministic part:
// NONE		: no deterministic
// RCONST	: restricted constant
// CONST	: unrestricted constant
// RTREND	: restricted trend
// TREND	: unrestricted linear trend (may lead to deterministic quadratic trends)
// dDF (1 x 1) degree of freedom of t
// optional arguments: [0] restricted regressors, [1] unrestricted regressors
// if only restricted regressors needed, pass <> as first optional arguments
{
	decl mDy, mYx, mUx;
	decl mAlpha, mBeta, mGamma, mUGamma, mV, mErr, vW;
	decl mAlpha0, mBeta0, mGamma0, mV0, mErr0, vW0;
	decl mX = <>;
	decl mU = <>;
	decl cT = rows(mY);
	decl cK	= columns(mY);

	// build deterministic variables
	if (cDetType == RCONST) mX = ones(cT, 1);
	else if (cDetType == CONST)  mU = ones(cT, 1);
	else if (cDetType == RTREND) mX = ones(cT, 1) ~ range(1, cT)';
	else if (cDetType == TREND)	 mU = ones(cT, 1) ~ range(1, cT)';

	// add restricted and unrestricted regressors
	if (sizeof(va_arglist()) >= 1)
	{
		mX ~= va_arglist()[0];
	}
	if (sizeof(va_arglist()) >= 2)
	{
		mU ~= va_arglist()[1];		
	}

	// build serie to pass to tecm
	series_builder(mY, mX, mU, cLags, &mDy, &mYx, &mUx);
	decl cT0 = rows(mDy);
	
	// compute logliks
	decl vLogLik = zeros(cK+1, 1);
	decl mPLRdist = zeros(cK, cIter);
	decl bVerb = M_VERB;
	tecm(mDy, mYx, mUx, dDF, cK, &mAlpha, &mBeta, &mUGamma, &mV, &mErr, &vW);
	decl mShocks=vW.*mErr;
	for (decl i=0; i <= cK; ++i)
	{
		if (M_VERB) println("- Simulating case with ",i," cointegration vectors");
		M_VERB = 0;
		vLogLik[i] = tecm(mDy, mYx, mUx, dDF, i,
						  &mAlpha, &mBeta, &mGamma,
						  &mV, &mErr, &vW);
		decl mSimY;
		if (i == cK) break;
		for (decl j=0; j < cIter; ++j)
		{
			decl mDySim, mYxSim, mUxSim;
			sim_ecm(mShocks[ranu(cT0,1)*cT0][], mY[:cLags][], i, cLags, mX, mU,
			mAlpha, mBeta, mUGamma, &mSimY);
			series_builder(mSimY, mX, mU, cLags, &mDySim, &mYxSim, &mUxSim);
			mPLRdist[i][j] = 2*(
			tecm(mDySim, mYxSim, mUxSim, dDF, cK, &mAlpha0, &mBeta0, &mGamma0, &mV0, &mErr0, &vW0) -
			tecm(mDySim, mYxSim, mUxSim, dDF, i, &mAlpha0, &mBeta0, &mGamma0, &mV0, &mErr0, &vW0) );
		}
		M_VERB = bVerb;
	}
	M_VERB = bVerb;

	// computes PLR tests and prints output
	decl vPLR=2*(vLogLik[cK] - vLogLik[0 : cK-1]);
	decl vPval=sumr(mPLRdist .> vPLR)/cIter;
	println("%c",{"H0:rank<=","PLR","p-value"},
			range(0,cK-1)' ~ vPLR ~ vPval);	
}