function [yf,P,IC,aR2,AIC,BIC]=cond_ARDL_DMS(yh,y1,x,h,xcond,lag,Pmax)
% =========================================================================
% DESCRIPTION
% This function estimates a horizon-specific autoregressive distributed lag
% (ARDL) model via OLS and uses it to produce a conditional point forecast.
%
% -------------------------------------------------------------------------
% INPUTS
%
%       yh    = dependent variable (a 1 x T row vector where T is the
%               number of time periods)
%
%       y1    = version of dependent variable whose lags are to be used as
%               predictors in the model (a 1 x T row vector)
%
%       x     = set of independent variables whose leads and lags will be
%               used as predictors in the model (an N x T matrix where N is
%               the number of variables; N<T is asserted)
%
%       h     = forecast horizon (integer greater than 0)
%
%       xcond = conditional values of the series in x used to construct the
%               forecasts (an N x h array); nonmissing values indicate
%               which leads of which series in x are to be included in the
%               model and thus which values of which series are to be
%               conditioned on when constructing the forecasts (e.g.
%               suppose N=3 and h=4 and that xcond takes the following form
%                               [  NaN    NaN    NaN    NaN]
%                       xcond = [  NaN    NaN    NaN    0.5]
%                               [ -0.1    0.2    0.4    0.1]
%               then the model will include, as predictors, the second
%               variable in x, four periods ahead, and the third variable
%               in x, one through four periods ahead; the forecast will
%               then be constructed conditional on the second variable in x
%               equaling 0.5 four periods from the forecast origin and the
%               third variable in x equaling -0.1, 0.2, 0.4, and 0.1 in
%               one, two, three, and four periods from the forecast origin,
%               respectively)
%
%       lag   = determines how the number of lags (P) of x and y1 is
%               chosen; options:
%                   1) lag is a scalar integer => P=lag
%                   2) lag is the string 'AIC' => P determined by the
%                      AIC
%                   3) lag is the string 'BIC' => P determined by the
%                      BIC
%               note that P is bounded by 0 and Pmax, the next input
%               described below
%
%       Pmax  = maximum number of lags; regardless of the number of lags
%               included in the model, this number of observations will be
%               removed from the beginning of each series prior to
%               estimation
%
% OUTPUTS
%
%       yf    = forecasted value of yh, h steps ahead
%
%       P     = number of lags of x and y1 included in the model
%
%       IC    = (Pmax+1) x 1 vector of values of the information criterion
%               for all lags considered when using AIC/BIC; all NaN when
%               lag is given as an integer
%
%       aR2   = in-sample R-squared of the estimated model, 1-SSR/SST.
%               NOTE: despite its name, this value carries no
%               degrees-of-freedom adjustment. With n observations and k
%               estimated parameters, the adjusted figure can be recovered
%               as 1-(1-aR2)*(n-1)/(n-k).
%
%       AIC   = value of the AIC for the chosen model
%
%       BIC   = value of the BIC for the chosen model
%
% ERRORS
%
%       An error is raised when lag is numeric but is not a scalar integer
%       between 0 and Pmax, or when lag is neither numeric nor one of the
%       strings 'AIC' / 'BIC'. Dimension mismatches between the inputs
%       trip an assertion.
%
% -------------------------------------------------------------------------
% SUBFUNCTION
%
%   getIC() - returns optimal number of lags (between 0 and Pmax) of y1 and
%             x to be included in the model based on the given information
%             criterion (either AIC or BIC)
%
% -------------------------------------------------------------------------
% MODEL DESCRIPTION
%
% Linear model regressing yh(t+h) on a set of predictors and a constant.
% The set of predictors can be separated into three categories:
%   1) lags of y1; i.e. y1(t),y1(t-1),...,y1(t-P+1)
%   2) lags of the N series in x; i.e. x1(t), x1(t-1), ..., x1(t-P+1),
%      x2(t), ..., xN(t), ..., xN(t-P+1)
%   3) leads of the N series in x; these are determined by the input
%      variable xcond; e.g. if xcond has all missing values except for
%      xcond(1,1), xcond(N,1), and xcond(N,h) then the only leads to be
%      included in the model are x1(t+1), xN(t+1), and xN(t+h)
%
% -------------------------------------------------------------------------
% NOTES
% Author: Joe McGillicuddy
% Date: 5/9/2017
% Version: 2014a
% Required Toolboxes: None
%
% yh,y1,x are assumed to be contemporaneous.
%
% Number of observations removed = Pmax+h-1
%
% =========================================================================
% FUNCTION

% -------------------------------------------------------------------------
% CHECKS

% Size of x (N = number of series, T =  number of time periods)
[N,T]=size(x);

% Check that time is the second dimension
assert(N<T);

% Check that yh and y1 contain only one series
assert(size(yh,1)==1);
assert(size(y1,1)==1);

% Check that the number of observations per series matches up across yh,
% y1, and x
assert(size(yh,2)==T);
assert(size(y1,2)==T);

% Check that xcond contains h observations of the N series in x
assert(size(xcond,1)==N);
assert(size(xcond,2)==h);

% -------------------------------------------------------------------------
% DETERMINE NUMBER OF LAGS

% Assert that input lag takes on a valid value; use that value to determine
% the method for lag selection:
%   1) lag is a scalar integer between 0 and Pmax => P=lag
%   2) lag is the string AIC => P is determined by AIC
%   3) lag is the string BIC => P is determined by BIC
if isnumeric(lag)
    if ~isscalar(lag)
        % The comparisons below are chained with &&, which only accepts
        % scalar operands; reject vectors/matrices/empties here with a
        % meaningful message instead of a generic operand error.
        error('Input lag_selection must be an integer')
    elseif lag>=0 && lag<=Pmax && floor(lag)==lag
        P=lag;
        % No information criterion is evaluated for a user-supplied lag
        IC=NaN(Pmax+1,1);
    elseif lag<0 && floor(lag)==lag
        error('Number of lags selected is less than zero');
    elseif floor(lag)==lag
        error(['Number of lags selected exeeds allowed limit of ',...
            num2str(Pmax)]);
    else
        error('Input lag_selection must be an integer')
    end
elseif strcmp(lag,'AIC') || strcmp(lag,'BIC')
    [P,IC]=getIC(yh,y1,x,h,xcond,Pmax,lag);
else
    error('Input lag_selection is misspecified');
end

% -------------------------------------------------------------------------
% SETUP

% Remove Pmax+h-1 observations from yh
y=yh((Pmax+h):end);

% Number of observations left for estimation
nObs=T-Pmax-h+1;

% -------------------------------------------------------------------------
% CONSTRUCT LAGS
% ylags = lags of y1 used to estimate the model
% ylags_f = lags of y1 in period T used to forecast yh in period T+h
% xlags = lags of x used to estimate the model
% xlags_f = lags of x in period T used to forecast yh in period T+h

if P>0
    % Preallocate memory
    ylags=NaN(P,nObs);
    ylags_f=NaN(P,1);
    xlags=NaN(P*N,nObs);
    xlags_f=NaN(P*N,1);

    % Create lags; lag l of all N series of x occupies rows
    % (1+N*(l-1)):(N*l) of xlags and xlags_f
    for l=1:P
        ylags(l,:)=y1((Pmax+1-l):(end+1-h-l));
        ylags_f(l)=y1((end+1-l));
        xlags((1+N*(l-1)):(N*l),:)=x(:,(Pmax+1-l):(end+1-h-l));
        xlags_f((1+N*(l-1)):(N*l))=x(:,end+1-l);
    end
else
    ylags=[];
    ylags_f=[];
    xlags=[];
    xlags_f=[];
end

% -------------------------------------------------------------------------
% CONSTRUCT LEADS
% xleads = leads of x series used to estimate model
% xleads_f = conditioning values (taken from xcond) that stand in for the
%            leads of x when forecasting yh in period T+h

% Entries of xcond that are not missing mark the leads to include
leadMask=~isnan(xcond);
nLeads=nnz(leadMask);

% Preallocate memory
xleads=NaN(nLeads,nObs);
xleads_f=NaN(nLeads,1);

% Row index for xleads and xleads_f; initialize to 1
row=1;

% Loop through series in x, then through the leads requested for that
% series (the inner loop simply does not execute when none is requested)
for s=1:N
    for q=find(leadMask(s,:))

        % Fill in row of xleads with lead q of series s
        % Fill in row of xleads_f with the value series s is conditioned
        % on q periods after the forecast origin
        xleads(row,:)=x(s,(Pmax+q):(end-h+q));
        xleads_f(row)=xcond(s,q);

        % Increase row index number by 1
        row=row+1;

    end
end

% -------------------------------------------------------------------------
% ESTIMATE MODEL

% Matrix of predictors; includes constant, lags of y1, certain leads of x,
% and lags of x (one predictor per row, one observation per column)
X=[ones(1,size(y,2));ylags;xleads;xlags];

% Solve equation y=B*X (OLS via the normal equations)
B=y*X'/(X*X');

% -------------------------------------------------------------------------
% CONSTRUCT FORECAST

% Values of predictors at time T, stacked in the same order as the rows of X
X_f=[1;ylags_f;xleads_f;xlags_f];

% Forecasted value
yf=B*X_f;

% Unconditional forecast
% X2=[ones(1,size(y,2));ylags;xlags];
% B2=y*X2'/(X2*X2');
% X2_f=[1;ylags_f;xlags_f];
% yf_u=B2*X2_f;

% -------------------------------------------------------------------------
% R-SQUARED

% Residuals
ehat=y-B*X;

% Sum of squared residuals
SSR=sum(ehat.^2);

% Total sum of squares
SST=sum((y-mean(y)).^2);

% Number of model parameters
k=size(X,1);

% R-squared. The output is called aR2, but the value handed back is the
% plain (unadjusted) 1-SSR/SST; see the OUTPUTS section of the header.
aR2=1-SSR/SST;
if P>0 % Otherwise might get negative number close to 0 if SSR/SST==1 due to precision error
    assert(aR2>=0);
end

% -------------------------------------------------------------------------
% AIC AND BIC

% Number of observations used in estimation
T2=size(y,2);

% One coefficient per row of X
assert(k==numel(B));

AIC=T2*log(SSR/T2)+2*k;
BIC=T2*log(SSR/T2)+k*log(T2);

end


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUBFUNCTION

function [Pbest,IC]=getIC(yh,y1,x,h,xcond,Pmax,method)
% =========================================================================
% DESCRIPTION
% This function returns the optimal number of lags (between 0 and Pmax) for
% the model described above based on either the AIC or BIC as specified by
% the user.
%
% -------------------------------------------------------------------------
% INPUTS
%
%       yh     = dependent variable (a 1 x T row vector where T is the
%                number of time periods)
%
%       y1     = version of dependent variable whose lags are to be used as
%                predictors in the model (a 1 x T row vector)
%
%       x      = set of independent variables whose leads and lags will be
%                used as predictors in the model (an N x T matrix where N is
%                the number of variables)
%
%       h      = forecast horizon (integer greater than 0)
%
%       xcond  = conditional values of the series in x used to construct the
%                forecasts (an N x h array); nonmissing values indicate
%                which leads of which series in x are to be included in the
%                model
%
%       Pmax   = maximum number of lags of y1 and x to include in the model
%
%       method = string determining which information criterion is used to
%                select the number of lags; options:
%                    1) 'AIC' => Akaike information criterion
%                    2) 'BIC' => Bayesian information criterion
%
% OUTPUTS
%
%       Pbest  = number of lags of y1 and x as selected by the information
%                criterion (smallest such number in case of ties); 0 when
%                the criterion could not be evaluated for any number of
%                lags, i.e. when every entry of IC is NaN
%
%       IC     = (Pmax+1) x 1 array containing values of the information
%                criterion for each possible number of lags considered
%                (0 to Pmax); IC(P+1) corresponds to P lags
%
% ERRORS
%
%       An error is raised when method is neither 'AIC' nor 'BIC'.
%
% =========================================================================
% FUNCTION

% -------------------------------------------------------------------------
% CHECKS

% Reject an unknown criterion up front; otherwise every entry of IC would
% silently stay NaN
if ~(strcmp(method,'AIC') || strcmp(method,'BIC'))
    error('Input lag_selection is misspecified');
end

% -------------------------------------------------------------------------
% SETUP

% Remove Pmax+h-1 observations from yh
y=yh(:,(Pmax+h):end);

% N = number of series in x, T = number of observations of the final series
% used for estimation
N=size(x,1);
T=length(y);

% -------------------------------------------------------------------------
% CREATE LAGS AND LEADS

% Create Pmax lags of x and y1; lag l of all N series of x occupies rows
% (1+N*(l-1)):(N*l) of xlags_all
ylags_all=NaN(Pmax,T);
xlags_all=NaN(Pmax*N,T);
for l=1:Pmax
    ylags_all(l,:)=y1((Pmax+1-l):(end+1-h-l));
    xlags_all((1+N*(l-1)):(N*l),:)=x(:,(Pmax+1-l):(end+1-h-l));
end

% Create leads of x: one row per nonmissing entry of xcond, ordered by
% series and then by lead
leadMask=~isnan(xcond);
xleads=NaN(nnz(leadMask),T);
row=1;
for s=1:N
    for q=find(leadMask(s,:))
        xleads(row,:)=x(s,(Pmax+q):(end-h+q));
        row=row+1;
    end
end

% -------------------------------------------------------------------------
% CALCULATE VALUES OF INFORMATION CRITERION

% Penalty applied per estimated parameter: 2 for the AIC, log(T) for the BIC
if strcmp(method,'AIC')
    penalty=2;
else
    penalty=log(T);
end

% Preallocate memory for values of the information criterion
IC=NaN(Pmax+1,1);

% Loop through possible number of lags
for P=0:Pmax

    if P==0
        ylags=[];
        xlags=[];
    else
        % Include P lags of y1 and x
        ylags=ylags_all(1:P,:);
        xlags=xlags_all(1:(N*P),:);
    end

    % Create matrix of predictors
    X=[ones(1,T);ylags;xleads;xlags];

    % Solve equation y=B*X
    B=y*X'/(X*X');

    % Sum of squared residuals
    SSR=sum((y-B*X).^2);

    % Number of parameters estimated
    k=numel(B);

    % Value of the selected information criterion for P lags
    IC(P+1)=T*log(SSR/T)+penalty*k;
end

% -------------------------------------------------------------------------
% OPTIMAL NUMBER OF LAGS
% Find the number of lags that minimizes the information criterion
best=find(IC==min(IC),1,'first');

% When every criterion value is NaN (e.g. missing values in the data) the
% comparison above matches nothing. Fall back to zero lags so that a scalar
% is always returned rather than an empty array.
if isempty(best)
    best=1;
end
Pbest=best-1;

end

