% Determine the number of factors and the lag length by cross-validation
% (the candidate with the smallest root mean squared prediction error is selected).
% Last checked: 05/23/2019

function [prediction_optimal, r_f_optimal, k_f_optimal]=cross_validation(XDATA,ZDATA,YDATA,yDATA,h,r_0,kmax,method)
% CROSS_VALIDATION  Choose the number of factors and lags by cross-validation.
%
%   Every pair (r,k) with r in 0:r_0 and k in 0:kmax is evaluated on an
%   expanding window that starts with the first half of the rows of XDATA.
%   The pair with the smallest root mean squared error against the realised
%   YDATA is selected, and the forecast is recomputed with that pair on the
%   full sample.
%
%   Inputs
%     XDATA  - panel passed to PREDICTION; rows are time periods.
%     ZDATA  - additional series passed to PREDICTION, sliced by the same rows.
%     YDATA  - target series. It is indexed linearly and expanded with
%              ones(1,K), so it must be a column vector.
%     yDATA  - series handed to PREDICTION in place of YDATA when method==1.
%     h      - horizon: offset (in rows) between the last row of XDATA used
%              in a window and the row of YDATA being predicted.
%     r_0    - largest number of factors considered.
%     kmax   - largest number of lags considered.
%     method - 0: I(1) level approach, 1: I(0) differenced approach.
%
%   Outputs
%     prediction_optimal - PREDICTION output for the selected pair, full sample.
%     r_f_optimal        - selected number of factors (scalar).
%     k_f_optimal        - selected number of lags (scalar).
%
%   Errors are raised for an unsupported method, an empty candidate grid and
%   a sample that is too short to provide at least one evaluation window.

% Candidate grid: one row per (number of factors, number of lags) pair.
rGrid=(0:r_0)';
kGrid=(0:kmax)';
RK_f=[kron(rGrid,ones(numel(kGrid),1)), kron(ones(numel(rGrid),1),kGrid)];
% Row count of the grid. length(RK_f) would return 2 for a 1-by-2 grid
% (r_0==0 and kmax==0) and make the model loop index past the last row.
nModels=size(RK_f,1);
if nModels<1
    error('cross_validation:emptyGrid','r_0 and kmax must both be non-negative.');
end

% The two approaches share one code path. They differ in the series handed to
% PREDICTION for estimation and in a one-row offset into YDATA.
if method==0 % I(1) level approach
    yEst=YDATA;
    flag=0;
elseif method==1 % I(0) differenced approach
    yEst=yDATA;
    flag=1; % YDATA is indexed one row later than XDATA in this approach
else
    error('cross_validation:invalidMethod','method must be 0 (level) or 1 (differenced).');
end

Tsample=size(XDATA,1);          % sample size for estimating common factors
Tpresample=floor(Tsample*0.5);  % first half of the sample is the initial window
nEval=Tsample-Tpresample-h+1;   % number of evaluation windows
if nEval<1
    error('cross_validation:sampleTooShort', ...
        'No evaluation window is available: need size(XDATA,1)-floor(size(XDATA,1)/2)-h+1 >= 1.');
end

prediction_bin=zeros(nEval,nModels); % bins for prediction
for tt=1:nEval
    tEnd=Tpresample+tt-1; % last row of XDATA/ZDATA in this window
    % The window data do not depend on the candidate model, so slice once.
    Xwin=XDATA(1:tEnd,:);
    Zwin=ZDATA(1:tEnd,:);
    % NOTE(review): the rows handed to PREDICTION run up to tEnd+h, which
    % includes the row used below as the realised value. Confirm that
    % PREDICTION does not use it when fitting (otherwise this is look-ahead).
    ywin=yEst(1:tEnd+h,:);
    yLast=YDATA(tEnd+flag);
    for ll=1:nModels
        prediction_bin(tt,ll)=prediction(Xwin,Zwin,ywin,yLast,RK_f(ll,1),RK_f(ll,2),h,flag);
    end
end

% Realised values aligned with the rows of prediction_bin.
actual=YDATA(Tpresample+flag+h:Tsample+flag)*ones(1,nModels);

% Column-wise RMSE. The explicit dimension matters when nEval==1: without it
% MEAN would collapse the single row to one scalar shared by all models.
RMSE=sqrt(mean((prediction_bin-actual).^2,1))';

% First minimiser: always exactly one index, even on ties or when some
% (or all) RMSE values are NaN, so the selected orders are scalars.
[~,best]=min(RMSE);
r_f_optimal=RK_f(best,1); % optimal number of factors for the forecasting equation
k_f_optimal=RK_f(best,2); % optimal number of lags for the forecasting equation

% Optimal prediction on the full sample with the selected pair.
prediction_optimal=prediction(XDATA,ZDATA,yEst,YDATA(Tsample+flag),r_f_optimal,k_f_optimal,h,flag);

end