function [theta, Ft, Ga, Uhat, stat] = EstPanelPara(Y, X, T, J)
% This function implements the Estimation Procedure I of CGPX (2020) and
% computes a kernel-based test statistic from the fitted residuals.
%
% Input:
% Y                   (y_{11}, ..., y_{1T}, ..., y_{N1}, ..., y_{NT})'
% X                   (x_{11}, ..., x_{1T}, ..., x_{N1}, ..., x_{NT})'
% T                   The number of time periods
% J                   A user pre-specified number for initial guess on the
%                     total number of factors
%
% Output:
% theta               The estimate of theta_o
% Ft                  The estimate of F_o    T-by-J
% Ga                  The estimate of G_o    N-by-J
% Uhat                The estimate of the error terms NT-by-1
% stat                The constancy test statistic (scalar) built from
%                     Uhat and the kernel weights Kern((t-s)/(T*hstar))
%
% Notes:
% * The iteration is started from a random factor matrix (randn), so the
%   result may depend on the state of the random number generator.
% * EstFPara and Kern are external helpers that must be on the path.

[NT, d] = size(X);
NT1     = size(Y, 1);

if NT ~= NT1
    error('The dimensions of X and Y do not match')
end

if rem(NT, T) ~= 0
    error('The number of individuals is not correct')
end

N = NT/T;  % The number of individuals

% Randomly generate the factor matrix as the initial estimate to start the
% iteration
Ft = randn(T, J);

% Initial estimate of theta_o: project out the column space of the random
% Ft (not orthonormal, hence the explicit (Ft'*Ft) inverse).
MF     = eye(T) - Ft/(Ft'*Ft)*(Ft');
theta1 = localProfileTheta(Y, X, MF, T, N, d);
theta2 = zeros(d, 1);

% Stop the iteration when ||theta1 - theta2|| <= crit, or after max_iter
% passes through the loop (each pass performs two F/theta updates).
crit     = 10^(-4);
max_iter = 100;
no_iter  = 0;

while sqrt((theta1 - theta2)'*(theta1 - theta2)) > crit
    no_iter = no_iter + 1;
    if no_iter > max_iter
        break
    end

    % Update the estimate of F_o given theta1, then theta_o -> theta2
    [ve, ~] = EstFPara(Y, X, theta1, T);
    Ft      = ve(:, 1:J);
    theta2  = localProfileTheta(Y, X, eye(T) - (Ft*(Ft'))/T, T, N, d);

    % Update the estimate of F_o given theta2, then theta_o -> theta1
    [ve, ~] = EstFPara(Y, X, theta2, T);
    Ft      = ve(:, 1:J);
    theta1  = localProfileTheta(Y, X, eye(T) - (Ft*(Ft'))/T, T, N, d);
end

% Output for theta_o
theta = theta1;

% Output for the estimate of F_o
[ve, ~] = EstFPara(Y, X, theta, T);
Ft      = ve(:, 1:J);

% Output for the estimate of G_o.
% Column i of Res holds the T residuals y_i - x_i*theta of individual i.
Res = reshape(Y - X*theta, [T, N]);
Ga  = (Res' * Ft) / T;

% Residuals after removing the estimated factor structure.
U    = Res - Ft * (Ga');          % T-by-N, U(t,i) = uhat_{it}
Uhat = reshape(U, [NT, 1]);

% NOTE(review): the bandwidth below (and the scaling of stat at the end)
% uses T*T; confirm against the paper whether N*T was intended.
hstar = 2.34 * sqrt( mean(Uhat.^2) ) / ( (T*T)^(1/5) );
%disp(hstar)

% Start the constancy test
% Kh is symmetric with a zero diagonal (t == s is excluded).
Kh = zeros(T,T);
for t = 2:T
    for s = 1:(t-1)
        Kh(t,s) = Kern( (t-s)/(T*hstar) );
        Kh(s,t) = Kh(t,s);
    end
end

% LNT = sum_{i ~= j} sum_{t,s} uhat_{it} * uhat_{js} * Kh(t,s).
% G(i,j) = u_i' * Kh * u_j, so LNT is the sum of the off-diagonal entries
% of G. (The previous implementation formed an NT-by-NT matrix and
% subtracted the first individual's diagonal block twice.)
G   = U' * Kh * U;                 % N-by-N
LNT = sum( G, 'all' ) - trace(G);

% BNT = 2 * sum_{t > s} ( Kh(t,s) * sum_{i ~= j} uhat_{it}*uhat_{js} )^2.
% sum_{i ~= j} u_{it} u_{js} = (sum_i u_{it})*(sum_j u_{js}) - sum_i u_{it} u_{is}.
% (The previous implementation indexed a time-major NT-by-NT matrix with
% T-sized blocks, which is only aligned with the (t,s) blocks when N == T.)
rowSum = sum(U, 2);                            % T-by-1, sum over individuals
M      = Kh .* ( rowSum * (rowSum') - U * (U') ); % T-by-T
BNT    = 2 * sum( tril(M, -1).^2, 'all' );

% Reference (slow) definition kept for documentation purposes:
%   LNT ~ 4 * sum_{t>s} sum_{i>j} Uhat_{it}*Uhat_{js}*Kh(t,s)
%   BNT ~ 8 * sum_{t>s} Kh(t,s)^2 * ( sum_{i>j} Uhat_{it}*Uhat_{js} )^2

tempL = (N^2)*(T^2);
tempB = (N^4)*(T^2);
stat = sqrt(T*T)* (LNT/tempL) / sqrt(2*BNT / tempB);

end

function theta = localProfileTheta(Y, X, MF, T, N, d)
% Least-squares estimate of theta after applying the T-by-T matrix MF to
% every individual's block: solves
%   ( sum_i X_i' * MF * X_i ) * theta = sum_i X_i' * MF * Y_i.
A = zeros(d, d);
b = zeros(d, 1);
for i = 1:N
    ind = (i-1)*T + (1:T);
    XM  = X(ind, :)' * MF;
    A   = A + XM * X(ind, :);
    b   = b + XM * Y(ind, :);
end
theta = A \ b;
end