%% Bailey, N., Holly, S. and Pesaran, H. P. (2015). A Two Stage Approach to Spatio-Temporal Analysis with Strong and Weak Cross-Sectional Dependence. 
                                                    %Journal of Applied Econometrics, forthcoming.

%% housekeeping
% Start from an empty workspace and a clean command window. Plain `clear`
% removes the workspace variables, which is all this script needs; unlike
% `clear all` it does not also purge compiled functions, MEX files and
% debugger breakpoints from memory.
clear
clc

%% Settings
% NOTE(review): only `factor` is read further down in the visible script
% (third CS_PC_est call). `reg_gl` and `pc_cs` are not referenced below - the
% CS_PC_est calls pass those switches as literal values - so changing them
% here has no effect on the visible code.
% NOTE(review): `factor` shadows MATLAB's built-in factor() while it is in
% the workspace.
factor=4;       % number of global PCs: 2,3,4,5,6,7,8
reg_gl=0;       % 0 when using regional factors (CSA or PCA), 1 when using global factors (PCA) 
pc_cs=0;        % 2 when using regional principal components (2 per region), 0 when using regional cross sectional averages

%% Input Data
%{
% Reads from .xlsx files
% State level data (s_variable, identifier=1-51)
s_CPI = xlsread('BPS_hp_data.xlsx','State_CPI','b239:ay382'); % State level CPI; 1975Q1-2010Q4; (AK) missing
s_CPI_ID = xlsread('BPS_hp_data.xlsx','State_CPI','b2:ay2'); % State level CPI identifier

% MSA level data (msa_variable, identifier=1001-1366)
msa_HP = xlsread('BPS_hp_data.xlsx','MSA_house_prices','b3:nc146'); % MSA level house prices; 1975Q1-2010Q4
msa_HP_sID = xlsread('BPS_hp_data.xlsx','MSA_house_prices','b1:nc1'); % MSA level house prices state identifier
msa_HP_mID = xlsread('BPS_hp_data.xlsx','MSA_house_prices','b2:nc2'); % MSA level house prices msa identifier

msa_Dist = xlsread('BPS_hp_data.xlsx','MSA_distances','c3:nd368'); % MSA level distances
msa_Dist_sID = xlsread('BPS_hp_data.xlsx','MSA_distances','c1:nd1'); % MSA level distances state identifier
msa_Dist_mID = xlsread('BPS_hp_data.xlsx','MSA_distances','c2:nd2'); % MSA level distances msa identifier

% Seasonal dummies
s_d= xlsread('BPS_hp_data.xlsx','Seasonal dummies','b2:d145'); % 3 seasonal dummies; 1975Q1-2010Q4

% Principal Component Analysis
pc_m= xlsread('BPS_hp_data.xlsx','global factors','a2:h144'); % 8 global factors
pc_2reg= xlsread('BPS_hp_data.xlsx','regional factors','a2:p144'); % 2 regional factors per region (8 regions)
%}

% Reads from .csv files
% csvread(file,R,C) takes zero-based row/column offsets: (1,1) skips the first
% row and first column, (0,1) skips only the first column, (1,0) skips only
% the first row, (0,0) reads the whole file.
f_CPI      = csvread('State_CPI.csv',1,1);
f_msa_HP   = csvread('MSA_house_prices.csv',0,1);
f_msa_Dist = csvread('MSA_distances.csv',0,0);
s_d        = csvread('Seasonal_dummies.csv',1,1);   % 3 seasonal dummies; 1975Q1-2010Q4
pc_m       = csvread('Global_factors.csv',1,0);     % 8 global factors (principal components)
pc_2reg    = csvread('Regional_factors.csv',1,0);   % 2 regional factors per region (8 regions)

% State level data (s_variable, identifier=1-51)
% First row read = state identifiers; rows 238 onwards = the CPI sample.
s_CPI_ID = f_CPI(1,:);        % State level CPI identifier
s_CPI    = f_CPI(238:end,:);  % State level CPI; 1975Q1-2010Q4; (AK) missing

% MSA level data (msa_variable, identifier=1001-1366)
% House prices: row 1 = state identifier, row 2 = MSA identifier, data below.
msa_HP_sID = f_msa_HP(1,:);       % MSA level house prices state identifier
msa_HP_mID = f_msa_HP(2,:);       % MSA level house prices msa identifier
msa_HP     = f_msa_HP(3:end,:);   % MSA level house prices; 1975Q1-2010Q4

% Distances: the first two rows and first two columns of the file are
% skipped/used as identifiers; the distance matrix starts at (3,3).
msa_Dist_sID = f_msa_Dist(1,3:end);       % MSA level distances state identifier
msa_Dist_mID = f_msa_Dist(2,3:end);       % MSA level distances msa identifier
msa_Dist     = f_msa_Dist(3:end,3:end);   % MSA level distances

%% Data preparation (house prices and distance)
% Exclude MSAs located in Alaska and Hawaii (state identifiers 46 and 48).
% The MSAs to discard are selected with logical masks built from the
% identifier rows, instead of overwriting them with zeros and then stripping
% every all-zero column (which would also silently drop any column that
% happened to be all zeros for another reason).
n=size(msa_HP,2);                                       % number of MSAs before the exclusion
excl_sID=[46 48];                                       % state identifiers to exclude
hp_prel=vertcat(msa_HP_sID,msa_HP_mID,msa_HP);          % rows: state ID, MSA ID, house prices
dist_prel=vertcat(msa_Dist_sID,msa_Dist_mID,msa_Dist);  % rows: state ID, MSA ID, distances

% A column is dropped from BOTH tables when either file tags it with an
% excluded state. This assumes the two files list the MSAs in the same
% column order - TODO confirm.
drop_col=ismember(msa_HP_sID,excl_sID) | ismember(msa_Dist_sID,excl_sID);
hp_prel=hp_prel(:,~drop_col);
dist_prel=dist_prel(:,~drop_col);

% Rows of the distance matrix are screened on the distance file's own state
% identifier only. dist_f has one row per retained MSA:
% [state ID, MSA ID, distances to the retained MSAs].
drop_row=ismember(msa_Dist_sID,excl_sID);
dist_f=horzcat(msa_Dist_sID(~drop_row)',msa_Dist_mID(~drop_row)',msa_Dist(~drop_row,~drop_col));

% Border the matrix with the identifier vectors on top as well, so rows and
% columns can both be reordered with sortrows; the 2x2 zero block fills the
% top-left corner.
fill_in=zeros(2,2);
dist_pp=[fill_in dist_f(:,1:2)'];
dist_ff=vertcat(dist_pp,dist_f);

% Order rows by state identifier (column 1), then transpose and sort again so
% the other dimension follows the same order. The two border rows carry key 0
% and therefore come first as long as all state identifiers are positive.
% NOTE(review): this relies on sortrows leaving tied rows in their original
% order (border rows, and MSAs within the same state) - confirm.
dist_ff1=sortrows(dist_ff,1);
dist_ff2=sortrows(dist_ff1',1);
% Strip the identifier border. The result is the transpose of the reordered
% distance block, which only equals it if msa_Dist is symmetric - presumably
% true for a distance matrix; verify against the data.
dist_fff= dist_ff2(3:end,3:end);

% State CPI: put the identifiers on top of the series, remove state 48 and
% order the remaining columns by state identifier.
cpi_s=[s_CPI_ID; s_CPI];
s=size(cpi_s,2);
% Blank out the column(s) tagged 48 (46 is already excluded from this file
% according to the original note), then keep every column that still has at
% least one entry different from zero.
cpi_s(:,cpi_s(1,:)==48)=0;
cpi_s=cpi_s(:,any(cpi_s~=0,1));
% sortrows works on rows, so sort the transposed table on its identifier
% column and transpose back.
cpi_s=cpi_s';
p_s=sortrows(cpi_s,1)';     % row 1: state identifiers, rows 2..end: CPI
p_sf=p_s(2:end,:);          % CPI series only, columns ordered by state identifier

% Real house prices
% Put the state identifiers of the retained MSAs (first column of dist_f)
% above the MSA identifiers and price series, then order the MSA columns by
% state identifier (sortrows acts on rows, hence the transposes).
hhp_prel=[dist_f(:,1)'; hp_prel(2:end,:)];
hp_prel3=sortrows(hhp_prel',1)';    % rows: state ID, MSA ID, house prices
hp=hp_prel3(3:end,:);               % nominal house prices only

% For every MSA, look up the column of its state in the CPI table; the second
% output of ismember gives the first matching position in p_s(1,:).
[~,r_s]=ismember(hp_prel3(1,:),p_s(1,:));
% Log of the house price deflated by the CPI of the MSA's state.
rhp=log(hp./p_sf(:,r_s));

% Real house price changes
% First difference of the log real house prices along the time dimension
% (rows). The first period has no predecessor, so d_rhp has one row fewer
% than rhp.
d_rhp=diff(rhp,1,1);

% Real house price changes (seasonally adjusted)
% Each MSA series is regressed on an intercept and the seasonal dummies;
% x collects the residuals, one column per MSA.
N=size(d_rhp,2);    % number of MSAs
T=size(d_rhp,1);    % number of time periods after differencing
x=zeros(T,N);

% The regressors are the same for every MSA, so the design matrix and the
% coefficient map (X'X)\X' are built once instead of once per column.
% The first row of s_d is dropped to line the dummies up with the
% differenced series.
X=[ones(T,1) s_d(2:end,:)];
ols_map=(X'*X)\X';
for i=1:N
    Y=d_rhp(:,i);
    Coef=ols_map*Y;     % same evaluation order as (X'*X)\X'*Y
    x(:,i)=Y-X*Coef;    % residual = seasonally adjusted series
end

%% MSA housing price regressions
  % Regional CS vs regional PC vs global PC
  % CS_PC_est is defined outside this file. Per the inline notes below, its
  % last three arguments are pc_cs, reg_gl and factor; the first two calls pass
  % them as literals, only the third call reads the `factor` setting.
  % NOTE(review): res_gpc, res_gpc_dm and nr_gpc are not referenced again in
  % the visible script.
  [res,res_dm,nr]=CS_PC_est(x,hp_prel3,pc_m,pc_2reg,N,T,0,0,4); % regional cross-sectional averages (pc_cs=0; reg_gl=0; factor=4 - not relevant)
  [res_pc,res_pc_dm,nr_pc]=CS_PC_est(x,hp_prel3,pc_m,pc_2reg,N,T,2,0,4); % regional principal components (pc_cs=2; reg_gl=0; factor=4 - not relevant)
  [res_gpc,res_gpc_dm,nr_gpc]=CS_PC_est(x,hp_prel3,pc_m,pc_2reg,N,T,0,1,factor); % global principal components (pc_cs=0 - not relevant; reg_gl=1; factor - range 2-8)
        
 %% Estimated W matrix using Multiple Testing
 % MT is defined outside this file; it is called with the *_dm output of
 % CS_PC_est, the nominal size and the nr output, and returns three matrices.
 a_size2=0.05;
 [R_u_thr,R_u_p,R_u_n] = MT(res_dm,a_size2,nr);                % Using regional CS approach
 [Rpc_u_thr,Rpc_u_p,Rpc_u_n] = MT(res_pc_dm,a_size2,nr_pc);    % Using regional PC approach

 % Identity matrices used to add/remove the unit diagonal.
 % NOTE(review): the arithmetic below suggests the thr/p outputs of MT carry
 % ones on the diagonal and the n output zeros - confirm against MT.
 I_cs=eye(size(res,1));
 I_pc=eye(size(res_pc,1));

 % Cross-sectional averages regional approach
 % (W_* keep/receive the unit diagonal, W1_* are the versions without it)
 W_hat=R_u_thr;
 W_hatp=R_u_p;
 W_hatn=R_u_n+I_cs;
 W1_hat=R_u_thr-I_cs;   % W_hat
 W1_hatp=R_u_p-I_cs;    % W_hat+
 W1_hatn=R_u_n;         % W_hat-

 % Principal components regional approach
 Wpc_hat=Rpc_u_thr;
 Wpc_hatp=Rpc_u_p;
 Wpc_hatn=Rpc_u_n+I_pc;
 W1pc_hat=Rpc_u_thr-I_pc;   % W_hat
 W1pc_hatp=Rpc_u_p-I_pc;    % W_hat+
 W1pc_hatn=Rpc_u_n;         % W_hat-

%% W matrix based on geographical distance
% W_dist is defined outside this file; it is called with a distance
% threshold (50, 100, 200) and the sorted distance matrix.
% W<d>   : as returned by W_dist (1s on diagonal)
% W<d>_1 : same matrix with the identity subtracted (0s on diagonal)
n1=size(dist_fff,2);
I_d=eye(n1);

W50=W_dist(50,dist_fff);
W50_1=W50-I_d;

W100=W_dist(100,dist_fff);
W100_1=W100-I_d;

W200=W_dist(200,dist_fff);
W200_1=W200-I_d;

%% Comparison of W matrices
% W_comp is defined outside this file. Each call takes two W matrices (both
% with the unit diagonal) and N, the number of MSAs, and returns two outputs
% that the output section prints as a contingency table (Cont*) and a
% Pearson chi-square statistic (Chisq*).
% Suffixes: 50/100/200 = distance threshold passed to W_dist, p = W_hat+,
% n = W_hat-, pc = regional principal components approach.
% NOTE(review): only Cont_100p, Cont_100n, Cont_p, Cont_n and the twelve
% Chisq_<d>*/Chisqpc_<d>* values are displayed below; the remaining outputs
% are computed but not printed in the visible script.

% Comparison of Distance vs Estimated W using CS regional approach
[Cont_50p, Chisq_50p]=W_comp(W50,W_hatp,N); 
[Cont_50n, Chisq_50n]=W_comp(W50,W_hatn,N);
[Cont_100p, Chisq_100p]=W_comp(W100,W_hatp,N);
[Cont_100n, Chisq_100n]=W_comp(W100,W_hatn,N);
[Cont_200p, Chisq_200p]=W_comp(W200,W_hatp,N);
[Cont_200n, Chisq_200n]=W_comp(W200,W_hatn,N);

% Comparison of Distance vs Estimated W using PC regional approach
[Contpc_50p, Chisqpc_50p]=W_comp(W50,Wpc_hatp,N); 
[Contpc_50n, Chisqpc_50n]=W_comp(W50,Wpc_hatn,N);
[Contpc_100p, Chisqpc_100p]=W_comp(W100,Wpc_hatp,N);
[Contpc_100n, Chisqpc_100n]=W_comp(W100,Wpc_hatn,N);
[Contpc_200p, Chisqpc_200p]=W_comp(W200,Wpc_hatp,N);
[Contpc_200n, Chisqpc_200n]=W_comp(W200,Wpc_hatn,N);

% Comparison of Estimated W using CS vs PC regional approaches
[Cont_p, Chisq_p]=W_comp(W_hatp,Wpc_hatp,N);
[Cont_n, Chisq_n]=W_comp(W_hatn,Wpc_hatn,N);

%% Output of Tables 2, 3, 4 and 5 in Section 4.2.4 of BHP paper
% Prints the W_comp results to the command window. Label typos are corrected
% ('Contingency', and 'W_pc' in the Table 5 column header so that it agrees
% with the Table 5 title); the numbers printed are unchanged.

% Table 2: contingency tables of the distance-based W (100 threshold)
% against W_hat+ and W_hat- from the cross-sectional averages approach,
% printed side by side.
disp('Table 2: Contingency table for W_cs+ and W_cs- vs W_100');
disp(' ');
disp(['W100 vs W_cs+  ','W100 vs W_cs-']);
disp(' ');
disp(num2str([Cont_100p Cont_100n]));
disp(' ');

% Table 3: chi-square statistics for the cross-sectional averages approach;
% first printed row uses W_hat+, second row W_hat-.
disp('Table 3: Pearson Chi_sq statistics');
disp(' ');
disp(['W50         ','W100         ','W200         ']);
disp(' ');
disp(num2str([Chisq_50p Chisq_100p Chisq_200p]));
disp(num2str([Chisq_50n Chisq_100n Chisq_200n]));
disp(' ');

% Table 4: same layout as Table 3 for the principal components approach.
disp('Table 4: Pearson Chi_sq statistics');
disp(' ');
disp(['W50         ','W100         ','W200         ']);
disp(' ');
disp(num2str([Chisqpc_50p Chisqpc_100p Chisqpc_200p]));
disp(num2str([Chisqpc_50n Chisqpc_100n Chisqpc_200n]));
disp(' ');

% Table 5: contingency tables comparing the two estimated W matrices
% (cross-sectional averages vs principal components), + and - side by side.
disp('Table 5: Contingency table for W_cs+ and W_cs- vs W_pc+ and W_pc-');
disp(' ');
disp(['W_cs+ vs W_pc+  ','W_cs- vs W_pc-']);
disp(' ');
disp(num2str([Cont_p Cont_n]));
disp(' ');



