/* Start from a clean GAUSS workspace before anything is loaded. */
new;
/* Assign the result of 0/0 to _dxmiss; nothing else in this file reads it.
   NOTE(review): presumably this sets the missing-value code used by GAUSS
   data handling - confirm against the GAUSS documentation. */
_dxmiss=0/0;

/*Bailey, N., Holly, S. and Pesaran, M. H. (2015). A Two Stage Approach to Spatio-Temporal Analysis with Strong and Weak Cross-Sectional Dependence. 
                                                   Journal of Applied Econometrics, forthcoming.*/

/*********** Input Data *******************/
/* Every national file is read as a 363 x 143 matrix. The matrices are
   transposed in the data-preparation section below, so the procedures
   receive them as 143 x 363 (rows = T, columns = N in the procedures). */
load x_or1[363,143]="xdata.txt"; /* HP data */
load x_cs1[363,143]="rescsregr.txt"; /* HP residual data from cs regression */
load x_rpc1[363,143]="res2pcreg.txt"; /* HP residual data from regional pc regression (2pcs per region) */

/* HP residuals from regression with 2,3,...,8 PCs */
load x_g2pc1[363,143]="res2pc.txt";
load x_g3pc1[363,143]="res3pc.txt";
load x_g4pc1[363,143]="res4pc.txt";
load x_g5pc1[363,143]="res5pc.txt";
load x_g6pc1[363,143]="res6pc.txt";
load x_g7pc1[363,143]="res7pc.txt";
load x_g8pc1[363,143]="res8pc.txt";

/* Regional differenced HP data (sa) */
/* One file per region, each with 143 columns. The declared row counts
   (15+36+114+61+32+38+22+45) add up to the 363 rows of the national files. */
load x_nengl1[15,143]="nengl.txt";
load x_mideast1[36,143]="meast.txt";
load x_seast1[114,143]="seast.txt";
load x_glake1[61,143]="glake.txt";
load x_plain1[32,143]="plain.txt";
load x_swest1[38,143]="swest.txt";
load x_rmount1[22,143]="rmount.txt";
load x_fwest1[45,143]="fwest.txt";

/********** Data Preparation ************/
/* Significance level passed to atildeall() for its thresholding step. */
a_size=0.05;

/* Choice of dataset */
/* Transpose every loaded matrix so that rows index the 143 columns of the
   input files and columns index the units; the procedures below take
   t=rows(x) and n=cols(x). */
x_or=x_or1'; 
x_cs=x_cs1'; 
x_rpc=x_rpc1'; 

x_g2pc=x_g2pc1'; 
x_g3pc=x_g3pc1'; 
x_g4pc=x_g4pc1'; 
x_g5pc=x_g5pc1'; 
x_g6pc=x_g6pc1'; 
x_g7pc=x_g7pc1'; 
x_g8pc=x_g8pc1'; 

x_nengl=x_nengl1';
x_mideast=x_mideast1';
x_seast=x_seast1';
x_glake=x_glake1';
x_plain=x_plain1';
x_swest=x_swest1';
x_rmount=x_rmount1';
x_fwest=x_fwest1';

/* Standardise data */
/* Column-wise standardisation (see standx). The standardised copies are the
   inputs to atildeall(); the CD test and bai() below use the unstandardised
   matrices (bai standardises internally). */
{xor_st}=standx(x_or);
{xcs_st}=standx(x_cs);
{xrpc_st}=standx(x_rpc);
{x2pc_st}=standx(x_g2pc);
{x3pc_st}=standx(x_g3pc);
{x4pc_st}=standx(x_g4pc);
{x5pc_st}=standx(x_g5pc);
{x6pc_st}=standx(x_g6pc);
{x7pc_st}=standx(x_g7pc);
{x8pc_st}=standx(x_g8pc);

/*********** CD test *************/
/* For each dataset CD() returns the average pairwise correlation (av_c_*)
   and the CD statistic (cd_*). */
{av_c_or,cd_or}=CD(x_or);
{av_c_cs,cd_cs}=CD(x_cs);
{av_c_rpc,cd_rpc}=CD(x_rpc);
{av_c_2pc,cd_2pc}=CD(x_g2pc);
{av_c_3pc,cd_3pc}=CD(x_g3pc);
{av_c_4pc,cd_4pc}=CD(x_g4pc);
{av_c_5pc,cd_5pc}=CD(x_g5pc);
{av_c_6pc,cd_6pc}=CD(x_g6pc);
{av_c_7pc,cd_7pc}=CD(x_g7pc);
{av_c_8pc,cd_8pc}=CD(x_g8pc);

/********** Exponent of cross-sectional dependence ************/
/* atildeall() is applied to the standardised data and returns the estimate
   of the exponent (a_*) together with its standard error (se_*). */
{a_or,se_or}=atildeall(xor_st,a_size);
{a_cs,se_cs}=atildeall(xcs_st,a_size);
{a_rpc,se_rpc}=atildeall(xrpc_st,a_size);
{a_2pc,se_2pc}=atildeall(x2pc_st,a_size);
{a_3pc,se_3pc}=atildeall(x3pc_st,a_size);
{a_4pc,se_4pc}=atildeall(x4pc_st,a_size);
{a_5pc,se_5pc}=atildeall(x5pc_st,a_size);
{a_6pc,se_6pc}=atildeall(x6pc_st,a_size);
{a_7pc,se_7pc}=atildeall(x7pc_st,a_size);
{a_8pc,se_8pc}=atildeall(x8pc_st,a_size);

/********** Bai and Ng test for number of PCs and PCA **********/
/* Any opt other than 1 makes bai() evaluate both criterion forms for each of
   the three penalties in penall, so every nof_* result is a 6 x 1 vector. */
opt=2; /* All 6 Information criteria are reported */
/* Choice of maximum number of factors (4 or 8 in paper) */
maxk8=8; 
maxk4=4;

/* National data */
{nof_nat8} = bai(x_or,maxk8,&penall,opt);
{nof_nat4} = bai(x_or,maxk4,&penall,opt);

/* Regional data: each region is evaluated with both maximum factor counts */
{nof_ne8} = bai(x_nengl,maxk8,&penall,opt);
{nof_ne4} = bai(x_nengl,maxk4,&penall,opt);
{nof_me8} = bai(x_mideast,maxk8,&penall,opt);
{nof_me4} = bai(x_mideast,maxk4,&penall,opt);
{nof_se8} = bai(x_seast,maxk8,&penall,opt);
{nof_se4} = bai(x_seast,maxk4,&penall,opt);
{nof_gl8} = bai(x_glake,maxk8,&penall,opt);
{nof_gl4} = bai(x_glake,maxk4,&penall,opt);
{nof_pl8} = bai(x_plain,maxk8,&penall,opt);
{nof_pl4} = bai(x_plain,maxk4,&penall,opt);
{nof_sw8} = bai(x_swest,maxk8,&penall,opt);
{nof_sw4} = bai(x_swest,maxk4,&penall,opt);
{nof_rm8} = bai(x_rmount,maxk8,&penall,opt);
{nof_rm4} = bai(x_rmount,maxk4,&penall,opt);
{nof_fw8} = bai(x_fwest,maxk8,&penall,opt);
{nof_fw4} = bai(x_fwest,maxk4,&penall,opt);

/* First m Principal Components */
/* 8 components from the national panel, 2 from each regional panel. Note
   that factorxx() is given the raw (unstandardised) matrices here. */
{or_mpc}=factorxx(x_or,8);

{ne_mpc}=factorxx(x_nengl,2);
{me_mpc}=factorxx(x_mideast,2);
{se_mpc}=factorxx(x_seast,2);
{gl_mpc}=factorxx(x_glake,2);
{pl_mpc}=factorxx(x_plain,2);
{sw_mpc}=factorxx(x_swest,2);
{rm_mpc}=factorxx(x_rmount,2);
{fw_mpc}=factorxx(x_fwest,2);

/* ***************************** Output ******************************* */
/* Expression statements without an assignment are printed by GAUSS;
   the empty strings produce blank separator lines. */
"";"";
"Average pairwise correlations, CD statistics, Exponents of CSD (alpha) and s.e. ";
/* One row per dataset, in the order: original data, cs-regression residuals,
   regional-PC residuals, then residuals from 2,...,8 global PCs.
   Columns: average correlation ~ CD statistic ~ alpha ~ s.e. */
av_c_or~cd_or~a_or~se_or;
av_c_cs~cd_cs~a_cs~se_cs;
av_c_rpc~cd_rpc~a_rpc~se_rpc;
av_c_2pc~cd_2pc~a_2pc~se_2pc;
av_c_3pc~cd_3pc~a_3pc~se_3pc;
av_c_4pc~cd_4pc~a_4pc~se_4pc;
av_c_5pc~cd_5pc~a_5pc~se_5pc;
av_c_6pc~cd_6pc~a_6pc~se_6pc;
av_c_7pc~cd_7pc~a_7pc~se_7pc;
av_c_8pc~cd_8pc~a_8pc~se_8pc;
"";"";
"Table 1 (Section 4.1.2 of BHP paper): Selected no. of factors (Bai & Ng procedure - 6 Information Criteria)";
"National - New Engl. - Mid East - South East - Gr. Lakes - Plains - South West - Rock. Mount. - Far West";
/* Rows = the six criteria returned by bai(), columns = national + 8 regions.
   First table: maximum of 8 factors; second table: maximum of 4 factors. */
nof_nat8~nof_ne8~nof_me8~nof_se8~nof_gl8~nof_pl8~nof_sw8~nof_rm8~nof_fw8;
"";"";
nof_nat4~nof_ne4~nof_me4~nof_se4~nof_gl4~nof_pl4~nof_sw4~nof_rm4~nof_fw4;
"";"";
"Strongest Principal Components";
"Original Data (8 global factors)";
or_mpc;
"";"";
"Regional Data (2 factors per region): New Engl. - Mid East - South East - Gr. Lakes - Plains - South West - Rock. Mount. - Far West";
/* Two columns per region, concatenated side by side in the order printed above. */
ne_mpc~me_mpc~se_mpc~gl_mpc~pl_mpc~sw_mpc~rm_mpc~fw_mpc;


/* ******************************************************** Procedures ******************************************************* */
/* Standardise data
   Each column of x has its mean (meanc) subtracted and is then divided by
   its standard deviation (stdc).

   Input:  x - matrix; columns are standardised independently
   Output: matrix with the same dimensions as x */
proc standx(x);
    local col_mean, col_sd;
    col_mean = meanc(x)';   /* 1 x cols(x) row of column means */
    col_sd = stdc(x)';      /* 1 x cols(x) row of column standard deviations */
    /* The row vectors are applied to every row of x element by element. */
    retp((x - col_mean) ./ col_sd);
endp;

/* Pesaran, M.H. (2015). Testing Weak Cross-sectional Dependence in Large Panels. Econometric Reviews (forthcoming) */
/* CD test
   Computes the average pairwise correlation of the columns of x and the
   corresponding CD statistic.

   Input:
     x - data matrix; t=rows(x) observations on n=cols(x) units
   Output (2 values):
     avg_p_corr - (2/(n(n-1))) * sum of the correlations over all pairs i<j
     wdc        - sqrt(t/2) * sqrt(n(n-1)) * avg_p_corr

   The earlier version flagged |wdc| > 1.96 in a local that was never
   returned, and removed the diagonal of the correlation matrix by sorting
   all n(n+1)/2 entries and dropping the n largest. The diagonal is now
   subtracted directly, which does not depend on the diagonal entries being
   the largest ones and avoids the sort. */
proc (2)=CD(x);
    local n, t, corr_m, sum_pairs, avg_p_corr, wdc;
    n = cols(x);
    t = rows(x);
    corr_m = corrx(x);
    /* vech() stacks the lower triangle including the diagonal; removing the
       diagonal sum leaves the sum over the n(n-1)/2 distinct pairs. */
    sum_pairs = sumc(vech(corr_m)) - sumc(diag(corr_m));
    avg_p_corr = (2/(n*(n-1)))*sum_pairs;
    wdc = sqrt(t/2)*sqrt(n*(n-1))*avg_p_corr;
    retp(avg_p_corr, wdc);
endp;


/* Bai, J. and Ng, S. (2002). Determining the Number of Factors in Approximate Factor Models. Econometrica, 70:191-221*/
/* First k principal components of x.

   Input:
     x - data matrix
     k - number of components to return
   Output:
     rows(x) x k matrix.
     If rows(x) <= cols(x): the k leading eigenvectors of x*x'.
     Otherwise: x times the k leading eigenvectors of x'x.

   eighv() is relied on to return eigenvalues in ascending order, so the
   eigenvector columns are reversed before the first k are taken. */
proc factorxx(x,k);
    local tall, evals, evecs, lead;
    tall = rows(x) > cols(x);
    if tall;
        {evals, evecs} = eighv(x'x);
    else;
        {evals, evecs} = eighv(x*x');
    endif;
    lead = rev(evecs')';     /* reverse the column order: largest eigenvalue first */
    lead = lead[.,1:k];
    if tall;
        lead = x*lead;       /* project the data on the leading eigenvectors of x'x */
    endif;
    retp(lead);
endp;

/* Select the number of factors by minimising penalised fit criteria.

   Input:
     y    - data matrix; standardised column by column inside the procedure
     maxk - largest number of factors considered
     &pen - pointer to a penalty procedure called as pen(rows, cols); it may
            return a vector, giving one criterion per element
     opt  - 1: only criteria of the form  v(k) + k*sigma*pen
            otherwise: those criteria followed by  ln(v(k)) + k*pen
   Output:
     column vector with, for every criterion, the k in 0..maxk that minimises it

   v(k) is the mean squared residual after regressing the standardised data
   on the first k columns returned by factorxx(); sigma is v(maxk). */
proc bai(y,maxk,&pen,opt);
    local pen:proc, z, fhat, fk, mse_k, mse_max, crit, pc_row;
    z = (y - meanc(y)')./stdc(y)';
    fhat = factorxx(z, maxk);
    mse_max = meanc(meanc((z - fhat*(z/fhat))^2));
    crit = {};
    for k(0, maxk, 1);
        if k == 0;
            mse_k = meanc(meanc(z^2));      /* no factors: the data themselves are the residuals */
        else;
            fk = fhat[.,1:k];
            mse_k = meanc(meanc((z - (fk*(z/fk)))^2));
        endif;
        pc_row = (mse_k + k*mse_max*pen(rows(z),cols(z)))';
        if opt == 1;
            crit = crit | pc_row;
        else;
            crit = crit | (pc_row ~ ((ln(mse_k) + k*pen(rows(z),cols(z)))'));
        endif;
    endfor;
    /* Row 1 of crit corresponds to k=0, hence the -1. */
    retp(minindc(crit) - 1);
endp;

/* Penalty ln(m)/m with m = min(t,n). Takes the (t,n) arguments that bai()
   passes to its pen procedure. */
proc pen1(t,n);
    local m;
    m = minc(t|n);
    retp(ln(m)/m);
endp;

/* Penalty ((n+t)/(n*t)) * ln(min(t,n)). Takes the (t,n) arguments that
   bai() passes to its pen procedure. */
proc pen2(t,n);
    local scale;
    scale = (n+t)/(n*t);
    retp( scale*ln(minc(t|n)) );
endp;

/* Returns three penalties stacked in a 3 x 1 vector:
     1) ((n+t)/(n*t)) * ln((n*t)/(n+t))
     2) ((n+t)/(n*t)) * ln(min(t,n))
     3) ln(min(t,n)) / min(t,n)
   Passed to bai() as &penall so that all penalties are evaluated together. */
proc penall(t,n);
    local scale, m, g_a, g_b, g_c;
    scale = (n+t)/(n*t);
    m = minc(t|n);
    g_a = scale*ln(((n*t)/(n+t)));
    g_b = scale*ln(m);
    g_c = ln(m)/m;
    retp(g_a|g_b|g_c);
endp;

/* Bailey, N., Kapetanios, G. and Pesaran, M. H. (2015) Exponent of cross-sectional Dependence: Estimation and Inference, Journal of Applied Econometrics (forthcoming)

Temporal structure on factors; cross-sectional dependence on errors
{a_thrtilde,omega_tild}=atildeall(x,a_size)

Input: x (TxN), a_size
N: cross section dimension
T: time series dimension
a_size: scalar for significance level of thresholding: set to e.g. 0.01, 0.05, 0.10 

Output:
a_thrtilde: bias corrected estimate of alpha
omega_tild: respective s.e.
 */
/* a_tilde estimates: temporal structure on factors; cross-sectional dependence on errors

   Input:
     x      - data matrix, t=rows(x) observations on n=cols(x) units
     a_size - significance level used for the thresholding of the unit-level
              t-statistics
   Output (2 values):
     a_thrtilde - bias-corrected estimate of the exponent
     omega_tild - its standard error

   Changes relative to the earlier version:
     * Units that pass the threshold are now picked with selif() on the 0/1
       indicator itself. Previously non-selected columns were zeroed and rows
       were dropped when their FIRST observation equalled 0, which also
       dropped any selected unit whose first observation happened to be 0.
     * The "nothing selected" case is detected with scalmiss() instead of an
       == comparison against a missing value.
     * Quantities that never reached the output were removed (index, m_c_avg,
       dfx, ln_z and the whole ggg_o / c_avg_sel / frasel block), and the
       element-by-element demeaning loops were vectorised. */
proc (2)=atildeall(x,a_size);
    local n,t,p,z,x_bar1,std_x_bar1,ln_var,x_bar,x_bar_stand,
          x_bar_2m,x_bar_2m_st,x_bar_2m_st_lag,v_all,v_1,dv,rhs,b,s_b,e_nw,sse_nw,sig2_nw,v_f_2,
          c_avg,pc,c_avg_2,e,e_bar,e_bar_stand,v_e_bar,s_hat,a_dot,a_tilde,
          rhsx,coefx,residx,ssex,sdx,t_test,s_ttest,p_n,theta,sel_sorted,s_size,
          x_str1,xstr_bar,musqr_thr,a_thrtilde,
          ggg_t,c_avg_selt,m_c_avg_selt,s_fraselt,omega_tild;

    n = cols(x);
    t = rows(x);
    p = ceil(t^(1/3));              /* lag order of the autoregression below */
    z = seqa(1,1,n);                /* unit index 1..n */

    /* Cross-sectional averages and their variance */
    x_bar1 = meanc(x');
    std_x_bar1 = stdc(x_bar1);
    ln_var = ln(std_x_bar1^2);
    x_bar = x_bar1./std_x_bar1;     /* standardise the cross-sectional avgs */
    x_bar_stand = x_bar - meanc(x_bar);

    /* Newey-West method (label kept from the original code): the demeaned
       squared standardised averages are regressed on their own p lags and
       v_f_2 = sig2/(1-sum(b))^2 is formed from that regression. */
    x_bar_2m = x_bar_stand^2;
    x_bar_2m_st = x_bar_2m - meanc(x_bar_2m);
    x_bar_2m_st_lag = zeros(t,p);
    for i(1,p,1);
        x_bar_2m_st_lag[.,i] = lagn(x_bar_2m_st,i);
    endfor;
    v_all = x_bar_2m_st~x_bar_2m_st_lag;
    v_1 = v_all[p+1:t,.];           /* drop the first p rows, which contain missing lags */
    dv = v_1[.,1];
    rhs = v_1[.,2:p+1];
    b = inv(rhs'*rhs)*rhs'*dv;
    s_b = sumc(b);
    e_nw = dv - rhs*b;
    sse_nw = e_nw'*e_nw;
    sig2_nw = sse_nw/(t-cols(rhs));
    v_f_2 = sig2_nw/(1-s_b)^2;

    /* OLS estimate standardised cross-sectional coefficients */
    c_avg = inv(x_bar'*x_bar)*x_bar'*x;

    /* Residuals from a regression on the first 4 principal components */
    {pc} = getpc(x,4);
    c_avg_2 = inv(pc'*pc)*pc'*x;
    e = x - pc*c_avg_2;
    e_bar = meanc(e');
    e_bar_stand = e_bar - meanc(e_bar);
    v_e_bar = meanc(e_bar_stand^2);
    s_hat = n*v_e_bar;

    /* First-stage estimate and its correction by s_hat */
    a_dot = 1+(1/2)*(ln_var/ln(n));
    a_tilde = a_dot-(1/2)*(s_hat/(n*ln(n)*std_x_bar1^2));

    /* Unit-by-unit regressions on a constant and the cross-sectional average */
    rhsx = ones(t,1)~x_bar1;
    coefx = inv(rhsx'*rhsx)*rhsx'*x;
    residx = x - rhsx*coefx;
    ssex = residx'*residx/(t-cols(rhsx));
    sdx = sqrt(diag(ssex*inv(x_bar1'*x_bar1)));
    t_test = coefx[2,.]'./sdx[.,1];

    /* Thresholding: the j-th largest |t| is compared with the two-sided
       normal critical value at level a_size/(n-j+1). Every rank is tested;
       there is no early stop after the first non-rejection. */
    s_ttest = rev(sortc(abs(t_test)~z,1));
    p_n = a_size./(n-z+1);
    theta = cdfni(1-p_n/2);
    sel_sorted = abs(s_ttest[.,1]).>=theta;
    s_size = sortc(sel_sorted~s_ttest[.,2],2);   /* back to the original unit order */

    /* Keep the rows of x' (units) whose indicator is 1. selif() returns a
       scalar missing value when no unit is selected. */
    x_str1 = selif(x',s_size[.,1]);
    if scalmiss(x_str1);
        musqr_thr = 1;
    else;
        xstr_bar = meanc(x_str1);   /* cross-sectional average of the selected units */
        musqr_thr = meanc((xstr_bar-meanc(xstr_bar))^2);
    endif;
    a_thrtilde = a_tilde-(1/2)*(ln(musqr_thr)/ln(n));

    /* Number of units used for the dispersion of the loadings, kept within 1..n */
    ggg_t = round(n^(a_thrtilde));
    if ggg_t>=n;
        ggg_t = n;
    elseif ggg_t<1;
        ggg_t = 1;
    endif;

    /* The ggg_t coefficients with the largest absolute value (signed values kept) */
    c_avg_selt = rev(sortc(c_avg'~abs(c_avg'),2));
    c_avg_selt = c_avg_selt[1:ggg_t,1];
    m_c_avg_selt = meanc(c_avg_selt);
    s_fraselt = sumc((c_avg_selt-m_c_avg_selt)^2);

    /* NOTE(review): when ggg_t equals 1 the term s_fraselt/(ggg_t-1) is 0/0;
       this edge case is left unchanged - confirm the intended handling. */
    omega_tild = ((1/t)*(v_f_2)+(4/n)*(n^(1-a_thrtilde)*s_fraselt/(ggg_t-1)))^(1/2)/(2*ln(n)); /* 90% CI: 1.65, 95% CI: 1.96 */
    retp(a_thrtilde,omega_tild); /*output*/
endp;

/* First j1 principal components of ax.

   Input:
     ax - data matrix, t=rows(ax) by n=cols(ax)
     j1 - number of components
   Output:
     t x j1 matrix.
     n <  t: ax times the eigenvectors of ax'ax belonging to the j1 largest eigenvalues.
     n >= t: the eigenvectors of ax*ax' belonging to the j1 largest eigenvalues.

   eigrs2() is relied on to return eigenvalues in ascending order, so the
   wanted columns are the last ones, taken in reverse order. */
proc getpc(ax,j1);
    local top_idx, vals, vecs, pc;
    if cols(ax) < rows(ax);
        top_idx = seqa(cols(ax),-1,j1);
        {vals,vecs} = eigrs2(ax'ax);
        pc = ax*submat(vecs,0,top_idx);
    else;
        top_idx = seqa(rows(ax),-1,j1);
        {vals,vecs} = eigrs2(ax*ax');
        pc = submat(vecs,0,top_idx);   /* eigenvectors of xx' = PC of x */
    endif;
    retp(pc);
endp;
