%% Results for the section on blocks: PCA on the full data matrix, on year blocks, and on single years

% Start from an empty workspace, no open figures and a clean command window.
% Plain `clear` removes all workspace variables without also flushing
% compiled functions and breakpoints the way `clear all` does.
clear
close all
clc

% Dimensions used when reading the data files.
H = 100;    % rows taken from each yearly file (presumably households - confirm)
G = 14;     % share columns taken from each yearly file (presumably goods - confirm)
T = 1;      % number of consecutive years stacked per run; also the step of the start-year loop

% Sample period. Also used to build the data folder name 'data/<START>_<END>/'.
START = 1997;
END = 2006;
fprintf('Sample period: %d-%d\n', START, END);

count = 1;  % next row of EV to be written

mem = input('type of household:\n (1) 1 member\n (2) 2 members\n (3) 2-3 members\n (4) 2-4 members\n');

% Fail here, with a clear message, rather than later with an undefined
% file name when the answer is empty, non-numeric or out of range.
if ~(isnumeric(mem) && isscalar(mem) && ismember(mem, 1:4))
    error('Household type must be 1, 2, 3 or 4.');
end

% File-name suffix for each household type offered by the prompt.
suffixes = {'def_mem1.csv', 'def_mem2.csv', 'def_mem2_3.csv', 'def_mem2_4.csv'};
if ~(isnumeric(mem) && isscalar(mem) && ismember(mem, 1:numel(suffixes)))
    error('Household type must be 1, 2, 3 or 4.');
end
suffix = suffixes{mem};

maxFactors = 6;     % largest number of factors r tried below

% One run per start year; each run stacks T consecutive yearly files.
% Results written:
%   EV(count,:)  - eigenvalue shares of the full covariance matrix
%   DIST1(yy,r)  - share of the r full-sample factors spanned by the factors of block yy
%   DIST2(t,r)   - share of the single-year factors of year t spanned by F_PCA2
% NOTE(review): DIST1 and DIST2 are indexed by (block, r) only, so every
% start year overwrites the previous one; only the last run survives.
for S = START:T:END
    start_year = S;

    yg = zeros(H,G,T);
    te = zeros(H,T);

    %% load the T yearly files of this run
    for t = 1:T
        filename = ['data/' num2str(START) '_' num2str(END) '/dat' ...
            num2str(start_year+t-2) '_' num2str(start_year+t-1) suffix];
        fprintf('reading %s\n', filename);

        data = csvread(filename,1,1);   % skip the first row and the first column
        data(:,G+2) = [];               % column G+2 is not used

        yg(:,:,t) = data(:,2:G+1);
        te(:,t) = data(:,1);            % kept in the workspace; not used below

        clear data
    end

    % Rebuilt from scratch on every run so nothing stale is carried over.
    bs = yg;
    w = reshape(bs,H,G*T);

    %% replace budget shares >= 1 or < 0 by the mean of their column
    [row, col] = find(w>=1 | w<0);
    for i = 1:length(row)
        w(row(i),col(i)) = mean(w(:,col(i)));
    end
    w(:,(1:T)*G) = [];                  % drop the G-th column of every year

    %% center data
    N = size(w,2);                      % N = T*(G-1) columns remain
    mm = mean(w);
    x = w - ones(H,1)*mm;

    %% PCA on the full covariance matrix (independent of r, so done once)
    C = (x'*x)/(H-1);

    [Rfull, Dfull] = eig(C);
    [lam, IX] = sort(diag(Dfull));      % ascending, then flipped to descending
    lam = flipud(lam);
    IX = flipud(IX);
    Rfull = Rfull(:,IX);
    evShare = (lam./sum(lam))';         % eigenvalues as shares of the trace

    %% PCA on the block-diagonal covariance matrix (also independent of r)
    Cb = zeros(size(C));
    for t = 1:T
        blk = (t-1)*(G-1)+1 : t*(G-1);  % columns of x belonging to year t
        Cb(blk,blk) = C(blk,blk);
    end

    [Rblk, Dblk] = eig(Cb);
    [lamb, IXb] = sort(diag(Dblk));
    lamb = flipud(lamb);
    IXb = flipud(IXb);
    Rblk = Rblk(:,IXb);

    for r = 1:maxFactors
        % NOTE(review): one EV row is written per (start year, r) although
        % the row does not depend on r; layout kept as in the original.
        EV(count,:) = evShare;
        count = count+1;

        %% PCA
        R = Rfull(:,1:r);
        D = diag(lam(1:r));

        L = R.*sqrt(N);
        F_PCA = x*L./N;

        u = F_PCA;
        % Projection of x on the span of the r factors; backslash instead of inv().
        common_PCA = u*((u'*u)\(u'*x));
        F_PCA1 = F_PCA;

        %% PCA ON BLOCKS
        r1 = T*r;
        Rb = Rblk(:,1:r1);
        Db = diag(lamb(1:r1));

        % NOTE(review): x has T*(G-1) columns, not T*G. The factor only
        % rescales F_PCA_b and cancels in DIST1 and DIST2.
        Lb = Rb.*sqrt(T*G);
        F_PCA_b = x*Lb./(T*G);
        for yy = 1:T
            % Columns yy, yy+T, ..., yy+(r-1)*T: r factors for block yy.
            % Same as the former [yy yy+T] for r = 2, but no longer capped
            % at two columns when r > 2.
            % Presumes the sorted eigenvectors of Cb alternate between
            % blocks - TODO confirm for T > 1.
            index = yy:T:r1;
            F_PCA2 = F_PCA_b(:,index);

            num1 = trace(F_PCA1'*F_PCA2*((F_PCA2'*F_PCA2)\(F_PCA2'*F_PCA1)));
            denom1 = trace(F_PCA1'*F_PCA1);
            DIST1(yy,r) = num1/denom1;
        end

        %% PCA ON SINGLE YEARS
        for t = 1:T
            blk = (t-1)*(G-1)+1 : t*(G-1);
            Cs = C(blk,blk);

            [Rs, Ds] = eig(Cs);
            [lams, IXs] = sort(diag(Ds));
            lams = flipud(lams);
            IXs = flipud(IXs);
            Rs = Rs(:,IXs);

            Rs = Rs(:,1:r);
            Ds = diag(lams(1:r));

            Ls = Rs.*sqrt(G);           % scale factor cancels in DIST2
            F_PCA_s = x(:,blk)*Ls./G;
            F_PCA3 = F_PCA_s;

            % NOTE(review): F_PCA2 is whatever the block loop above left
            % behind, i.e. the factors of the last block (yy = T), not of
            % year t. Identical when T = 1; confirm the intent for T > 1.
            num2 = trace(F_PCA3'*F_PCA2*((F_PCA2'*F_PCA2)\(F_PCA2'*F_PCA3)));
            denom2 = trace(F_PCA3'*F_PCA3);
            DIST2(t,r) = num2/denom2;
        end
    end
end

