%% this file is for the main results of the paper

% Start from a clean session: no variables, no open figures, empty console.
clear all
close all
clc

% Dimensions used throughout the script.
H = 100;   % number of rows taken from every yearly data file
G = 14;    % number of budget-share columns taken from every yearly data file
T = 10;    % number of consecutive years in the sample window

% First and last year of the window (no semicolon: both are echoed).
START = 1977
END = START+T-1

count = 1;  % row of EV filled by the current pass of the main loop

mem = input('type of household:\n (1) 1 member\n (2) 2 members\n (3) 2-3 members\n (4) 2-4 members\n');

% mem later selects both a column of tot_exp.csv and the data-file suffix.
% Reject anything but the four menu entries here, where the message can
% still point at the actual cause.
if ~isnumeric(mem) || ~isscalar(mem) || ~ismember(mem,1:4)
    error('Invalid household type: please answer 1, 2, 3 or 4.');
end

for S = START:T:END
    start_year = S;
    
    % File-name suffix for each household type, indexed by mem.
    suffixes = {'def_mem1.csv','def_mem2.csv','def_mem2_3.csv','def_mem2_4.csv'};
    if ~isnumeric(mem) || ~isscalar(mem) || ~ismember(mem,1:numel(suffixes))
        % Without this check an unexpected mem leaves filename undefined
        % (or stale from an earlier pass) and the failure is hard to read.
        error('Unknown household type: mem must be 1, 2, 3 or 4.');
    end
    
    % One H x G slice per year.
    bs = zeros(H,G,T);
    % tot_exp.csv is read skipping its first row and first column; the
    % column matching the household type is kept.
    te = csvread('tot_exp.csv',1,1);
    te = te(:,mem);
    w = [];
    
    % All yearly files of one window live in the same folder.
    folder = ['data/' num2str(START) '_' num2str(END) '/'];
    
    for t = 1:T
        % No semicolon: the file name is echoed, as before.
        filename = [folder 'dat' num2str(start_year+t-1) suffixes{mem}]
        
        % Skip the first row and the first column of the file.
        data = csvread(filename,1,1);
        
        data(:,G+2) = [];
        
        % Columns 2..G+1 of what was read are stored for year t.
        bs(:,:,t) = data(:,2:G+1);
        
        clear data
    end
    
    % Flatten to H x (G*T): the G columns of year 1, then year 2, ...
    w = reshape(bs,H,G*T);
    
    %% replace budget shares >=1 or <0
    % Each out-of-range entry is replaced by the mean of the in-range
    % entries of its column. Averaging the whole column (the previous
    % behaviour) let the bad values leak into their own replacement, which
    % could therefore still be out of range and depended on the order in
    % which the entries were processed.
    valid = ~(w>=1 | w<0);
    [row, col] = find(~valid);
    for i = 1:length(row)
        ok = valid(:,col(i));
        if any(ok)
            w(row(i),col(i)) = mean(w(ok,col(i)));
        else
            % Nothing in range to average: fall back to the plain column mean.
            w(row(i),col(i)) = mean(w(:,col(i)));
        end
    end
    
    %% reorder budget shares
    % On entry the columns of w are grouped by year (G columns per year).
    % They are regrouped by good: T columns for good 1, then T columns for
    % good 2, and so on. a(c) records the source column of output column c.
    % The source index is written with G instead of the literal 14, so the
    % section stays correct if G is changed.
    w1 = zeros(size(w,1),G*T);
    a = zeros(1,G*T);
    c = 1;
    for g=1:G
        for t=1:T
            a(c) = (t-1)*G+g;
            w1(:,c) = w(:,a(c));
            c=c+1;
        end
    end
    
    % Keep goods 1..G-1 only: the last T columns (good G) are dropped.
    w = w1(:,1:T*(G-1));  % erase BS of miscellaneous
    
    %% average BS
    % For every good, average its T columns over all rows, over the first
    % half of the rows and over the second half of the rows.
    % NOTE(review): the _poor/_rich names assume rows are ordered by
    % increasing total expenditure -- confirm against the data files.
    nRows = size(w,1);
    half = floor(nRows/2);   % 50 when there are 100 rows
    BS_av = zeros(G-1,1);
    BS_av_poor = zeros(G-1,1);
    BS_av_rich = zeros(G-1,1);
    for i=1:G-1
        cols = (i-1)*T+1:i*T;   % the T columns of good i
        BS_av(i,:) = mean(mean(w(:,cols)),2);
        BS_av_poor(i,:) = mean(mean(w(1:half,cols)),2);
        BS_av_rich(i,:) = mean(mean(w(half+1:nRows,cols)),2);
    end
    %% center data
    % x is w with every column mean removed. The column standard deviations
    % (ss, ss1) are computed as well but are not applied to x.
    [H, N] = size(w);
    mm = mean(w);
    ss = std(w);
    mm1 = repmat(mm, size(w,1), 1);
    ss1 = repmat(ss, size(w,1), 1);
    x = w - mm1;
    
    %% number of common factors
%     rmax = 15;
%     cmax = 2;
%     nbck = floor(N/20);
%     
%     ABC_crit(x,rmax,cmax,nbck)
%     [n4(count,1),n5(count,1),n6(count,1)] = BN_crit2(x,rmax);
%     no(count,1) = onatski(x,rmax);
    
    r = 3;   % number of factors kept
    
    %% PCA
    % Covariance of the centred data. x has exactly H rows at this point, so
    % no row slicing is needed. C is then forced to be exactly symmetric so
    % that eig treats it as a symmetric problem and returns real eigenvalues.
    C = (x'*x)/(H-1);
    C = (C+C')/2;
    
    [R, D] = eig(C);
    
    [D,IX] = sort((diag(D)));                % sort eigenvalues and eigenvectors
    D = flipud(D);
    IX = flipud(IX);
    R = R(:,IX);                             % eigenvectors in decreasing order of eigenvalue
    D = diag(D);
    
    % Share of the trace carried by each eigenvalue, one row per pass.
    EV(count,:) = (diag(D./trace(D)))';
    count=count+1;
    
    % Keep the r leading eigenpairs.
    R = R(:,1:r);
    D = D(1:r,1:r);
    
    L = R.*sqrt(N);
    F_PCA = x*L./N;
    
    u = F_PCA(1:H,:);
    
    % Projection of x on the span of u, solved with backslash rather than
    % an explicit inverse.
    common_PCA = u*((u'*u)\(u'*x));
    
    %% ICA
    % jadeR is a project function; its result is applied to x' to obtain
    % the factors, stored one per column of F_ICA.
    B0 =  jadeR(x',r);
    F_ICA = (B0*x')';
    
    v = F_ICA(1:H,:);
    % Least-squares loadings of x on the factors, solved with backslash
    % rather than an explicit inverse.
    B = (v'*v)\(v'*x);
    
    common_ICA = v*B;  % must be equal to common_PCA
        
    %% identification of sign
    % Factor 1 is made non-negative at the first row and factor 2 at the
    % last row; the matching loading row is flipped together with the factor.
    if F_ICA(1,1) < 0
        F_ICA(:,1) = -F_ICA(:,1);
        B(1,:) = -B(1,:);
    end
    if F_ICA(end,2) < 0
        F_ICA(:,2) = -F_ICA(:,2);
        B(2,:) = -B(2,:);
    end
    
    %% loadings
    % Average the loadings of each good over its T columns.
    BB = B';                        % one row per column of x, one column per factor
    B_m = zeros(G-1,size(BB,2));
    for i=1:G-1
        B_m(i,:) = mean(BB((i-1)*T+1:i*T,:));
    end
    
    % Scale every factor's loadings to unit Euclidean norm. B_m2 holds the
    % squares of those normalised loadings, so each of its columns sums to 1.
    % (It was previously divided by the norm instead of the squared norm,
    % which made it inconsistent with B_m.)
    colSS = ones(G-1,1)*sum(B_m.^2);
    B_m = B_m./sqrt(colSS);
    B_m2 = B_m.^2;
    
    %% correlations
    % Correlation (and its p-value) between every column of x and each of
    % the first r factors, from a single call: CP and PVAL are N x r.
    [CP,PVAL] = corr(x,F_ICA(:,1:r));
    %         CS = corr(x,F_ICA(:,1:r),'type','Spearman');
    %         CK = corr(x,F_ICA(:,1:r),'type','Kendall');
    
    % Positions of the correlations that are not significant at 5%.
    [row, col] = find(PVAL>=0.05);
    
    % Average the correlations of each good over its T columns.
    CP_m = zeros(G-1,r);
    for i=1:G-1
        CP_m(i,:) = mean(CP((i-1)*T+1:i*T,:));
    end
    
    % Two-sided p-value of each averaged correlation from the t statistic
    % rho*sqrt((H-2)/(1-rho^2)) with H-2 degrees of freedom; |rho| = 1 is
    % mapped to +/-Inf.
    rho = CP_m;
    t = sign(rho) .* Inf;
    k = (abs(rho) < 1);
    t(k) = rho(k).*sqrt((H-2)./(1-rho(k).^2));
    p_m = 2*tcdf(-abs(t),H-2);
    
    %% bootstrap factors
    [H, N] = size(x);
    
    M = 1;   % number of bootstrap draws
    
    F_ICA_sim = zeros(H,r,M);
    
    for m=1:M
        % Draw H row indices with replacement, uniformly on 1..H, then sort
        % them. ceil(H*rand) is used because round(1+(H-1)*rand) picks rows
        % 1 and H only half as often as the other rows.
        U = sort(ceil(H*rand(H,1)));
        
        % Resampled rows, re-centred. (w is reused as scratch here, as before.)
        w = x(U,:);
        
        mm = mean(w);
        mm1 = ones(size(w, 1) , 1)*mm;
        w = w-mm1;
        
        B0sim =  jadeR(w',r);
        F_ICA2 = (B0sim*w')';
        
        % Same sign rule as for the factors of the original sample.
        if F_ICA2(1,1) < 0
            F_ICA2(:,1) = -F_ICA2(:,1);
        end
        if F_ICA2(end,2) < 0
            F_ICA2(:,2) = -F_ICA2(:,2);
        end
        
        F_ICA_sim(:,:,m) = F_ICA2(:,1:r);
        clear F_ICA2
    end
    % NOTE(review): U is not stored, so code that pairs the rows of
    % F_ICA_sim with total expenditure uses the original ordering -- confirm
    % this is intended.
     
    %% nonparametric-fits
    % b, p and kern are passed straight to the project function locpolyest
    % (presumably bandwidth, polynomial degree and kernel id -- TODO confirm).
    b = 8.62*max(te)/100;
    p = 1;
    kern = 2;
    absc = te';
    
    for j=1:r
        fit(:,:,j)=locpolyest(absc',F_ICA(:,j),b,p,kern);
        for m=1:M
            fprintf('factor %d, bootstrap draw %d\n',j,m);
            fit_boot(:,:,j,m)=locpolyest(absc',squeeze(F_ICA_sim(:,j,m)),b,p,kern);
        end
    end
    
    % Goodness of fit, with column 1 of fit taken as the fitted values.
    % R2np is 1 - SSE/SST, where SST is the total sum of squares about the
    % mean; the sum of squared fitted values (SSR) is still stored but is no
    % longer the denominator.
    for j=1:r
        SSE(1,j) = sum((fit(:,1,j)-F_ICA(:,j)).^2);
        SSR(1,j) = sum((fit(:,1,j)).^2);
        SST(1,j) = sum((F_ICA(:,j)-mean(F_ICA(:,j))).^2);
        R2np(1,j) = 1-SSE(1,j)./SST(1,j);
    end
     
     %% parametric fits
%         l1 = input('min percentile\n');
%         l2 = input('max percentile\n');
%        
%         tes = (te(l1:l2))./std(te(l1:l2));
%         absc = tes';
%     
%         degofr = (l2-l1)/(l2-l1-1);
%     
%         for j=1:r
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_lin,[1 1]);
%             R2af(1,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_quad,[1 1]);
%             R2af(2,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_inv,[1 1]);
%             R2af(3,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_inv2,[1 1]);
%             R2af(4,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_log,[1 1]);
%             R2af(5,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_log_quad,[1 1]);
%             R2af(6,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%             [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_xlog,[1 1]);
%             R2af(7,j) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%         end
%     
%         for j=1:2
%             if j==1
%                 % log(x)
%                 [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_log,[1 1]);
%                 R2(1,1) = 1-sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 R2a(1,1) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 beta_ols(:,1) = beta;
%                 for m=1:M
%                     [beta,residual,J,SIGMA,mse] = nlinfit(absc,squeeze(F_ICA_sim(l1:l2,j,m))',@fit_engel_log,[1 1]);
%                     beta_boot(:,1,m) = beta;
%                 end
%                 % x^-1
%                 [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_inv,[1 1]);
%                 R2(2,1) = 1-sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 R2a(2,1) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 beta_ols(:,2) = beta;
%                 for m=1:M
%                     [beta,residual,J,SIGMA,mse] = nlinfit(absc,squeeze(F_ICA_sim(l1:l2,j,m))',@fit_engel_inv,[1 1]);
%                     beta_boot(:,2,m) = beta;
%                 end
%                 
%             elseif j == 2 
%                 % x^2
%                 [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_quad,[1 1]);
%                 R2(3,1) = 1-sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 R2a(3,1) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 beta_ols(:,3) = beta;
%                 for m=1:M
%                     [beta,residual,J,SIGMA,mse] = nlinfit(absc,squeeze(F_ICA_sim(l1:l2,j,m))',@fit_engel_quad,[1 1]);
%                     beta_boot(:,3,m) = beta;
%                 end
%                 % xlog
%                 [beta,residual,J,SIGMA,mse] = nlinfit(absc,F_ICA(l1:l2,j)',@fit_engel_xlog,[1 1]);
%                 R2(4,1) = 1-sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 R2a(4,1) = 1-degofr*sum(residual.^2)/sum((F_ICA(l1:l2,j)-mean(F_ICA(l1:l2,j))).^2);
%                 beta_ols(:,4) = beta;
%                 for m=1:M
%                     [beta,residual,J,SIGMA,mse] = nlinfit(absc,squeeze(F_ICA_sim(l1:l2,j,m))',@fit_engel_xlog,[1 1]);
%                     beta_boot(:,4,m) = beta;
%                 end
%             end
%         end
%         
%         
%         beta_se_ols = [std(beta_boot(1,1,:));
%             std(beta_boot(2,1,:));
%             std(beta_boot(1,2,:));
%             std(beta_boot(2,2,:));
%             std(beta_boot(1,3,:));
%             std(beta_boot(2,3,:));
%             std(beta_boot(1,4,:));
%             std(beta_boot(2,4,:))];
%         
%         beta_ols = reshape(beta_ols,8,1);
%         TAB = [beta_ols beta_se_ols beta_ols./beta_se_ols 1-tcdf(abs(beta_ols./beta_se_ols),l2-l1+1-2)];
end
%% plots of the factors and Engel curves nonparametric fits
% q=[1 1.64];
% 
% for i=2:2
%     figure
%     hold all
%     plot(te,-squeeze(fit(:,1,i)),'-k','LineWidth',2)
%     plot(te,-squeeze(fit(:,1,i))+q(1)*squeeze(std(fit_boot(:,1,i,:),1,4)),'--k','LineWidth',2)
%     plot(te,-squeeze(fit(:,1,i))-q(1)*squeeze(std(fit_boot(:,1,i,:),1,4)),'--k','LineWidth',2)
%     plot(te,-squeeze(fit(:,1,i))+q(2)*squeeze(std(fit_boot(:,1,i,:),1,4)),':k','LineWidth',2)
%     plot(te,-squeeze(fit(:,1,i))-q(2)*squeeze(std(fit_boot(:,1,i,:),1,4)),':k','LineWidth',2)
%     plot(te,-F_ICA(:,i),'.k','MarkerSize',15)
%     axis tight
%     hx = xlabel('Total expenditure $(x_h)$');
%     yname = ['$\tilde{\gamma}_' num2str(gcf) '^*\,(x_h)$'];
%     hy = ylabel(yname);
%     set([hx hy],'Interpreter','latex','FontSize',16)
%     set(gca,'FontSize',16)
%     box on
% %     if mem <= 2
% %         figname2 = ['figs/fit_' num2str(END) '_' num2str(gcf) '_' num2str(mem) 'm.fig']
% %     elseif mem > 2
% %         figname2 = ['figs/fit_' num2str(END) '_' num2str(gcf) '_2' num2str(mem) 'm.fig']
% %     end
% %     saveas(gcf,figname2)
% end
% 
% for i=2:2
%     figure
%     hold all
%     plot(te,-squeeze(fit(:,2,i)),'-k','LineWidth',2)
%     plot(te,zeros(size(absc,2),1),'-k','LineWidth',1)
%     plot(te,-squeeze(fit(:,2,i))+q(1)*squeeze(std(fit_boot(:,2,i,:),1,4)),'--k','LineWidth',2)
%     plot(te,-squeeze(fit(:,2,i))-q(1)*squeeze(std(fit_boot(:,2,i,:),1,4)),'--k','LineWidth',2)
%     plot(te,-squeeze(fit(:,2,i))+q(2)*squeeze(std(fit_boot(:,2,i,:),1,4)),':k','LineWidth',2)
%     plot(te,-squeeze(fit(:,2,i))-q(2)*squeeze(std(fit_boot(:,2,i,:),1,4)),':k','LineWidth',2)
%     plot(te,zeros(size(absc,2),1),'ok','MarkerSize',5)
%     axis tight
%     hx = xlabel('Total expenditure $(x_h)$');
%     yname = ['$\tilde{\delta}_' num2str(gcf-r) '^*\,(x_h)$'];
%     hy = ylabel(yname);
%     set([hx hy],'Interpreter','latex','FontSize',16)
%     set(gca,'FontSize',16)
%     box on
% %     if mem <= 2   
% %         figname2 = ['figs/fit_' num2str(END) '_' num2str(gcf-r) '_deriv_' num2str(mem) 'm.fig']
% %     elseif mem > 2
% %         figname2 = ['figs/fit_' num2str(END) '_' num2str(gcf-r) '_deriv_2' num2str(mem) 'm.fig']
% %     end
% %     saveas(gcf,figname2)
% end

%% NOTES 1987-1996
% 1 member
% switch factor 1 and 2
% factor 1 and 2 have to change sign
% 2 members
% switch factor 1 and 2
% 2-3 members
% switch factor 1 and 2
% change sign factor 1
% 2-4 members
% switch factor 1 and 2
% change sign factor 1


%% NOTES 1977-1986
% 2 members
% switch factor 1 and 2
% 2-3 members
% switch factor 1 and 2
% 2-4 members
% switch factor 1 and 2
% change sign factor 1
