% This is the main file of Matlab code for the data analysis of
% "The Millennium Peak in Club Convergence - A New Look at Distributional
% Changes in the Wealth of Nations", written by Melanie Krause
% This code was prepared and revised on 25 July 2016.
% All errors are my own.

% Seed the legacy random number generators so that the random draws used
% further down (rand in the bootstrap) are reproducible. The legacy 'seed'
% syntax is kept on purpose: replacing it by rng(...) would select another
% generator and therefore change every random draw.
randn('seed',300714)
rand('seed',300714)

% Start by importing the dataset data.txt into Matlab:
uiimport('data.txt')
% Select comma as delimiter, range A2:G5167, variable names row 1, import
% as column vectors
% The code below reads the imported workspace variables X, country and
% countrycode and assumes that the rows are stacked country by country,
% with T consecutive yearly observations for each country.

N=123;          % number of countries
T=length(X)/N;  % number of yearly observations per country
% Fail early with a clear message instead of an obscure indexing error
% further down when the imported range does not match N.
if T~=round(T) || T<1
    error('ClubConvergence:panelSize', ...
        'length(X)=%d is not a positive multiple of N=%d; check the import range.', ...
        length(X), N);
end
years=1970:1:2011;

% List of countries (ordered): take the first observation of every country
first_obs=(0:N-1)'*T+1;
countries=country(first_obs,1);
countrycodes=countrycode(first_obs,1);

% Create a matrix with the incomes per capita in each country (row) in each
% year (column). Row (i-1)*T+t of X holds country i in year t, so reshaping
% the first column of X to T-by-N and transposing gives the N-by-T matrix.
Y_raw=double(reshape(X(1:N*T,1),T,N).');

% Cross-country summary statistics for every year (one entry per column of
% Y_raw); deliberately left without semicolon so that they are displayed.
mu=mean(Y_raw)
% Stored as med, not median: a variable called median would shadow the
% built-in function and break every later call to median(...).
med=median(Y_raw)
sigma=sqrt(var(Y_raw))
% Interquartile range rescaled by 2*0.6745 (the IQR of a standard normal
% is about 1.349), i.e. an IQR-based counterpart of sigma.
sigma_intquart=iqr(Y_raw,1)/(2*0.6745)

%Standardized values Y: subtract the year's cross-country mean and divide
% by the year's cross-country standard deviation. mu and sigma are computed
% once above instead of once per matrix element.
Y=bsxfun(@rdivide,bsxfun(@minus,Y_raw,mu),sigma);

%Table for Appendix with all countries, raw and standardized income per cap!
% One row per country. Columns: country code, country name, raw income in
% the first and last year (2 decimals), standardized income in the first and
% last year (4 decimals), average annual growth rate of raw income between
% the first and last year (4 decimals). The '&' and '\\' cells in between
% are presumably LaTeX column / row separators for pasting into the paper.
% The panel dimensions N and T are used instead of the literals 123/42/41
% so that the table follows the imported data.
Table=cell(N,14);
round2=@(v) round(v*100)/100;      % round to 2 decimals
round4=@(v) round(v*10000)/10000;  % round to 4 decimals
for i=1:N
    % compound annual growth rate over the T-1 year-to-year steps
    growth=(Y_raw(i,T)/Y_raw(i,1))^(1/(T-1))-1;
    Table(i,:)={countrycodes(i),'&',countries(i),'&', ...
                round2(Y_raw(i,1)),'&',round2(Y_raw(i,T)),'&', ...
                round4(Y(i,1)),'&',round4(Y(i,T)),'&', ...
                round4(growth),'\\'};
end

% Plot the kernel density estimates at different points in time
% (columns 1 and 36 of Y; with years=1970:2011 these are 1970 and 2005).
% Each density is evaluated on 100 grid points; "hold on" makes the second
% curve share the axes of the first one.
[fs1,xis1,u1] = ksdensity(Y(:,1),'npoints',100);
plot(xis1,fs1)
hold on 
[fs2,xis2,u2] = ksdensity(Y(:,36), 'npoints',100);
plot(xis2,fs2)

% Calculate Critical Bandwidth for m-modality (1-5) in the first year (calls MM_CritBW function)
% No semicolon: the result is displayed.
% NOTE(review): the meaning of the trailing arguments (300, 0.05) is defined
% in MM_CritBW.m, which is not part of this file - confirm there.
CB_test= MM_CritBW(Y(:,1), 1, 5, 300, 0.05)
% Check if you can reject the Silverman static multimodality test (as
% suggested by Bianchi, calls the MM_Bianchi function)
% NOTE(review): the last argument (50) is presumably the number of bootstrap
% replications - confirm against MM_Bianchi.m.
Signific_test = MM_Bianchi(Y(:,1), 1, 5, 300, 0.05, 50)

% Now the Silverman test with uni- and bimodality for all T years:
% store the result returned by MM_Bianchi for unimodality and bimodality.
% NOTE(review): the last argument (5000) is presumably the number of
% bootstrap replications - confirm against MM_Bianchi.m.
% The loops run over 1:T so that they always match the size of the result
% vectors (and of Y), rather than a hard-coded number of years.
Bianchi1_sig=zeros(T,1); % for unimodality
Bianchi2_sig=zeros(T,1); % for bimodality
for t=1:T
    Bianchi1_sig(t) = MM_Bianchi(Y(:,t), 1, 1, 300, 0.05, 5000);
    Bianchi2_sig(t) = MM_Bianchi(Y(:,t), 2, 2, 300, 0.05, 5000);
end

% The core of this paper: Calculate the critical bandwidth for unimodality
% so that you can later check its evolution
CB=zeros(T,1);
for t=1:T
    CB(t)= MM_CritBW(Y(:,t), 1, 1, 300, 0.05);
end


% Now compare the evolution of CB to that of Gini, Wolfson's polarization and
% ER polarization (alpha = 0.25 and alpha = 1). Work with mean-standardized data!
% This calls the functions wolfsongini.m as well as er_pola.m
% wolfsongini returns a vector whose first entry is stored as the Wolfson
% index and whose second entry is stored as the Gini index.
Wolfson=zeros(T,1);
Gini=zeros(T,1);
ER025=zeros(T,1);
ER1=zeros(T,1);
alpha025 = 0.25; % for ER-Pola (between 0.25 and 1)
alpha1 = 1;
for t=1:T
    % raw incomes divided by the year's cross-country mean, computed once
    % and shared by all three index calls
    y_rel=Y_raw(:,t)/mean(Y_raw(:,t));
    wg=wolfsongini(y_rel);
    Wolfson(t)=wg(1);
    Gini(t)=wg(2);
    ER025(t)=er_pola(y_rel, alpha025);
    ER1(t)=er_pola(y_rel, alpha1);
end


% Significance of Change in CB in any two time periods (Bootstrap)
% (as well as change in Gini, Wolfson, ER-Pola)
% Difference from year t to 1970:
% Columns of Diff1970: 1 = critical bandwidth, 2 = Wolfson, 3 = ER (alpha=1),
% 4 = ER (alpha=0.25), 5 = Gini. Row 1 (the base year itself) stays zero.
Diff1970=zeros(T,5);
y_rel_base=Y_raw(:,1)/mean(Y_raw(:,1)); % base year relative to its mean
for t=2:T
    y_rel=Y_raw(:,t)/mean(Y_raw(:,t));  % year t relative to its mean
    % The external functions are still called once per term and in the
    % original order, in case any of them draws random numbers.
    Diff1970(t,1)=MM_CritBW(Y(:,t), 1, 1, 300, 0.05)-MM_CritBW(Y(:,1), 1, 1, 300, 0.05);
    WolfsonVec=wolfsongini(y_rel)-wolfsongini(y_rel_base);
    Diff1970(t,2)=WolfsonVec(1);
    Diff1970(t,5)=WolfsonVec(2);
    Diff1970(t,3)=er_pola(y_rel, 1)-er_pola(y_rel_base, 1);
    Diff1970(t,4)=er_pola(y_rel, 0.25)-er_pola(y_rel_base, 0.25);
end

% Bootstrap distribution of the differences to the base year: in every
% trial N countries are drawn with replacement and the same draw is used
% for the base year and for year t (paired resampling of countries).
trials=5000;
Signif1970=zeros(T,5);
Boot1970=zeros(T,5,trials); % to see results for entry i,j, see Boot1970(i,j,:)
for t=2:T
    for trial=1:trials
        random=ceil(rand(N,1)*N); % N row indices in 1..N, with replacement
        sampY1_raw = Y_raw(random,1);
        sampY2_raw = Y_raw(random,t);
        sampY1 = Y(random,1);
        sampY2 = Y(random,t);
        % resampled raw incomes relative to their own sample mean
        samp1_rel = sampY1_raw/mean(sampY1_raw);
        samp2_rel = sampY2_raw/mean(sampY2_raw);
        % Same column layout as Diff1970: 1 = critical bandwidth, 2 = Wolfson,
        % 3 = ER (alpha=1), 4 = ER (alpha=0.25), 5 = Gini.
        Boot1970(t,1,trial)=MM_CritBW(sampY2, 1, 1, 300, 0.05)-MM_CritBW(sampY1, 1, 1, 300, 0.05);
        bootWolfsonVec=wolfsongini(samp2_rel)-wolfsongini(samp1_rel);
        Boot1970(t,2,trial) = bootWolfsonVec(1);
        Boot1970(t,5,trial)= bootWolfsonVec(2);
        Boot1970(t,3,trial)=er_pola(samp2_rel, 1)-er_pola(samp1_rel, 1);
        Boot1970(t,4,trial)=er_pola(samp2_rel, 0.25)-er_pola(samp1_rel, 0.25);
    end
end

%Sorting and Significance Determination
% Signif1970(t,col) is the share of bootstrap differences whose sign
% contradicts the observed difference Diff1970(t,col):
%   observed difference >= 0 : share of bootstrap differences that are <  0
%   observed difference <  0 : share of bootstrap differences that are >= 0
% The shares are obtained by counting directly. Locating the first
% non-negative entry of the sorted draws gave -1/trials when a non-negative
% observed difference had only negative bootstrap draws (the share is 1 in
% that case), and missed one draw in the count for negative differences.
Boot1970sorted = sort(Boot1970,3); % sorted draws, kept for inspection
for t=1:size(Diff1970,1)
    for col=1:size(Diff1970,2)
        draws = squeeze(Boot1970(t,col,:)); % trials-by-1 vector
        if Diff1970(t,col)>=0
            Signif1970(t,col) = sum(draws < 0)/trials;
        else
            Signif1970(t,col) = sum(draws >= 0)/trials;
        end
    end
end

% Find the antimode between the two groups in Kernel density estimation
% and identify the position of outliers
%
% For every year the kernel density of the standardized incomes is
% evaluated on 1000 grid points and all interior local minima are located
% (slope changes from negative to positive). From these candidates
%   antimode(t)       - threshold between the poor and the rich group,
%   outlier1thresh(t) - threshold between the rich group and a first outlier,
%   outlier2thresh(t) - threshold between a first and a second outlier
% are selected with the rules below.

antimode=zeros(T,1);
outlier1thresh=zeros(T,1);
outlier2thresh=zeros(T,1);
halfwin=100; % a candidate counts as a "real" minimum if it is the lowest point within +-halfwin grid points
for t=1:T
    [f,xist,ut] = ksdensity(Y(:,t), 'npoints',1000);
    f_diff=diff(f'); % discrete derivative
    % indic(z)=1 where the slope turns from negative to positive, i.e. where
    % f(z) is lower than both neighbours. Rebuilt from scratch for every year
    % so that no entries of a previous year can survive.
    indic=zeros(1,length(f_diff));
    indic(2:end)=(f_diff(2:end) > 0) & (f_diff(1:end-1) < 0);
    antimode_ind=find(indic==1);
    n_min=numel(antimode_ind);
    if n_min<2
        % the selection rules below need at least two candidates
        error('ClubConvergence:antimode', ...
            'Year index %d: %d local density minima found, at least 2 are required.', ...
            t, n_min);
    end
    % Window test with the window clipped to the grid, so that a candidate
    % closer than halfwin points to either end cannot index outside f.
    is_window_min=@(k) f(k)==min(f(max(1,k-halfwin):min(numel(f),k+halfwin)));

    %for threshold between poor and rich
    if is_window_min(antimode_ind(1)) % it's really local min
        antimode(t)=xist(antimode_ind(1));
    else
        antimode(t)=xist(antimode_ind(2));
    end
    % for threshold between rich and first outlier
    if n_min==2
        outlier1thresh(t)=xist(antimode_ind(2));
    elseif n_min==4
        outlier1thresh(t)=xist(antimode_ind(3));
    elseif antimode(t)==xist(antimode_ind(1)) && is_window_min(antimode_ind(2))
        outlier1thresh(t)=xist(antimode_ind(2));
    else
        outlier1thresh(t)=xist(antimode_ind(3));
    end
    % for threshold between first and second outlier
    if n_min==2
        outlier2thresh(t)=outlier1thresh(t);
    elseif n_min==3
        outlier2thresh(t)=xist(antimode_ind(3));
    else
        outlier2thresh(t)=xist(antimode_ind(4));
    end
end
       

%Antimode in absolute terms: undo the standardization of the last year
% (multiply by that year's standard deviation and add its mean). No
% semicolon, so the value is displayed.
antimode(T)*sqrt(var(Y_raw(:,T)))+mean(Y_raw(:,T))


% Club Membership
% Club_Membership(i,t) is 1 if the standardized income of country i in year
% t is at or below that year's antimode and 2 otherwise. The comparison is
% written with "<=" so that entries for which it is false (including NaN)
% end up in club 2, exactly as in an element-wise if/else.
Club_Membership=2-bsxfun(@le,Y,antimode(1:T)');

%Find countries that changed clubs
% Mobility(i) is 0 for a country that belongs to the same club in every
% year (all 1 or all 2) and 1 for a country that switched at least once.
always_club1=all(Club_Membership==1,2);
always_club2=all(Club_Membership==2,2);
Mobility=double(~(always_club1 | always_club2));

% Display the countries that are in club 2 in column 42, then their number
for i=find(Club_Membership(:,42)==2)'
    countries(i)
end
sum(Club_Membership(:,42)==2)

% Display the countries that changed clubs at least once
for i=find(Mobility==1)'
    countries(i)
end