%  This program performs the Subsampling bootstrap for ***DEA AGGREGATE*** efficiencies
%  of TWO (non-intersecting) sub-groups (can be extended to more sub-groups) of the entire 
%  sample and for ***AGGREGATE*** efficiency of the entire sample. 
%  Programmed by L.Simar and V.Zelenyuk ( adapted from and compatible 
%  to other programs of L.Simar)  
%
%  uses functions
%           eff = deasel(X,Y,ori,rts,prdl);  to estimate DEA scores
%           effkb=DEA_SubSampl_Ag_2_Smpl.m      to obtain the bootstrap estimates
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%            X   : Matrix of input(s)  (n x p)
%            Y   : Matrix of output(s) (n x k)
%            ori : Orientation of the computation
%                     'I'    = Input orientation
%                     'O'    = output orientation
%            rts : Assumption on returns to scale (text)
%                     'NIRS' = Non increasing rts
%                     'NDRS' = Non decreasing rts
%                     'CRS'  = Constant rts
%                     'VRS'  = Variable rts
%            prdl: Selection of primal or dual approach
%                     'P'    = Primal
%                     'D'    = Dual
%------------------------------------------------------------
%
% **********************************
%
% Start from an empty workspace with no open figures, then start the timer
% (the matching toc is at the end of the script).
clear all
close all

tic

% Bootstrap settings: number of bootstrap loops B and the level of the
% confidence intervals (1-alpha). kapa1 and kapa2 are handed unchanged to
% the bootstrap routine (presumably subsample-size exponents -- confirm).
B     = 2000;
kapa1 = 0.7;
kapa2 = 0.7;
alpha = 0.05;

% Load the data. The rest of the script relies on this file to provide
% X, Y, na, vA and vB, none of which is assigned anywhere else here.
load Ex2_BS_Agg_ppr;

[n,p] = size(X);
[n,q] = size(Y);
nobs  = (1:n)';

% Group membership flags: observations 1..na versus na+1..n.
group1 = (nobs <= na);
group2 = (nobs >= na+1);

% Type of DEA model: returns to scale, orientation, primal/dual.
rts  = 'VRS';
ori  = 'O';
prdl = 'D';

% Announce the returns-to-scale assumption; anything that is not
% NIRS/NDRS/VRS is reported as constant returns to scale.
switch rts
    case 'NIRS'
        fprintf('\n * Non-increasing returns to scale \n')
        disp('   -------------------------------')
    case 'NDRS'
        fprintf('\n * Non-decreasing returns to scale \n')
        disp('   -------------------------------')
    case 'VRS'
        fprintf('\n * Variable returns to scale \n')
        disp('   -------------------------')
    otherwise
        fprintf('\n * Constant returns to scale \n')
        disp('   -------------------------')
end

% *********************************************************************
%  DEA scores for the original (full) data set
% *********************************************************************
eff = deasel(X,Y,ori,rts,prdl);
eff = round(eff*10000)/10000;     % keep 4 decimals

% *********************************************************************
%  Keep the full-sample quantities under "all*" names for later use
allnobs = nobs;
allX    = X;
allY    = Y;
alln    = n;
alleff  = eff;

% Split scores, inputs and outputs by group using the logical flags.
eff1 = eff(group1,:);
X1   = X(group1,:);
Y1   = Y(group1,:);

eff2 = eff(group2,:);
X2   = X(group2,:);
Y2   = Y(group2,:);

% Group sizes and within-group observation indices.
[n1,p] = size(X1);
[n2,p] = size(X2);
nobs1  = (1:n1)';
nobs2  = (1:n2)';


% *********************************************************************
% Computation of Aggregate efficiencies
% *********************************************************************
% The price-independent weights are built from the inputs in the
% input-oriented case and from the outputs otherwise.
% For details, see Simar and Zelenyuk (2003) -- ext. of Fare and Zelenyuk
% 2003, EJOR.
%
% Both orientations share the same computations below; only the data
% handed to P_indep_weights differ. (The former duplicated input branch
% assigned group 2's true aggregate to TrueAgEff1, leaving TrueAgEff2
% undefined.)
if strcmp('I',ori)   % input oriented case
    aggAll = X;   aggG1 = X1;   aggG2 = X2;
else                 % output oriented case
    aggAll = Y;   aggG1 = Y1;   aggG2 = Y2;
end

        %     FOR GROUP 1
        %   **************
% S1  : weight of group 1 in the whole-sample aggregate (used further below)
% S1k : weights of the individual group-1 observations
[S1, S1k] = P_indep_weights(aggAll, aggG1);

    AgEff1     = eff1'*S1k;     % from the DEA estimates
    TrueAgEff1 = vA'*S1k;       % from the true efficiencies (known only in MC)

        %     FOR GROUP 2
        %   **************
[S2, S2k] = P_indep_weights(aggAll, aggG2);

    AgEff2     = eff2'*S2k;
    TrueAgEff2 = vB'*S2k;

    % Whole-sample aggregate efficiency: group aggregates combined with the
    % group weights S1 and S2 (estimated and true versions).
    EntAgEff     = AgEff1*S1 + AgEff2*S2;
    EntTrueAgEff = TrueAgEff1*S1 + TrueAgEff2*S2;

    % Group 1 relative to group 2 (estimated and true versions).
    CompAgEf    = AgEff1 / AgEff2;
    TruCompAgEf = TrueAgEff1 / TrueAgEff2;

    disp('weights are')
    disp([S1  S2])

    % Stack as [group 1; group 2; entire sample] and show as a row.
    AgEff = [AgEff1 ; AgEff2; EntAgEff];
    disp('EstAgEff')
    disp(AgEff')

    TrueAgEff = [TrueAgEff1 ; TrueAgEff2; EntTrueAgEff];
    disp('TrueAgEff')
    disp(TrueAgEff')

    % Simple (unweighted) means, in the same [group 1; group 2; all] order.
    EstMeans = [mean(eff1); mean(eff2); mean(eff)];
    TruMeans = [mean(vA); mean(vB); mean([vA ; vB]) ]; % known only in MC

    % Location and spread of the DEA scores (not used again in the part of
    % the script visible here).
    mu    = mean(eff);
    sigma = std(eff);

% *********************************************************************
        %  CHOICE of Bandwidth

% Bandwidths are chosen by the Sheather and Jones (1991, JR Stat. Soc., B)
% method (Sh_J_Run), on data reflected around 1.
% ******************************************************************

% --- Densities of the TRUE efficiencies vA, vB (known only in MC) ---
% NOTE: n1 and n2 are overwritten from here on; they now hold the length
% of the reflected samples, not the group sizes.
    ref_1= sort([vA ; 2-vA]) ;              ref_2= sort([vB ; 2-vB]) ;         
        [n1,pm]=size(ref_1);                     [n2,pm]=size(ref_2);

    h1t = Sh_J_Run(ref_1) ;                  h2t = Sh_J_Run(ref_2) ; 

    Kxx = Nkernel_Li(ref_1,ref_1,h1t);       Kyy = Nkernel_Li(ref_2,ref_2,h2t);

% Note correction for sample size due to reflection
    fhat1 = sum(Kxx,2)/(n1*h1t/2);             fhat2 = sum(Kyy,2)/(n2*h2t/2);

    
% here, now FOR OUTPUT oriented only !!! To be adjusted for the INPUT orientation

 %  Eliminate the points at the BOUNDARY ('ones') in estimation of both
 %  bandwidth and density 
 %  NOTE: eff1 and eff2 are overwritten here; from this point on they no
 %  longer contain the observations with a score of 1.

    eff1=eff1(eff1>1);                      eff2=eff2(eff2>1);
        [n1m,pm]=size(eff1);                    [n2m,pm]=size(eff2);

% REFLECT the data  (see Silverman, 1986)
    ref_1_dea= sort([eff1 ; 2-eff1]) ;      ref_2_dea= sort([eff2 ; 2-eff2]) ;         
        [n1,pm]=size(ref_1_dea);                [n2,pm]=size(ref_2_dea);
% Compute the h's (these are the bandwidths passed to the bootstrap later)
    h1 = Sh_J_Run(ref_1_dea);               h2 = Sh_J_Run(ref_2_dea);

 % Plotting the Histograms and Kernel Est. Densities for both Groups
% *********************************************************************
%     [t1, f1] = est_dens_eff_plot(eff1,h1,ori);
%     [t2, f2] = est_dens_eff_plot(eff2,h2,ori);
%         figure
%             plot(t1,f1,'-', t2,f2,'-.')
%             %   axis([1  3  0  max(max(fhat1_dea), max(fhat2_dea))*1.1]);
  
%****************************************************************************************

Kxx = Nkernel_Li(ref_1_dea,ref_1_dea,h1);   Kyy = Nkernel_Li(ref_2_dea,ref_2_dea,h2);

% Note correction for sample size due to reflection
fhat1_dea = sum(Kxx,2)/(n1*h1/2);             fhat2_dea = sum(Kyy,2)/(n2*h2/2);

% 2x2 panel: (i) true densities of both groups, (ii) DEA-based densities
% of both groups, (iii) true vs DEA for group 1, (iv) true vs DEA for
% group 2. The third panel used to be labelled 'iv.' as well.
        figure
             subplot(2,2,1), plot(ref_1, fhat1, '-' , ref_2, fhat2, '-.');            
                 title('i. Est. from true efficiencies for A and Z')
                    axis([1.00  2.5  0  3])
             subplot(2,2,2), plot(ref_1_dea, fhat1_dea, ref_2_dea, fhat2_dea);
                 title('ii. Est. from DEA-est.efficiencies for A and Z')
                    axis([1.00  2.5  0  max(max(fhat1_dea), max(fhat2_dea))*1.1])
             subplot(2,2,3), plot(ref_1, fhat1, '-' , ref_1_dea, fhat1_dea, '-.');    
                 title('iii. Sub-group A: True vs DEA-est. efficiencies')
                    axis([1.00  2  0  4.5])
             subplot(2,2,4), plot(ref_2, fhat2, '-' , ref_2_dea, fhat2_dea, '-.');            
                 title('iv. Sub-group Z: True vs DEA-est. efficiencies')
                    axis([1.00  2.5  0  max(max(fhat1_dea), max(fhat2_dea))*1.1])
                    
                    
% ********************************************************************                    
    eff=alleff;     %  NOTE all eff will be sent for bootstrap, including ones!
% ********************************************************************                    

t0=clock;

% *********************************************************************
                        % BOOTSTRAP loop            
% *********************************************************************
                        
% effkb is a matrix where the results will be stored
% it will be a matrix  for 2 SUB-Groups and Entire Efficiency
% (used below with one row per entry of Results and one column per
% bootstrap replication)

effkb=DEA_SubSampl_Ag_2_Smpl(B,X,Y,h1,h2,eff,rts,ori,prdl, group1, group2, kapa1, kapa2);
    fprintf(' Elapsed time for the Bootstrap loop with B = %6.0f \n',B)
        CPU=etime(clock,t0);
            disp(CPU)

% Rows: aggregate eff. (group 1, group 2, all), mean eff. (group 1,
% group 2, all), ratio of aggregate eff., ratio of mean eff.
% The ratio of means is taken from EstMeans, which was computed on the
% complete groups. eff1 and eff2 must not be used here: by this point they
% have been stripped of the boundary observations for the bandwidth
% selection, which made the ratio inconsistent with rows 4-5.
Results=[AgEff; EstMeans; CompAgEf; EstMeans(1)/EstMeans(2) ];            
True=[TrueAgEff; TruMeans; TruCompAgEf; mean(vA)/mean(vB) ];    % known only in MC

% Row-wise mean and standard deviation over the bootstrap replications
% (explicit dimension, so the result stays a column for any B).
    meanefb=mean(effkb,2);
    sigefb=std(effkb,0,2);
        % bias-corrected estimate: Results - (meanefb - Results)
        effbiascorr=2*Results-meanefb;
% Note: True Agg Eff are known only in MC; delete for empirical applications
            disp('Results True biacor_results  est_bias  est_std')
            disp([Results True effbiascorr  Results-effbiascorr sigefb])

% ******************************************************************
% BasicBootstrap method for Confidence Intervals (JAS paper)
%
% diffkb holds the bootstrap values minus the original estimates; its
% alpha/2 and 1-alpha/2 percentiles (per row) are subtracted from the
% estimates, upper percentile for the lower bound and vice versa.
    diffkb=effkb-kron(ones(1,B), Results);  
        limstar=(prctile(diffkb',[100*alpha/2  100*(1-alpha/2)]))';
            BBlow=Results-limstar(:,2);
            BBup=Results-limstar(:,1);

% ******************************************************************
% NOTE(review): the original script announced a "percentile method on
% bias corrected values (Simar and Wilson, 1998, MS paper)" at this point,
% but no code for it is present.
%
fprintf(' Confidence Intervals at the level : %6.3f \n',1-alpha)
disp(' ===================================')
disp(' BB=Basic Bootstrap, (Simar-Wilson,2000, JAS)')
disp(' ------------------------------------------------------------------')

% Note: True Agg Eff are known only in MC; delete for empirical applications
disp('Results  BBlow True BBup ')
disp([Results BBlow  True BBup ])

% Boxplots of (estimate - bootstrap deviation), i.e. 2*estimate - bootstrap
% value, one box per row.
% Rows 1:3 of Results/diffkb are the aggregate efficiencies.
diffkbAgEff=diffkb(1:3,:);
figure
    boxplot((kron(ones(1,B),AgEff)-diffkbAgEff)')
        title('Agg. Eff. of Group A, B and Together' )

% Rows 4:6 are the mean efficiencies, so they must be centred on EstMeans
% (the original centred them on AgEff, a copy-paste slip from the plot above).
diffkbMeans=diffkb(4:6,:);
figure
    boxplot((kron(ones(1,B),EstMeans)-diffkbMeans)')
        title('Means Eff. of Group A, B and Together' )


%  Estimation of the densities of the aggregate efficiencies (rows 1-3 of
%  effkb) from their bootstrap values.

% Sorted bootstrap values, as column vectors; used both as evaluation
% points and as data for the kernel estimates, and as x-axis in the plot.
bs1 = sort(effkb(1,:)');
bs2 = sort(effkb(2,:)');
bs3 = sort(effkb(3,:)');

% Spread measures per row: standard deviation and interquartile range.
s1 = std(effkb(1,:));
s2 = std(effkb(2,:));
s3 = std(effkb(3,:));
r1 = iqr(effkb(1,:));
r2 = iqr(effkb(2,:));
r3 = iqr(effkb(3,:));

% These densities look bell shaped, so Silverman's rule-of-thumb bandwidth
% should work well. Other methods are possible (e.g., the Sheather and
% Jones method used above) but would be more computer-intensive for large B.
% NOTE: reflection may be needed here too (if close to the boundary)!
h1ag = 1.06*min([s1 r1/1.349]*B^(-1/5));
h2ag = 1.06*min([s2 r2/1.349]*B^(-1/5));
h3ag = 1.06*min([s3 r3/1.349]*B^(-1/5));

% Kernel density estimates at the sorted bootstrap values.
% (fhat1 and fhat2 from the earlier density section are overwritten here.)
K1 = Nkernel_Li(bs1,bs1,h1ag);
K2 = Nkernel_Li(bs2,bs2,h2ag);
K3 = Nkernel_Li(bs3,bs3,h3ag);

fhat1 = sum(K1,2)/(B*h1ag);
fhat2 = sum(K2,2)/(B*h2ag);
fhat3 = sum(K3,2)/(B*h3ag);

figure
plot(bs1, fhat1, '-', bs2, fhat2, '--', bs3, fhat3, '-.' )
title('Plot of densities of aggregate efficiencies, estimated from their bootstrap values.')

toc

% NOTE(review): data were loaded from Ex2_BS_Agg_ppr but results are saved
% to Ex1_BS_Agg_ppr -- confirm that this file name is intended.
save Ex1_BS_Agg_ppr effkb X Y kapa1 kapa2
 
                