%Michal Paluch, Alois Kneip, and Werner Hildenbrand,
%"Individual vs. Aggregate Income Elasticities for Heterogeneous Populations", Journal of Applied Econometrics, forthcoming

%This MATLAB code carries out the entire empirical analysis presented in the paper.
%Input arguments:
%	cat - commodity group (6 = food,...,11 = total)
%	year - estimation for a given year from 74 to 93
%	bsflag - indicator, whether bootstrap should be carried out (1 = yes, 0 = no). Default is zero.
%	skipflag - indicator, whether the estimation on the true sample should be skipped (i.e., only bootstrap will run). (1 = yes, 0 = no). Default is zero
%	B - number of bootstrap replications. Default is 500.


function z = phk_final(cat,year,bsflag,skipflag,B)
%PHK_FINAL  Local and average income elasticities for one commodity group and year.
%
% Input arguments:
%   cat      - column of the data file holding the expenditure of the commodity group
%   year     - survey year; selects the data file and the bandwidth files
%   bsflag   - 1 = run the bootstrap, 0 = do not. Default is 0.
%   skipflag - 1 = skip the estimation on the true sample (bootstrap only), 0 = do not skip. Default is 0.
%   B        - number of bootstrap replications. Default is 500.
%
% Output:
%   z - n x 14 matrix [d, beta, betatot, betamean, betaagg], where d is the n x 10 regression
%       data set built below and the last four columns are the local slopes with respect to
%       the second continuous regressor (column 9 of d) of
%         beta     - local linear quantile regression of log(c),
%         betatot  - local linear quantile regression of log total expenditure,
%         betamean - local linear mean regression of log(c),
%         betaagg  - local linear mean regression of c.
%       z is [] when skipflag == 1 (no estimation on the true sample).
%
% Side effects: writes phk_final_<cat>_<year>.txt for the true sample and, per bootstrap
% replication, phk_finalbs_<b>_<cat>_<year>.txt plus one appended row in
% phk_finalbs_summary_<cat>_<year>.txt. Elapsed times are printed via tic/toc.
%
% External dependency: rq (weighted quantile regression, rq.m) is not defined in this file.

if nargin < 3;    bsflag = 0;    end;
if nargin < 4;    skipflag = 0;  end;
if nargin < 5;    B = 500;       end;
tol = 1e-10; %weights not exceeding tol are treated as zero

z = []; %keeps the output defined when the true-sample estimation is skipped

%open data file
infilem = sprintf('E:/phk/data/basicsamples/basicsample_%d.txt',year);
data = dlmread(infilem,' ');

%d is a Nx10 matrix with
%[log expenditure for the commodity,se-dummy,ue-dummy,ret-dummy,n.adults,n.children,n.working,age,income,log total expenditure]
%NOTE(review): column 12 of the data file is used as is; presumably it already holds log income - confirm.
d = [log(data(:,cat)),regrmat_N(data),data(:,12),log(data(:,11))];

%open file with optimal bandwidths: 9x3 matrix, columns [mLR,MLR,HM]; row 1 is [0,0,hy]
bwfile  = sprintf('E:/phk/results/all/bwall_%d_%d.txt',cat,year);
bw = dlmread(bwfile);
h = bw(2:9,1);    %mLR bandwidths for the (mean) regression of log(c) on y and a
H = bw(2:9,2);    %MLR bandwidths for the (mean) regression of c on y and a
hcdf = bw(2:9,3); %HM bandwidths for the conditional cdf of log(c) given y and a - values for y and a
hy = bw(1,3);     %HM bandwidth for the conditional cdf of log(c) given y and a - value for log(c)

%bandwidths for total expenditure - relevant for the computation of budget elasticities
bwfiletot  = sprintf('E:/phk/results/all/bwall_11_%d.txt',year);
bwtot = dlmread(bwfiletot);
ht = bwtot(2:9,1);  hcdft = bwtot(2:9,3);  hyt = bwtot(1,3);

n = size(d,1);
BB = n; %default size of the bootstrap sample = n

%the cumulative Epanechnikov kernel, tabulated on a grid
uh = (-sqrt(5):0.05:sqrt(5))';kh = 0.335*(1 - (uh.^2)/5);Kh = cumsum(kh,1)./sum(kh,1);

%%%%%%%%%%%%%%%%%% ESTIMATION ON THE TRUE SAMPLE %%%%%%%%%%%%%%%%%%%%%%%%%
if skipflag ~= 1,
    tic,
    z = [d,phk_local_estimates(d,h,H,hcdf,hy,ht,hcdft,hyt,uh,Kh,tol)];
    outfile = sprintf('E:/phk/results/phk_final_%d_%d.txt',cat,year);
    dlmwrite(outfile,z,'\t');
    toc,
end

%%%%%%%%%%%%%%%%%% BOOTSTRAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if bsflag == 1,
    for b = 1:B;
        tic,
        %draw the bootstrap sample (size BB = n) with replacement from the original sample;
        %base-MATLAB rand is used so that no toolbox function is needed for the draw
        sample = ceil(n*rand(BB,1));
        ds = d(sample,:);

        %same estimation steps as for the true sample, applied to the bootstrap sample itself
        zbs1 = [ds,phk_local_estimates(ds,h,H,hcdf,hy,ht,hcdft,hyt,uh,Kh,tol)];
        outfile = sprintf('E:/phk/results/phk_finalbs_%d_%d_%d.txt',b,cat,year);
        dlmwrite(outfile,zbs1,'\t');

        MLR = mean(zbs1(:,14))/mean(exp(zbs1(:,1))); %bootstrap counterpart to betaagg
        mLR = mean(zbs1(:,13)); %bootstrap counterpart to betamean
        mHM = mean(zbs1(:,11)); %bootstrap counterpart to the mean local elasticity for the commodity group (Hoderlein and Mammen)
        covmat = cov(exp(zbs1(:,1)),zbs1(:,11))/mean(exp(zbs1(:,1)));
        bias = covmat(2,1); %bootstrap counterpart to the covariance term
        mHMb = mean(zbs1(:,11)./zbs1(:,12)); %bootstrap counterpart to the mean local budget elasticity (Hoderlein and Mammen)

        %bootstrap counterparts are collected and appended to the summary file
        zbs2 = [MLR,mLR,mHM,bias,mHMb];
        outfile = sprintf('E:/phk/results/phk_finalbs_summary_%d_%d.txt',cat,year);
        dlmwrite(outfile,zbs2,'\t','-append');
        toc
    end %end BS repl
end %end if bsflag


function est = phk_local_estimates(d,h,H,hcdf,hy,ht,hcdft,hyt,uh,Kh,tol)
%Computes, for every observation i of the Nx10 sample d, the four local slopes with respect
%to column 9 of d. Returns est = [beta,betatot,betamean,betaagg] (Nx4):
%1) quantile regression of log(c) at the estimated conditional quantile position of observation i
%2) quantile regression of log total expenditure, same construction
%   => the budget elasticity is obtained elsewhere as 1) divided by 2)
%3) local linear mean regression of log(c)
%4) local linear mean regression of c
n = size(d,1);
y = d(:,1); ytot = d(:,10); yagg = exp(y); %responses: log exp category, log total exp, cat exp
zc = d(:,8:9);   %continuous regressors
zd_u = d(:,2:4); %discrete unordered regressors
zd_o = d(:,5:7); %discrete ordered regressors
e = ones(n,1);
est = zeros(n,4);

for i = 1:n;
    %differences between every observation and observation i; these feed the kernels and
    %are the local regressors, so the fitted slopes are derivatives at the point i
    uch = zc - e*zc(i,:);
    udh_u = zd_u - e*zd_u(i,:);
    udh_o = zd_o - e*zd_o(i,:);
    X = [e,uch]; %Nx3 local design [1, first continuous diff, second continuous diff]

    %smoothed indicators of (response_j <= response_i)
    gcdf = kint((y(i,1) - y)/hy,uh,Kh);        %for the commodity group
    gcdft = kint((ytot(i,1) - ytot)/hyt,uh,Kh); %for total expenditure

    %conditional quantile position of observation i, commodity group
    Wcdf = k(uch(:,1),hcdf(1,1)).*k(uch(:,2),hcdf(2,1)).*phk_catweights(udh_u,udh_o,hcdf);
    keep = Wcdf > tol;
    Fhat = sum(Wcdf(keep,1).*gcdf(keep,1),1)/sum(Wcdf(keep,1),1);

    %conditional quantile position of observation i, total expenditure
    Wcdft = k(uch(:,1),hcdft(1,1)).*k(uch(:,2),hcdft(2,1)).*phk_catweights(udh_u,udh_o,hcdft);
    keep = Wcdft > tol;
    Fhatt = sum(Wcdft(keep,1).*gcdft(keep,1),1)/sum(Wcdft(keep,1),1);

    %1) local linear quantile regression of log(c); the continuous bandwidths are rescaled
    %from the mean-regression optimum to the quantile optimum (see bwtrans)
    L = phk_catweights(udh_u,udh_o,h);
    htau = bwtrans(Fhat);
    W = k(uch(:,1),htau*h(1,1)).*k(uch(:,2),htau*h(2,1)).*L;
    coef = phk_wrq(X,y,W,Fhat,tol);
    est(i,1) = coef(3);

    %2) the same for total expenditure
    Lt = phk_catweights(udh_u,udh_o,ht);
    htaut = bwtrans(Fhatt);
    Wt = k(uch(:,1),htaut*ht(1,1)).*k(uch(:,2),htaut*ht(2,1)).*Lt;
    coef = phk_wrq(X,ytot,Wt,Fhatt,tol);
    est(i,2) = coef(3);

    %3) mean regression of log(c); shares the categorical weights L with 1)
    Wmean = k(uch(:,1),h(1,1)).*k(uch(:,2),h(2,1)).*L;
    coef = phk_wls(X,y,Wmean,tol);
    est(i,3) = coef(3);

    %4) mean regression of c
    Wagg = k(uch(:,1),H(1,1)).*k(uch(:,2),H(2,1)).*phk_catweights(udh_u,udh_o,H);
    coef = phk_wls(X,yagg,Wagg,tol);
    est(i,4) = coef(3);
end;


function w = phk_catweights(udh_u,udh_o,bwv)
%Product of the categorical kernel weights: entries 3:5 of the bandwidth vector bwv belong
%to the three unordered regressors, entries 6:8 to the three ordered regressors.
w = l_u(udh_u(:,1),bwv(3,1)).*l_u(udh_u(:,2),bwv(4,1)).*l_u(udh_u(:,3),bwv(5,1)) ...
    .*l_o(udh_o(:,1),bwv(6,1)).*l_o(udh_o(:,2),bwv(7,1)).*l_o(udh_o(:,3),bwv(8,1));


function b = phk_wrq(X,y,w,tau,tol)
%Weighted quantile regression at quantile tau using rq.m: rows of X and y are scaled by
%their weights, and observations with weight <= tol are dropped to reduce computation.
keep = w > tol;
wk = w(keep,1);
b = rq(X(keep,:).*(wk*ones(1,size(X,2))),y(keep,1).*wk,tau);


function b = phk_wls(X,y,w,tol)
%Weighted least squares b = (X'WX)\(X'Wy) over the observations with weight > tol.
%Matrix form of summing w_j*[1;u_j]*[1,u_j] and w_j*[1;u_j]*y_j over those observations.
keep = w > tol;
Xk = X(keep,:);
XtW = (Xk.*(w(keep,1)*ones(1,size(Xk,2))))';
b = (XtW*Xk)\(XtW*y(keep,1));



function z = regrmat_N(d);
%Builds the attribute matrix for the regressions from the raw data matrix d.
%Output columns: [se-dummy, ue-dummy, ret-dummy, d(:,3)-d(:,4), d(:,4), d(:,5), d(:,2)],
%i.e. 3 unordered categorical, 3 ordered categorical and 1 continuous regressor.
%No constant column is included.
status = d(:,1); %employment status code; codes not listed below form the base group

%dummies for the employment status
selfEmployed = double(status == 2);
notWorking = double(ismember(status,[3 4 5 7])); %unemployed and unoccupied
retired = double(status == 6);

%d(:,5) = number of working persons in the household
adults = d(:,3) - d(:,4);
z = [selfEmployed,notWorking,retired,adults,d(:,4),d(:,5),d(:,2)];

function z = bwtrans(tau), %scale factor turning the optimal mean-regression bandwidth into a quantile-regression bandwidth
%tau is the quantile position; works elementwise on arrays.
%Returns (tau*(1-tau)/phi(q)^2)^(1/6), with q the standard normal tau-quantile and phi the
%standard normal density. Requires normpdf and norminv.
q = norminv(tau);
dens = normpdf(q);
ratio = tau.*(1-tau)./(dens.^2);
z = ratio.^(1/6);

function z = kint(x,uh,Kh);%integrated (cumulative) continuous kernel, evaluated by table lookup
%x  - points at which the cumulative kernel is evaluated
%uh - grid on which the cumulative kernel is tabulated (assumed ascending, as built by the caller)
%Kh - cumulative kernel values on the grid uh
%Inside the grid the table is interpolated linearly. Outside the grid the result is held at
%the end values Kh(1) and Kh(end): a cumulative kernel is flat beyond its support, and
%linear extrapolation of the boundary segments would return values below 0 or above 1.
z = interp1(uh,Kh,x,'linear','extrap');
z(x < uh(1)) = Kh(1);
z(x > uh(end)) = Kh(end);

function z = k(uch,h); %continuous kernel (Epanechnikov)
%uch - differences to the evaluation point, h - bandwidth.
%Returns 0.335*(1 - u^2/5) for u = uch/h with |u| <= sqrt(5), and zero weight outside.
%The value is not divided by h.
scaled = (uch)/h;
inSupport = abs(scaled) <= sqrt(5);
parabola = 0.335*(1 - (scaled.^2)/5);
z = parabola.*inSupport;

function z = l_u(udh,lambda); %unordered kernel
%udh - differences of an unordered categorical regressor to the evaluation point,
%lambda - smoothing parameter.
%Weight is 1 - lambda where the categories coincide (difference 0) and 0.5*lambda otherwise.
nObs = size(udh,1);
z = ones(nObs,1)*(0.5*lambda); %weight for differing categories
sameCategory = (udh == 0);
z(sameCategory,:) = 1 - lambda;

function z = l_o(udh,lambda); %ordered kernel
%udh - differences of an ordered categorical regressor to the evaluation point,
%lambda - smoothing parameter.
%Weight is 1 - lambda at distance 0 and ((1 - lambda)/2)*lambda^|udh| otherwise.
dist = abs(udh);
scale = (1 - lambda)/2;
z = scale*(lambda.^dist);
z(dist == 0,:) = 1 - lambda;