%  Brent R. Hickman and Timothy P. Hubbard
% hickmanbr@uchicago.edu and timothy.hubbard@colby.edu

% please cite our Journal of Applied Econometrics paper "Replacing Sample
% Trimming with Boundary Correction in Nonparametric Estimation of
% First-Price Auctions" if this code is helpful to you.

% Replication of "Asymmetry in First-Price Auctions" by Campo, Perrigne,
% and Vuong (JAE 2003)


function [keepjj,keepss,evalgridjj,fjj,evalgridss,fss, ...
    keepjmix,keepsmix,evalgridjmix,fjmix,evalgridsmix, ...
    fsmix] = replicateCPV2012fcn(trimCPVway);
% REPLICATECPV2012FCN  Two-step nonparametric estimation on the Ocs702 data.
%
% Input:
%   trimCPVway - when equal to 1 the lower bound handed to the trimming
%                routines is 0; otherwise it is the smallest
%                log-transformed bid of the relevant subsample.  The upper
%                bound is the largest log-transformed bid in both cases.
%
% Outputs (jj = joint vs joint, ss = solo vs solo, jmix/smix = joint/solo
% bids from mixed auctions):
%   keepjj, keepss, keepjmix, keepsmix
%       rows of the corresponding subsample that survive trimming; the
%       columns are described in STAGE 1 below (columns 1-6 are filled).
%   evalgridjj, evalgridss, evalgridjmix, evalgridsmix
%       ngrid (= 500) equally spaced points between the smallest and the
%       largest surviving pseudo value of each subsample.
%   fjj, fss, fjmix, fsmix
%       first output of kspdf_no_bc_CPV evaluated on the matching grid.
%
% Side effects: changes the root default text interpreter and font size,
% loads Ocs702.dat from the MATLAB path, prints several tables to the
% command window, and finally runs the script trim_comparison_scatter.
%
% Requires (not defined in this file): kscdf2dCPV, kspdf2dCPV,
% trimsymmetricCPV, trimasymmetricCPV, kspdf_no_bc_CPV,
% trim_comparison_scatter.

lwidth = 2;
fsize = 14;
set(0,'defaulttextinterpreter','latex','Defaulttextfontsize',fsize);

% set to 1 to draw the figures in STAGE 4 (terminated with a semicolon so
% the value is not echoed to the command window on every call)
plotind = 0;

load Ocs702.dat

kernel = 'triweight';

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% STAGE 1: create necessary matrices for analysis and assign auction ids
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% number of bids = number of rows; size(.,1) is used instead of length(.)
% because length returns the largest dimension, not the row count
NT = size(Ocs702,1);
T = NT/2;

% create a variable for the auction id: consecutive rows (1,2), (3,4), ...
% are given ids 1, 2, ...
for i=1:NT
    if mod(i,2) == 1
        Ocs702(i,3) = ceil(i/2);
    else
        Ocs702(i,3) = ceil((i - 1)/2);
    end
end

% matrix of all data:
% col 1: bid
% col 2: dummy (1 if joint, 0 if solo)
% col 3: auction id number
% col 4: log transformation of bids
% col 5: max of rival's log-transformed bid
% col 6: pseudo values (constructed below)
% col 7: winning bid indicator
alldata = Ocs702;
alldata(:,4) = log(1 + alldata(:,1));

% obtain max of rivals' bids at each auction---just opponent's bid given
% only one other bidder at auction
append = [];
for i=1:T
    ind = alldata((alldata(:,3) == i),:);
    if size(ind,1) ~= 2
        fprintf('Error in auction ID assignemnt\n')
    end
    % swap the two log bids so each row receives its opponent's log bid
    append = [append; ind(2,4); ind(1,4)];
end
alldata(:,5) = append;

% create matrices of all solo and all joint bids
jointdata = Ocs702(Ocs702(:,2) == 1,:);
solodata = Ocs702(Ocs702(:,2) == 0,:);

% matrices of auctions based on types---joint vs joint (jj), solo vs solo
% (ss), joint vs solo (js), solo vs joint (sj)
jj = [];
ss = [];
mix = [];
for i=1:T
    ind = alldata((alldata(:,3) == i),:);
    if size(ind,1) ~= 2
        fprintf('Error in auction ID assignemnt\n')
    end
    if ind(1,2) == 1 && ind(2,2) == 1
        jj = [jj; ind];
    elseif ind(1,2) == 0 && ind(2,2) == 0
        ss = [ss; ind];
    elseif (ind(1,2) == 1 && ind(2,2) == 0) || ...
            (ind(1,2) == 0 && ind(2,2) == 1)
        mix = [mix; ind];
    end
end
% split the mixed auctions into the joint bid and the solo bid
jmix = mix((mix(:,2) == 1),:);
smix = mix((mix(:,2) == 0),:);


% replicate table 1 in CPV
% (#Obs uses size(.,1) so the row count is reported even when a subsample
% has fewer rows than columns; the last column is printed as a constant 0)
fprintf('Variable\t#Obs\tMean\tSTD\tMin\tMax\t\tWithin STD\n')
fprintf('All\t\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(alldata,1),mean(alldata(:,1)),std(alldata(:,1)), ...
    min(alldata(:,1)),max(alldata(:,1)),0)
fprintf('Joint\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(jointdata,1),mean(jointdata(:,1)),std(jointdata(:,1)), ...
    min(jointdata(:,1)),max(jointdata(:,1)),0)
fprintf('Solo\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(solodata,1),mean(solodata(:,1)),std(solodata(:,1)), ...
    min(solodata(:,1)),max(solodata(:,1)),0)
fprintf('J vs J\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(jj,1),mean(jj(:,1)),std(jj(:,1)), ...
    min(jj(:,1)),max(jj(:,1)),0)
fprintf('J vs S\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t\t%.2f\n', ...
    size(jmix,1),mean(jmix(:,1)),std(jmix(:,1)), ...
    min(jmix(:,1)),max(jmix(:,1)),0)
fprintf('S vs J\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(smix,1),mean(smix(:,1)),std(smix(:,1)), ...
    min(smix(:,1)),max(smix(:,1)),0)
fprintf('S vs S\t\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t\t%.2f\n', ...
    size(ss,1),mean(ss(:,1)),std(ss(:,1)), ...
    min(ss(:,1)),max(ss(:,1)),0)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% STAGE 2: step 1 of nonparametric estimation---recover pseudo valuations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% estimate cdfs nonparametrically
[Ghatjj,hGjj] = kscdf2dCPV(jj(:,5),jj(:,4),jj(:,4),jj(:,4),kernel);
[Ghatss,hGss] = kscdf2dCPV(ss(:,5),ss(:,4),ss(:,4),ss(:,4),kernel);
[Ghatjmix,hGjmix] = kscdf2dCPV(jmix(:,5),jmix(:,4),jmix(:,4),jmix(:,4), ...
    kernel);
[Ghatsmix,hGsmix] = kscdf2dCPV(smix(:,5),smix(:,4),smix(:,4),smix(:,4), ...
    kernel);

% estimate pdfs nonparametrically
[ghatjj,hgjj] = kspdf2dCPV(jj(:,5),jj(:,4),jj(:,4),jj(:,4),kernel);
[ghatss,hgss] = kspdf2dCPV(ss(:,5),ss(:,4),ss(:,4),ss(:,4),kernel);
[ghatjmix,hgjmix] = kspdf2dCPV(jmix(:,5),jmix(:,4),jmix(:,4),jmix(:,4), ...
    kernel);
[ghatsmix,hgsmix] = kspdf2dCPV(smix(:,5),smix(:,4),smix(:,4),smix(:,4), ...
    kernel);

% construct pseudo values
% (d is transposed to a row before combining with Ghat./ghat and the
% result is transposed back into column 6 -- presumably the estimators
% return row vectors; confirm against kscdf2dCPV / kspdf2dCPV)
d = jj(:,4)';
v = exp(d).*(1 + Ghatjj./ghatjj) - 1;
jj(:,6) = v';

d = ss(:,4)';
v = exp(d).*(1 + Ghatss./ghatss) - 1;
ss(:,6) = v';

d = jmix(:,4)';
v = exp(d).*(1 + Ghatjmix./ghatjmix) - 1;
jmix(:,6) = v';

d = smix(:,4)';
v = exp(d).*(1 + Ghatsmix./ghatsmix) - 1;
smix(:,6) = v';


% trimming J vs J case
if trimCPVway == 1
    low = 0;
    high = max(jj(:,4));
else
    low = min(jj(:,4));
    high = max(jj(:,4));
end
keepjj = trimsymmetricCPV(jj,low,high,hGjj,hgjj);
% two rows per symmetric auction, so halve to count trimmed auctions
ntrimjj = (size(jj,1) - size(keepjj,1))/2;
% trimming S vs S case
if trimCPVway == 1
    low = 0;
    high = max(ss(:,4));
else
    low = min(ss(:,4));
    high = max(ss(:,4));
end
keepss = trimsymmetricCPV(ss,low,high,hGss,hgss);
ntrimss = (size(ss,1) - size(keepss,1))/2;
% trimming in asymmetric case
if trimCPVway == 1
    low = 0;
    high = max(max(smix(:,4)),max(jmix(:,4)));
else
    low = min(min(smix(:,4)),min(jmix(:,4)));
    high = max(max(smix(:,4)),max(jmix(:,4)));
end
[keepjmix,trimjmix] = trimasymmetricCPV(jmix,low,high,hGjmix,hgjmix);
[keepsmix,trimsmix] = trimasymmetricCPV(smix,low,high,hGsmix,hgsmix);
% need to throw out entire auction in trimming, so see which got thrown out
% in jmix and smix and make sure that happens in other case
idcol = 3;
ids = trimjmix(:,idcol);
for i=1:length(ids)
    aucid = ids(i);
    ind = keepsmix((keepsmix(:,idcol) == aucid),:);
    if isempty(ind) == 0
        keepsmix((keepsmix(:,idcol) == aucid),:) = [];
    end
end
ids = trimsmix(:,idcol);
for i=1:length(ids)
    aucid = ids(i);
    ind = keepjmix((keepjmix(:,idcol) == aucid),:);
    if isempty(ind) == 0
        keepjmix((keepjmix(:,idcol) == aucid),:) = [];
    end
end
% one row per mixed auction in each of jmix/smix, so no halving here
ntrimjmix = (size(jmix,1) - size(keepjmix,1));
ntrimsmix = (size(smix,1) - size(keepsmix,1));

% print some results to screen
fprintf('\n\nFirst-stage bandwidths (see 195 and 198 of CPV to compare):\n')
fprintf('Case\th_G\t\th_g\t\t#trim\n')
fprintf('J vs J\t%.2f\t%.2f\t%d\n',hGjj,hgjj,ntrimjj)
fprintf('J vs S\t%.2f\t%.2f\t%d\n',hGjmix,hgjmix,ntrimjmix)
fprintf('S vs J\t%.2f\t%.2f\t%d\n',hGsmix,hgsmix,ntrimsmix)
fprintf('S vs S\t%.2f\t%.2f\t%d\n',hGss,hgss,ntrimss)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% STAGE 3: step 2 of nonparametric estimation; estimate marginal
% distribution of valuations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

vcol = 6;
usehardle = 1;
ngrid = 500;
% The sample-size argument handed to kspdf_no_bc_CPV (and the L_t printed
% below) is the PRE-trimming count of each subsample; the post-trimming
% counts are only computed at the start of the winner loops in STAGE 4.
evalgridjj = linspace(min(keepjj(:,vcol)),max(keepjj(:,vcol)),ngrid);
Ltjj = size(jj,1)/2;
[fjj,hfjj] = kspdf_no_bc_CPV(keepjj(:,vcol),kernel,evalgridjj,Ltjj*2,usehardle);
evalgridss = linspace(min(keepss(:,vcol)),max(keepss(:,vcol)),ngrid);
Ltss = size(ss,1)/2;
[fss,hfss] = kspdf_no_bc_CPV(keepss(:,vcol),kernel,evalgridss,Ltss*2,usehardle);
evalgridjmix = linspace(min(keepjmix(:,vcol)),max(keepjmix(:,vcol)),ngrid);
Ltjmix = size(jmix,1);
[fjmix,hfjmix] = kspdf_no_bc_CPV(keepjmix(:,vcol),kernel,evalgridjmix,Ltjmix,usehardle);
evalgridsmix = linspace(min(keepsmix(:,vcol)),max(keepsmix(:,vcol)),ngrid);
Ltsmix = size(smix,1);
[fsmix,hfsmix] = kspdf_no_bc_CPV(keepsmix(:,vcol),kernel,evalgridsmix,Ltsmix,usehardle);

% print some results to screen
fprintf('\n\nSecond-stage bandwidths (see 196 and 198 of CPV to compare):\n')
fprintf('Case\th_f\t\tL_t\n')
fprintf('J vs J\t%.2f\t%d\n',hfjj,Ltjj)
fprintf('J vs S\t%.2f\t%d\n',hfjmix,Ltjmix)
fprintf('S vs J\t%.2f\t%d\n',hfsmix,Ltsmix)
fprintf('S vs S\t%.2f\t%d\n',hfss,Ltss)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% STAGE 4: replicate some figures and final table
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if plotind == 1
    % figure 1 of CPV on page 196
    figure
    set(gcf,'DefaultLineLineWidth',lwidth)
    set(gca,'FontSize',fsize)
    scatter(keepjj(:,1),keepjj(:,6),'.b')
    hold all
    scatter(keepss(:,1),keepss(:,6),'.r')
    title('Figure 1 of CPV on page 196: Inverse bidding strategies')
    legend('J vs J','S vs S','Location','Northwest')
    axis([0 3200 0 15000])

    % figure 2 of CPV on page 197
    figure
    set(gcf,'DefaultLineLineWidth',lwidth)
    set(gca,'FontSize',fsize)
    plot(evalgridjj,fjj,':b',evalgridss,fss,'-r')
    title('Figure 2 of CPV on page 197: Marginal densities of private values')
    legend('J vs J','S vs S','Location','Northeast')
    axis([0 15000 0 0.0005])

    % figure 3 of CPV on page 198
    figure
    set(gcf,'DefaultLineLineWidth',lwidth)
    set(gca,'FontSize',fsize)
    scatter(keepjmix(:,1),keepjmix(:,6),'.b')
    hold all
    scatter(keepsmix(:,1),keepsmix(:,6),'.r')
    title('Figure 3 of CPV on page 198: Inverse bidding strategies')
    legend('J vs S','S vs J','Location','Northwest')
    axis([0 1600 0 10000])

    % figure 4 of CPV on page 199
    figure
    set(gcf,'DefaultLineLineWidth',lwidth)
    set(gca,'FontSize',fsize)
    plot(evalgridjmix,fjmix,':b',evalgridsmix,fsmix,'-r')
    title('Figure 4 of CPV on page 199: Marginal densities of private values')
    legend('J vs S','S vs J','Location','Northeast')
    axis([0 9000 0 0.00045])

end

% from here on the L_t variables hold the number of auctions that survived
% trimming (they drive the loops below)
Ltjj = size(keepjj,1)/2;
Ltss = size(keepss,1)/2;
Ltjmix = size(keepjmix,1);
Ltsmix = size(keepsmix,1);

% determine winning bidder, compute rents and summarize in table
% (in the symmetric loops the max row is flagged 1 and then the min row is
% flagged 0; with two equal bids max and min pick the same row, so that
% auction ends up with no flagged winner)
uniqueids = unique(keepjj(:,idcol));
finaljj = [];
for i=1:Ltjj
    aucid = uniqueids(i);
    ind = keepjj((keepjj(:,idcol) == aucid),:);
    if size(ind,1) ~= 2
        fprintf('Error not two bids at symmetric auction\n')
    end
    [C,I] = max(ind(:,1));
    ind(I,7) = 1;
    [C,I] = min(ind(:,1));
    ind(I,7) = 0;
    finaljj = [finaljj; ind];
end
uniqueids = unique(keepss(:,idcol));
finalss = [];
for i=1:Ltss
    aucid = uniqueids(i);
    ind = keepss((keepss(:,idcol) == aucid),:);
    if size(ind,1) ~= 2
        fprintf('Error not two bids at symmetric auction\n')
    end
    [C,I] = max(ind(:,1));
    ind(I,7) = 1;
    [C,I] = min(ind(:,1));
    ind(I,7) = 0;
    finalss = [finalss; ind];
end
uniqueids = unique(keepjmix(:,idcol));
finaljmix = [];
finalsmix = [];
winjmix = [];
winsmix = [];
for i=1:Ltjmix
    aucid = uniqueids(i);
    indjmix = keepjmix((keepjmix(:,idcol) == aucid),:);
    if size(indjmix,1) ~= 1
        fprintf('Error not oen bid at asymmetric auction\n')
    end
    indsmix = keepsmix((keepsmix(:,idcol) == aucid),:);
    % validate the SOLO side here (the joint side was checked just above)
    if size(indsmix,1) ~= 1
        fprintf('Error not oen bid at asymmetric auction\n')
    end
    % NOTE: when the two bids are exactly equal neither branch runs, so no
    % winner is recorded and column 7 is not written for that auction
    if indjmix(1,1) > indsmix(1,1)
        indjmix(1,7) = 1;
        indsmix(1,7) = 0;
        winjmix = [winjmix; indjmix];
    elseif indjmix(1,1) < indsmix(1,1)
        indjmix(1,7) = 0;
        indsmix(1,7) = 1;
        winsmix = [winsmix; indsmix];
    end
    finaljmix = [finaljmix; indjmix];
    finalsmix = [finalsmix; indsmix];
end

% calculate summary statistics
% col 8: (pseudo value - bid) as a percentage of the pseudo value
winjj = finaljj((finaljj(:,7) == 1),:);
winss = finalss((finalss(:,7) == 1),:);
winjj(:,8) = (winjj(:,6) - winjj(:,1))./winjj(:,6)*100;
winss(:,8) = (winss(:,6) - winss(:,1))./winss(:,6)*100;
winjmix(:,8) = (winjmix(:,6) - winjmix(:,1))./winjmix(:,6)*100;
winsmix(:,8) = (winsmix(:,6) - winsmix(:,1))./winsmix(:,6)*100;
allwins = [winjj; winss; winjmix; winsmix];
alljointwins = [winjj; winjmix];
allsolowins = [winss; winsmix];


% replicate table 2 in CPV
fprintf('\n\nVariable\t#Obs\tMean\tSTD\t\tMin\t\tMax\n')
fprintf('All\t\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(allwins,1),mean(allwins(:,8)),std(allwins(:,8)), ...
    min(allwins(:,8)),max(allwins(:,8)))
fprintf('Joint\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(alljointwins,1),mean(alljointwins(:,8)),std(alljointwins(:,8)), ...
    min(alljointwins(:,8)),max(alljointwins(:,8)))
fprintf('Solo\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(allsolowins,1),mean(allsolowins(:,8)),std(allsolowins(:,8)), ...
    min(allsolowins(:,8)),max(allsolowins(:,8)))
fprintf('J vs J\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(winjj,1),mean(winjj(:,8)),std(winjj(:,8)), ...
    min(winjj(:,8)),max(winjj(:,8)))
fprintf('J vs S\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(winjmix,1),mean(winjmix(:,8)),std(winjmix(:,8)), ...
    min(winjmix(:,8)),max(winjmix(:,8)))
fprintf('S vs J\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(winsmix,1),mean(winsmix(:,8)),std(winsmix(:,8)), ...
    min(winsmix(:,8)),max(winsmix(:,8)))
fprintf('S vs S\t\t%d\t\t%.2f\t%.2f\t%.2f\t%.2f\n', ...
    size(winss,1),mean(winss(:,8)),std(winss(:,8)), ...
    min(winss(:,8)),max(winss(:,8)))

% auction ids that survived trimming
survivetrimids = [keepjj(:,idcol); keepss(:,idcol); ...
    keepjmix(:,idcol); keepsmix(:,idcol)];
survivetrimids = unique(survivetrimids);
allid = unique(alldata(:,idcol));
% the next two statements have no terminating semicolon, so the trimmed
% auction ids and their count are echoed to the command window
trimid =  setdiff(allid,survivetrimids)
length(trimid)


% do a scatter plot with different indicators for which points would be
% trimmed
% (trim_comparison_scatter is a script run in this workspace; saverun is
% set here for it -- presumably a save-to-disk switch, confirm in script)
saverun = 0;
trim_comparison_scatter;
