
%
% This program uses data from single_explore.do to
% estimate the SUR used as the auxiliary model in the
% indirect inference paper.
% USES PLANT LEVEL DATA
%
% Produces the indirect inference moments 
%

clear variables

% Log the command-window output of this run to a text file.
diary('plt_sur_xtra.txt');
disp('diary on');

% Put Lesage's routines (and every sub-folder beneath them) on the path.
 p = genpath('e:\copeland\Matlab_routines');
 addpath(p);

% Reading the plant-level moments file and naming its columns.
% Column layout of data_moments_plt.csv:
%   1.pin_sal_US 2.mrkt_price 3.prod 4.lag_levelprc 5.lag_levelsal
%   6.lag_levelinv 7.lag_levelinvsq 8.myindx 9.myindx_sq 10.prod_flg
%   11.inv_infer 12.cmpy 13.seg 14.myear 15.plt 16.mdl 17.indx 18.p_flg
%   19.days_supply 20.day_flg 21.I_flg

  % This is plant level data (as opposed to model level data)
  % Note that certain variables (e.g. model) are meaningless
  data_moments = load('data_moments_plt.csv');

  % Dependent variables
  sales       = data_moments(:,1);
  price       = data_moments(:,2);
  prod        = data_moments(:,3);

  % Lagged levels and index terms
  lag_prc     = data_moments(:,4);
  lag_sal     = data_moments(:,5);
  lag_inv     = data_moments(:,6);
  lag_invsq   = data_moments(:,7);
  myindx      = data_moments(:,8);
  myindx_sq   = data_moments(:,9);

  % Flags and identifiers
  prod_flg    = data_moments(:,10);
  inv_infer   = data_moments(:,11);
  company     = data_moments(:,12);
  mrktseg     = data_moments(:,13);
  myear       = data_moments(:,14);
  plant       = data_moments(:,15);
  model       = data_moments(:,16);
  month_indx  = data_moments(:,17);
  p_flg       = data_moments(:,18);
  days_supply = data_moments(:,19);
  day_flg     = data_moments(:,20);
  I_flg       = data_moments(:,21);

  % The raw matrix is no longer needed once the columns are split out
  clear data_moments
  disp('Data loaded');
  
% Squares of the lagged price, sales and inventory levels (in this file
% they appear only in the commented-out extended regressor matrices)
  lag_prc2 = lag_prc.^2;
  lag_sal2 = lag_sal.^2;
  lag_inv2 = lag_inv.^2;
  
  
%
% Plant fixed effects: one 0/1 indicator column per plant id 1..max(plant),
% together with the number of observations that fall in each plant
%
  nmbr_plt = max(plant);
  % Row r, column j is 1 exactly when observation r belongs to plant j
  mat_plt = double(bsxfun(@eq, plant, 1:nmbr_plt));
  % Column totals of the indicator matrix = observations per plant
  wgt_plt = sum(mat_plt,1)';


  % Weighting variables -- used later to agg. plant dummies into one number
  sum_wgt = sum(wgt_plt);
  % Every column of wgt_horz is wgt_plt; every row of wgt_vert is wgt_plt'
  wgt_horz = repmat(wgt_plt,1,nmbr_plt);
  wgt_vert = repmat(wgt_plt',nmbr_plt,1);
  
%
% Estimating the SUR (Lesage's code)
%
% Need to run two SURs, as the production equation has fewer
% observations than the sales/prices/days_supply equation
%

  % SUR without production: keep rows whose lagged price, lagged sales,
  % lagged inventory, sales and price are all strictly positive
  indxA = find(lag_prc>0 & lag_sal>0 & lag_inv>0 & sales>0 & price>0);
%  indxA = indxA(1:1500); -- done to check A0/B0 construction.
  nobsA = length(indxA);
  
  % Taking out the group means (group==plant)
    ddd = mat_plt(indxA,:);
    % Keep only plants that actually appear in this subsample.  A plant
    % with no rows here has a zero count, so its "mean" would be 0/0 = NaN
    % and ddd*mean would turn EVERY demeaned value into NaN (0*NaN is NaN).
    % Its all-zero dummy would also make the check regression below rank
    % deficient.  When every plant is present this line changes nothing.
    ddd = ddd(:,any(ddd,1));
    % Observations per retained plant (dimension given explicitly so a
    % single-row subsample is still summed column-wise)
    wgtsum = sum(ddd,1);
    % Sales: per-plant mean, then deviation of each obs from its plant mean
    y1 = sales(indxA); 
    mean_sal = ((y1'*ddd)./wgtsum)';
    demean_sal = y1-ddd*mean_sal;
    % Price
    y2 = price(indxA);   
    mean_prc = ((y2'*ddd)./wgtsum)';
    demean_prc = y2-ddd*mean_prc;
    % Independent variables
    x = [lag_prc(indxA) lag_sal(indxA) lag_inv(indxA) myindx(indxA)];
%%    x = [lag_prc(indxA) lag_sal(indxA) lag_inv(indxA) myindx(indxA) lag_prc2(indxA) lag_sal2(indxA) lag_inv2(indxA)];
    [l,w] = size(x);
    % One row of plant counts per regressor so the division is elementwise
    mean_x = ((x'*ddd)./kron(ones(w,1),wgtsum))';
    demean_x = x - ddd*mean_x;
    
  % Two-equation system (sales, price) sharing the same demeaned regressors
  y_sysA(1).eq = demean_sal;
  y_sysA(2).eq = demean_prc;

  x_sysA(1).eq = demean_x;
  x_sysA(2).eq = demean_x;
  res_surA = sur(2,y_sysA,x_sysA,1);

  disp('SUR A R-squared');
  res_surA.rsqr
  
  % Storing sales and price vector (raw levels, not demeaned) and the
  % index shifted down by one
  y_sal = y1;
  y_prc = y2;
  y_indxA = myindx(indxA)-1;

%  Used to check that de-meaning was done properly: same system in raw
%  levels with explicit plant dummies (only plants present in the sample)
  y_chck(1).eq = sales(indxA);
  y_chck(2).eq = price(indxA);

  x_chck(1).eq = [lag_prc(indxA) lag_sal(indxA) lag_inv(indxA) myindx(indxA) ddd];
  x_chck(2).eq = x_chck(1).eq;

  res_CHCKA = sur(2,y_chck,x_chck,1);
%
%  [res_surA(1).beta(1:4) res_CHCKA(1).beta(1:4) res_surA(2).beta(1:4) res_CHCKA(2).beta(1:4)]
%  [res_surA(1).tstat(1:4) res_CHCKA(1).tstat(1:4) res_surA(2).tstat(1:4) res_CHCKA(2).tstat(1:4)]

     
  % SUR with production: same positivity filter as the sales/price SUR,
  % restricted further to rows with prod_flg>3 and p_flg<1
  indxB = find(lag_prc>0 & lag_sal>0 & lag_inv>0 & sales>0 & price>0 & prod_flg>3 & p_flg<1);
  nobsB = length(indxB);

  % Taking out the group means (group==plant)
    ddd = mat_plt(indxB,:);
    % Keep only plants that actually appear in this subsample.  A plant
    % with no rows here has a zero count, so its "mean" would be 0/0 = NaN
    % and ddd*mean would turn EVERY demeaned value into NaN (0*NaN is NaN).
    % Its all-zero dummy would also make the check regression below rank
    % deficient.  When every plant is present this line changes nothing.
    ddd = ddd(:,any(ddd,1));
    % Observations per retained plant (dimension given explicitly so a
    % single-row subsample is still summed column-wise)
    wgtsum = sum(ddd,1);
    % Sales: per-plant mean, then deviation of each obs from its plant mean
    y1 = sales(indxB); 
    mean_sal = ((y1'*ddd)./wgtsum)';
    demean_sal = y1-ddd*mean_sal;
    % Price
    y2 = price(indxB);   
    mean_prc = ((y2'*ddd)./wgtsum)';
    demean_prc = y2-ddd*mean_prc;
    % Production
    y3 = prod(indxB);   
    mean_prd = ((y3'*ddd)./wgtsum)';
    demean_prd = y3-ddd*mean_prd;    
    % Independent variables
    x = [lag_prc(indxB) lag_sal(indxB) lag_inv(indxB) myindx(indxB)];
%%    x = [lag_prc(indxB) lag_sal(indxB) lag_inv(indxB) myindx(indxB) lag_prc2(indxB) lag_sal2(indxB) lag_inv2(indxB)];
    [l,w] = size(x);
    % One row of plant counts per regressor so the division is elementwise
    mean_x = ((x'*ddd)./kron(ones(w,1),wgtsum))';
    demean_x = x - ddd*mean_x;
    
  % Three-equation system (sales, price, production), common regressors
  y_sysB(1).eq = demean_sal;
  y_sysB(2).eq = demean_prc;
  y_sysB(3).eq = demean_prd;

  x_sysB(1).eq = demean_x;
  x_sysB(2).eq = demean_x;
  x_sysB(3).eq = demean_x;

  res_surB = sur(3,y_sysB,x_sysB,1);

  disp('SUR B R-squared');
  res_surB.rsqr
  
  % Storing production vector (raw levels) and the index shifted down by one
  y_prd = y3;
  y_indxB = myindx(indxB)-1;

  %Check on de-meaning: same system in raw levels with explicit plant
  %dummies (only plants present in the sample)
  y_chck(1).eq = sales(indxB);
  y_chck(2).eq = price(indxB);
  y_chck(3).eq = prod(indxB);
  %
  x_chck(1).eq = [lag_prc(indxB) lag_sal(indxB) lag_inv(indxB) myindx(indxB) ddd];
  x_chck(2).eq = x_chck(1).eq;
  x_chck(3).eq = x_chck(1).eq;
  %
  res_CHCKB = sur(3,y_chck,x_chck,1);
  %
  % Side-by-side slope coefficients, then t-stats: demeaned vs dummy version
  [res_surB(1).beta(1:4) res_CHCKB(1).beta(1:4) res_surB(2).beta(1:4) res_CHCKB(2).beta(1:4) res_surB(3).beta(1:4) res_CHCKB(3).beta(1:4)]
  [res_surB(1).tstat(1:4) res_CHCKB(1).tstat(1:4) res_surB(2).tstat(1:4) res_CHCKB(2).tstat(1:4) res_surB(3).tstat(1:4) res_CHCKB(3).tstat(1:4)]

  disp('SURs estimated');

%
%  Estimating means of sales, price and production: each raw series is
%  regressed on a constant, the shifted index and its square.
%

% Sales and price come from the same sample, so one design matrix serves both
XX = [ones(size(y_sal)) y_indxA y_indxA.^2];
  res_sal = ols(y_sal, XX);
  res_prc = ols(y_prc, XX);
% Production uses the smaller production sample
XX = [ones(size(y_prd)) y_indxB y_indxB.^2];
  res_prd = ols(y_prd, XX);

  disp('Mean sales, price and production');
  [res_sal.beta, res_prc.beta, res_prd.beta]


%
% Numerically computing A0 and B0 for SUR A (without production)
%

  % Inputs handed to both the Hessian and the Newey-West routine:
  % stacked dependent variables, common regressors, coefficient matrix
  % and the upper triangle of the SUR residual covariance
  Y  = [y_sysA(1).eq, y_sysA(2).eq];
  X  = x_sysA(1).eq;
  PI = [res_surA(1).beta, res_surA(2).beta];
  D  = triu(res_surA(1).sigma);
  obsA = length(y_sysA(1).eq);

  % Hessian, scaled by 1/T
  hessL = hessianGH(Y,X,PI,D);
  A0A = (1/obsA)*hessL;

  disp('Computed Hessian for SUR A');

  % Saving data on coefs, etc. (not currently used)
    % slope coefficients across the two equations
    ncoefA = 2*size(x_sysA(1).eq,2)
    % non-zero entries of the upper-triangular covariance
    r = find(D~=0);
    ncovA = numel(r)

  %
  %  compute Newey-West standard errors
  %  nwse returns VCV
  %  we want B0, which is the inverse of the VCV
  %  we have to scale by T properly
  %  note: A0 + B0 = 0 if model is correctly specified
  %  Use 2 lags for Newey West
  B0A = nw_vcv(Y,X,PI,D,2);

  %  [V,S]=nwse([ res_ols1.resid res_ols2.resid],X,10)

  disp('Computed Newey Wesy for SUR A');

  %
  % As a check, compute OLS standard errors and report
  % alongside Hessian/Newey-West standard errors
  %
  res_ols1A = ols(y_sysA(1).eq, x_sysA(1).eq);
  res_ols2A = ols(y_sysA(2).eq, x_sysA(2).eq);

  hess_se = sqrt(diag(inv(-obsA*A0A)));
  nw_se   = sqrt(diag(inv(obsA*B0A)));
  % OLS column is padded with three ones so it lines up with the other two
  ols_se  = vertcat(res_ols1A.bstd, res_ols2A.bstd, ones(3,1));

% Std errors from SUR command
%tmp = [res_surA(1).beta./res_surA(1).tstat; res_surA(2).beta./res_surA(2).tstat; ones(3,1)];

  disp('For SUR A without production');
  disp(' Hessian; Newey-West;  OLS ');
  disp([hess_se, nw_se, ols_se]);

%  res_cov = cov(res_ols1.resid,res_ols2.resid)
  
%
% Numerically computing A0 and B0 for SUR B (with production)
%

  % Inputs handed to both the Hessian and the Newey-West routine:
  % stacked dependent variables, common regressors, coefficient matrix
  % and the upper triangle of the SUR residual covariance
  Y  = [y_sysB(1).eq, y_sysB(2).eq, y_sysB(3).eq];
  X  = x_sysB(1).eq;
  PI = [res_surB(1).beta, res_surB(2).beta, res_surB(3).beta];
  D  = triu(res_surB(1).sigma);
  obsB = length(y_sysB(1).eq);

  % Hessian, scaled by 1/T
  hessL = hessianGH(Y,X,PI,D);
  A0B = (1/obsB)*hessL;

  % Saving data on coefs, etc.
  % slope coefficients across the three equations
  ncoefB = 3*size(x_sysB(1).eq,2)
  % non-zero entries of the upper-triangular covariance
  r = find(D~=0);
  ncovB = numel(r)

  disp('Computed Hessian for SUR B');

  % Newey-West with 2 lags
  B0B = nw_vcv(Y,X,PI,D,2);

  disp('Computed Newey Wesy for SUR B');

  % OLS standard errors, equation by equation
  res_ols1 = ols(y_sysB(1).eq, x_sysB(1).eq);
  res_ols2 = ols(y_sysB(2).eq, x_sysB(2).eq);
  res_ols3 = ols(y_sysB(3).eq, x_sysB(3).eq);

  hess_se = sqrt(diag(inv(-obsB*A0B)));
  nw_se   = sqrt(diag(inv(obsB*B0B)));
  % OLS column is padded with six ones so it lines up with the other two
  ols_se  = vertcat(res_ols1.bstd, res_ols2.bstd, res_ols3.bstd, ones(6,1));

  disp('For SUR B with production');
  disp('Hessian; Newey-West;  OLS ');
  disp([hess_se, nw_se, ols_se]);

%
% Numerically computing A0 and B0 for separate OLS regression on production
%

  % Demeaned production on the demeaned regressors of the production sample
  Y  = y_sysB(3).eq;
  X  = x_sysB(3).eq;
  % Point estimates and residual variance from the single-equation OLS fit
  PI = res_ols3.beta;
  D  = res_ols3.sige;
  obsB = length(Y);
  % Hessian scaled by 1/T, then Newey-West with 2 lags
  hessL = hessianGH(Y,X,PI,D);
  A0q = (1/obsB)*hessL;
  B0q = nw_vcv(Y,X,PI,D,2);

%
% Numerically computing A0 and B0 for three separate OLS regressions
%

  % Hessians (scaled by 1/T) and Newey-West (2 lags), one series at a time.
  % Each regression is: raw series on constant, shifted index, index squared.
  % sales
    Y = y_sal;
    X = [ones(size(y_sal)) y_indxA y_indxA.^2];
    PI = res_sal.beta;
    D = res_sal.sige;
    obsA = length(y_sal);
    hessL = hessianGH(Y,X,PI,D);
    A0sal = (1/obsA)*hessL;
    B0sal = nw_vcv(Y,X,PI,D,2);
  % price
    Y = y_prc;
    X = [ones(size(y_prc)) y_indxA y_indxA.^2];
    PI = res_prc.beta;
    D = res_prc.sige;
    obsA = length(y_prc);
    hessL = hessianGH(Y,X,PI,D);
    A0prc = (1/obsA)*hessL;
    B0prc = nw_vcv(Y,X,PI,D,2);
  % production
    Y = y_prd;
    X = [ones(size(y_prd)) y_indxB y_indxB.^2];
    PI = res_prd.beta;
    D = res_prd.sige;
    obsB = length(y_prd);
    hessL = hessianGH(Y,X,PI,D);
    A0prd = (1/obsB)*hessL;
    B0prd = nw_vcv(Y,X,PI,D,2);

  disp('Computed Hessians & Newey-West for OLSs');

  % OLS standard errors (sales and price share one design matrix)
  XX = [ones(size(y_sal)) y_indxA y_indxA.^2];
  res_olsse1 = ols(y_sal, XX);
  res_olsse2 = ols(y_prc, XX);
  XX = [ones(size(y_prd)) y_indxB y_indxB.^2];
  res_olsse3 = ols(y_prd, XX);

  % Stack the three regressions' standard errors; the OLS column gets a 1
  % after each coefficient block so it lines up with the other two columns
  hess_se = [sqrt(diag(inv(-obsA*A0sal)));
             sqrt(diag(inv(-obsA*A0prc)));
             sqrt(diag(inv(-obsB*A0prd)))];
  ols_se  = [res_olsse1.bstd; 1;
             res_olsse2.bstd; 1;
             res_olsse3.bstd; 1];
  nw_se   = [sqrt(diag(inv(obsA*B0sal)));
             sqrt(diag(inv(obsA*B0prc)));
             sqrt(diag(inv(obsB*B0prd)))];

  disp('For OLS (sale,price,production)');
  disp(' Hessian; Newey-West;  OLS ');
  disp([hess_se, nw_se, ols_se]);


%
% Combining the [A0A B0A] with [A0B B0B] appropriately for SURS and adding on 3 OLS
% Two approaches: A) use SURA, single OLS on production and 3 OLS equations
% and B) use SURB with 3 OLS equations
% Note: only use means from 3 OLS equations (not variance of residuals)
%
% Both results are block-diagonal.  Block sizes are read from the matrices
% themselves as scalars (size(.,1)); the earlier form used the two-element
% size vector in colon expressions, which only worked because colon takes
% the first element of a nonscalar operand.
%

  % First Approach, use SURA with single OLS on production

  m = size(A0A,1);      % rows of the SUR A block
  n = size(A0q,1);      % rows of the production-OLS block
  % Total dimension: SUR A block + production block + 3 blocks of 3 mean
  % coefficients (equals 25 for the four-regressor specification)
  A01 = zeros(m+n+9);
  B01 = zeros(m+n+9);

  A01(1:m,1:m) = A0A;
  A01(m+1:m+n,m+1:m+n) = A0q;
  % Only the leading 3x3 (coefficient) part of each OLS matrix is used
  A01(m+n+1:m+n+3,m+n+1:m+n+3) = A0sal(1:3,1:3);
  A01(m+n+4:m+n+6,m+n+4:m+n+6) = A0prc(1:3,1:3);
  A01(m+n+7:m+n+9,m+n+7:m+n+9) = A0prd(1:3,1:3);

  B01(1:m,1:m) = B0A;
  B01(m+1:m+n,m+1:m+n) = B0q;
  B01(m+n+1:m+n+3,m+n+1:m+n+3) = B0sal(1:3,1:3);
  B01(m+n+4:m+n+6,m+n+4:m+n+6) = B0prc(1:3,1:3);
  B01(m+n+7:m+n+9,m+n+7:m+n+9) = B0prd(1:3,1:3);

  % Second approach, use SURB
  m = size(A0B,1);      % rows of the SUR B block
  % Preallocate the full m+9 square (the previous m+3 was too small and
  % relied on the matrix growing during the assignments below)
  A02 = zeros(m+9);
  B02 = zeros(m+9);
  A02(1:m,1:m) = A0B;
  A02(m+1:m+3,m+1:m+3) = A0sal(1:3,1:3);
  A02(m+4:m+6,m+4:m+6) = A0prc(1:3,1:3);
  A02(m+7:m+9,m+7:m+9) = A0prd(1:3,1:3);
  
  B02(1:m,1:m) = B0B;
  B02(m+1:m+3,m+1:m+3) = B0sal(1:3,1:3);
  B02(m+4:m+6,m+4:m+6) = B0prc(1:3,1:3);
  B02(m+7:m+9,m+7:m+9) = B0prd(1:3,1:3);

% Reporting a few results
disp('Obs in SUR A');
disp(obsA);
disp('Obs in SUR B');
disp(obsB);

% Saving results
%%save weight_mats_plt_xtra A01 B01 A02 B02 

% Approach 1 parameter vector: SUR A slopes, the non-zero upper-triangle
% entries of its residual covariance (column-major order), the production
% OLS slopes and residual variance, then the three trend regressions
D = triu(res_surA(1).sigma);
D1 = D(D~=0);
coefs1 = vertcat(res_surA(1).beta, res_surA(2).beta, D1, ...
                 res_ols3.beta, res_ols3.sige, ...
                 res_sal.beta, res_prc.beta, res_prd.beta);

% Approach 2 parameter vector: SUR B slopes, the non-zero upper-triangle
% entries of its residual covariance, then the three trend regressions
D = triu(res_surB(1).sigma);
D2 = D(D~=0);
coefs2 = vertcat(res_surB(1).beta, res_surB(2).beta, res_surB(3).beta, D2, ...
                 res_sal.beta, res_prc.beta, res_prd.beta);

%%save moments_data_plt_xtra coefs1 coefs2 obsA obsB

% computing std errors on variances
% see p 499 in Statistical Theory, 4th edition

% Residuals: sales and price from SUR A, production from SUR B
rS = res_surA(1).resid;
rP = res_surA(2).resid;
rQ = res_surB(3).resid;
ll=length(rS)
lll=length(rQ)

% Each value is sum(r.^2)/(n-1) divided by sqrt(n).
% NOTE(review): the usual normal-theory standard error of a sample variance
% carries an extra sqrt(2) factor -- confirm against the cited reference.
se_sigma_rS = sum(rS.*rS)/((ll-1)*sqrt(ll));
se_sigma_rP = sum(rP.*rP)/((ll-1)*sqrt(ll));
se_sigma_rQ = sum(rQ.*rQ)/((lll-1)*sqrt(lll));



% Displaying results (coefficient, then coefficient/t-stat = std error)
disp(' Coefs for three regressions and std errors')
disp(' Sales ');
disp([res_surA(1).beta res_surA(1).beta./res_surA(1).tstat]);
disp(' Price ');
disp([res_surA(2).beta res_surA(2).beta./res_surA(2).tstat]);
disp(' Production ');
disp([res_surB(3).beta res_surB(3).beta./res_surB(3).tstat]);
disp('Sigma_Sq from sales, price, and production');
% Show exactly the three residual variances named in the label, read from
% the diagonals of the SUR covariance matrices.  The previous [D1' D2(6)]
% also printed the sales-price covariance (second entry of D1), giving four
% numbers that did not line up with the three std errors shown next.
disp([res_surA(1).sigma(1,1) res_surA(1).sigma(2,2) res_surB(1).sigma(3,3)]);
disp('Std errors');
disp([se_sigma_rS se_sigma_rP se_sigma_rQ]);

disp('Means/std from OLS, sales, price, production');
disp([res_sal.beta res_prc.beta res_prd.beta; res_sal.bstd res_prc.bstd res_prd.bstd]);

disp('R-squares, using fixed effects OLS');
disp('Sales, Price, Prod');
disp([res_CHCKA(1).rsqr; res_CHCKA(2).rsqr; res_CHCKB(3).rsqr]);

disp('Thats it ');

diary off
