function [gmm se asyvar sargan] = gravityiv1(Y,X,Z,gmm0)
%% Calculates the two-step GMM point estimator and its standard error
%% for a two-way exponential regression model on an n-by-n network without
%% self-links, using instrumental variables.
% INPUT:
% Y    = n-by-n array of outcomes
% X    = cell array holding d n-by-n matrices of regressors
% Z    = cell array holding q n-by-n matrices of instruments
% gmm0 = d-vector with the starting value for the optimization
% X, Z and gmm0 may be supplied as rows or as columns.
% OUTPUT:
% gmm    = point estimate (d-by-1)
% se     = standard errors, sqrt(diag(asyvar))
% asyvar = estimated covariance matrix of gmm
% sargan = Sargan overidentification statistic; it is also printed to the
%          command window next to chi2inv(.95,q-d)
%
% NOTES:
% The diagonal entries of Y and of every X{k} and Z{k} are re-set to zero here.
% Each regressor is demeaned (grand mean over all n*n entries) before its
% diagonal is zeroed; the instruments are not demeaned.
% chi2inv is not part of core MATLAB (Statistics and Machine Learning Toolbox).

% Normalise orientation: the local functions read the number of regressors
% and instruments from the first dimension of the cell arrays, and the Newton
% update adds a column direction to the parameter vector.
X = X(:); Z = Z(:); gmm0 = gmm0(:);

n = size(Y,1); % number of nodes
d = size(X,1); % number of regressors
q = size(Z,1); % number of moment conditions

%% Demean regressors to deal with possible non-negativity of covariates
Y = Y-diag(diag(Y));
for k=1:d, X{k} = X{k}-mean(mean(X{k})); X{k} = X{k}-diag(diag(X{k})); end
%for k=1:q, Z{k} = Z{k}-mean(mean(Z{k})); Z{k} = Z{k}-diag(diag(Z{k})); end
% Instruments: only the diagonal is cleared. A non-zero Z{k}(i,i) would
% otherwise enter the moment sums evaluated in QuadraticForm.
for k=1:q, Z{k} = Z{k}-diag(diag(Z{k})); end

% Normalisation applied to the summed moments and Jacobian
nn = nchoosek(n,2); mm = nchoosek(n-2,2); rho = nn*mm;

% Minimization of GMM problem by Newton's method, starting at gmm0
% Step 1: identity weight matrix
V = eye(q);
gmm = Newton(@QuadraticForm,gmm0,Y,X,Z,V);
% Step 2: weight matrix estimated at the first-step estimate
V = Vmatrix(gmm,Y,X,Z);
[gmm, ~, ~, S, M] = Newton(@QuadraticForm,gmm,Y,X,Z,V); % S: Jacobian sum, M: moment sum

% Estimation of variance of moment conditions (at the final estimate)
J = (S/rho);
[V] = Vmatrix(gmm,Y,X,Z);

% Sargan test statistic, displayed next to the 5% chi-square critical value
g = n*M/rho;
sargan = (g'*inv(V)*g);
[sargan, chi2inv(.95,q-d)],

% Construction of standard errors
Upsilon = inv(J'*inv(V)*J); asyvar = Upsilon/(n*(n-1)); se = sqrt(diag(Upsilon)/(n*(n-1)));

%% Evaluation of GMM problem at fixed parameter value psi
function [criterion score Hessian H S] = QuadraticForm(psi,Y,X,Z,V)
% Evaluates the GMM objective and its derivatives at the parameter value psi.
%   criterion = -S'*inv(V)*S   (negated quadratic form, so it is maximised)
%   score     = -2*H'*inv(V)*S
%   Hessian   = -2*H'*inv(V)*H
%   H         = q-by-d Jacobian of S with respect to psi
%   S         = q-by-1 vector of summed moment conditions
nodes = size(Y,1); % sample size
d     = size(X,1); % number of regressors
q     = size(Z,1); % number of moment conditions

% disturbance u = Y./exp(sum_k X{k}*psi(k))
eta = zeros(nodes,nodes);
for c = 1:d
    eta = eta + X{c}*psi(c);
end
u = Y./exp(eta);

% aggregates of the disturbance that do not depend on the instrument
rowU = sum(u,2);      % row totals (column vector)
colU = sum(u,1);      % column totals (row vector)
totU = sum(sum(u));   % grand total
uu   = u*u;           % matrix product
uut  = u.*u';         % elementwise product with the transpose

% moment vector: full block plus correction term
S = zeros(q,1);
for r = 1:q
    Zr   = Z{r};
    lead = sum(sum(u.*Zr))*sum(sum(u)) - sum(sum((rowU*colU).*Zr));
    fix  = sum(sum(Zr.*(uut+uu))) - sum(colU'.*sum(Zr.*u,2)) - sum(rowU'.*sum(Zr.*u,1));
    S(r) = lead + fix;
end

% Jacobian matrix: quantities depending only on the regressor are built once
% per column, then combined with every instrument
H = zeros(q,d);
for c = 1:d
    Xc    = X{c};
    uX    = u.*Xc;            % disturbance times regressor
    rowUX = sum(uX,2);
    colUX = sum(uX,1);
    totUX = sum(sum(uX));
    cross = uX*u + u*uX;
    for r = 1:q
        Zr  = Z{r};
        Zu  = Zr.*u;
        ZXu = Zr.*Xc.*u;
        % full block (sign flipped)
        lead = -sum(sum(Zu.*(Xc*totU+totUX) - Zr.*(rowU*colUX+rowUX*colU)));
        % correction term
        A1 = - sum(sum(Zr.*uut.*(Xc+Xc') + Zr.*cross));
        A2 =   sum(sum(ZXu,2).*colU') + sum(sum(ZXu,1).*rowU');
        A3 =   sum(sum(Zu,2).*colUX') + sum(sum(Zu,1).*rowUX');
        H(r,c) = lead + A1 + A2 + A3;
    end
end

% objective function
W = inv(V);
criterion = -  S'*W*S;
score     = -2*H'*W*S;
Hessian   = -2*H'*W*H;

%% Estimation of the asymptotic variance of the moment conditions

function [mVar] = Vmatrix2(psi,Y,X,Z)
% Brute-force (explicit loop) estimate of the q-by-q variance matrix of the
% moment conditions at psi. For every pair (i,j) the contributions of all
% pairs (i2,j2) whose indices differ from i, from j and from each other are
% accumulated; the result is scaled by 4/((n-2)*(n-3)), its diagonal is
% cleared, and mVar(k,j) is the mean of the elementwise product xi{k}.*xi{j}.
[n, m] = size(Y); % sample size
[d, ~] = size(X); % number of regressors
[q, ~] = size(Z); % number of moment conditions

% disturbance
eta = zeros(n,m);
for r = 1:d
    eta = eta + X{r}*psi(r);
end
u = Y./exp(eta);

xi = cell(q,1);
for r = 1:q
    xi{r} = zeros(n,m);
end

for i = 1:n
    for j = 1:m
        for a = 1:n
            if a==i || a==j, continue; end
            for b = 1:m
                if b==i || b==j || b==a, continue; end
                % difference of disturbance products, shared by all instruments
                du = u(i,j)*u(a,b)-u(i,b)*u(a,j);
                for r = 1:q
                    dz = (Z{r}(i,j)-Z{r}(i,b))-(Z{r}(a,j)-Z{r}(a,b));
                    xi{r}(i,j) = xi{r}(i,j) + dz.*du;
                end
            end
        end
    end
end

scale = (n-2)*(n-3);
for r = 1:q
    w = 4*xi{r}/scale;
    xi{r} = w - diag(diag(w));
end

for r = 1:q
    for c = 1:q
        mVar(r,c) = mean(mean(xi{r}.*xi{c}));
    end
end


function [mVar] = Vmatrix(psi,Y,X,Z)
% Loop-free estimate of the q-by-q variance matrix of the moment conditions
% at the parameter value psi.
%   psi = parameter vector (one entry per regressor)
%   Y   = array of outcomes
%   X   = cell (d-by-1) of regressor matrices
%   Z   = cell (q-by-1) of instrument matrices
%   mVar(k,j) = mean over all entries of xi{k}.*xi{j}
% NOTE(review): presumably the vectorised counterpart of Vmatrix2 (same
% scaling and diagonal reset) -- equivalence not verified here.
% Several terms below add the transpose of an n-by-m array to an n-by-m
% array, so Y must be square (n == m).
% dimensions
[n, m] = size(Y); % sample size
[d, ~] = size(X); % number of regressors
[q, ~] = size(Z); % number of moment conditions
% variable definitions
index = zeros(n,m); for k=1:d, index = index+X{k}*psi(k); end % linear index 
phi   = exp(index); % exponential transform  
error =     Y./phi; % disturbance
% disturbance weighted elementwise by each instrument
xerror = cell(q,1); for k=1:q, xerror{k} = error.*Z{k}; end 

% matrix product error * Z{k}' * error
uXu = cell(q,1); for k=1:q, uXu{k} = error*Z{k}'*error; end

% grand totals, row sums (_i, column vectors) and column sums (_j, row
% vectors) of the disturbance and of the instrument-weighted disturbance
u   = sum(sum(error)); xu = cell(q,1); for k=1:q, xu{k} = sum(sum(xerror{k})); end
u_i = sum(error,2); xu_i =  cell(q,1); for k=1:q, xu_i{k} = sum(xerror{k},2) ; end 
u_j = sum(error,1); xu_j =  cell(q,1); for k=1:q, xu_j{k} = sum(xerror{k},1) ; end 
% column sums of Z{k} weighted by the row totals of the disturbance, and
% row sums of Z{k} weighted by its column totals
xuu_j = cell(q,1); for k=1:q, xuu_j{k} = sum(Z{k}.*(u_i*ones(1,m)),1); end
xuu_i = cell(q,1); for k=1:q, xuu_i{k} = sum(Z{k}.*(ones(n,1)*u_j),2); end

% matrix products of the instrument with the (transposed) disturbance
xu_ij = cell(q,1); for k=1:q, xu_ij{k} = Z{k}*error'; end
%xu_ji = cell(q,1); for k=1:q, xu_ji{k} = Z{k}'*error; end
xu_ji = cell(q,1); for k=1:q, xu_ji{k} = (Z{k}'*error)'; end

% matrix products of the instrument-weighted disturbance with the disturbance
%uxu_ij = cell(q,1); for k=1:q, uxu_ij{k} = error*(Z{k}.*error); end
%uxu_ji = cell(q,1); for k=1:q, uxu_ji{k} = (error.*Z{k})*error; end
uxu_ij = cell(q,1); for k=1:q, uxu_ij{k} = (Z{k}.*error)*error; end
uxu_ji = cell(q,1); for k=1:q, uxu_ji{k} = error*(Z{k}.*error); end

xi = cell(q,1); mVar = zeros(q,q);
for k=1:q,
    % main term built from the totals, row sums and column sums above
    fullterm = error.*(Z{k}*u+xu{k})-(Z{k}.*(u_i*u_j)+uXu{k}) + (xu_i{k}*u_j+u_i*xu_j{k})...
             - error.*(xuu_i{k}*ones(1,m)+ones(n,1)*xuu_j{k});   
    % A1..A5: additional terms added to fullterm
    % NOTE(review): presumably these correct fullterm for index combinations
    % that must be excluded (cf. the exclusion test in Vmatrix2) -- confirm
    A1 = Z{k}.*error.*(error'-(ones(n,1)*u_j)'-(u_i*ones(1,n))');
    A2 = Z{k}.*(error*error)+Z{k}'.*(error.*error');
    A3 = -error.*((ones(n,1)*xu_j{k})'+(xu_i{k}*ones(1,n))');
    A4 = error.*(xu_ij{k}+xu_ji{k});
    A5 = - (uxu_ij{k} + uxu_ji{k});
    A = A1+A2+A3+A4+A5;
    
    xi{k} = fullterm + A;
    %xi{k} = fullterm;
    % scale, then clear the diagonal (self-links carry no information)
    xi{k} = 4*xi{k}/((n-2)*(n-3)); xi{k} = xi{k} - diag(diag(xi{k}));
end

% second-moment matrix of the xi's, averaged over all n*m entries
for k=1:q,
    for j=1:q,
        mVar(k,j) = mean(mean(xi{k}.*xi{j})); 
    end
end


%% Newton algorithm used for optimization
function [x condition it J SS]=Newton(FUN,x,varargin) % varargout
% Maximises FUN, starting at x, by the Newton-Raphson method with step halving.
% INPUT:
%   FUN      = function handle called as [f g H J S] = FUN(x,varargin{:}),
%              returning the objective f, its gradient g, its Hessian H and
%              two auxiliary quantities J and S that are passed through
%   x        = starting value
%   varargin = extra arguments forwarded unchanged to FUN
% OUTPUT:
%   x         = last accepted parameter value
%   condition = 1 (true) while the last accepted step and its gain both
%               exceeded tol, 0 (false) once they did not
%   it        = number of Newton iterations carried out
%   J, SS     = fourth and fifth outputs of FUN evaluated at the returned x
tol=1e-7; maxit=100; smalleststep=.5^20;
it=1; condition=1; improvement=1;
[f, g, H, J, S] = feval(FUN,x,varargin{:});
while it<=maxit && condition==1 && improvement==1
    % Newton direction; the linear system is solved directly rather than
    % through an explicit inverse
    [s1, s2] = size(H);
    if s1==s2 && s2>1
        d = -(H\g);
    else
        d = -g./H;
    end
    step=1; improvement=0;
    % halve the step until the objective does not deteriorate (relative
    % tolerance 1e-7) and the Hessian at the trial point is finite
    while step>=smalleststep && improvement==0
        [ff, gg, HH, JJ, Strial] = feval(FUN,x+step*d,varargin{:});
        bounded = sum(sum(isnan(HH)))==0 && sum(sum(isinf(HH)))==0;
        if (ff-f)/abs(f)>=-1e-7 && bounded
            improvement=1;
            condition = sqrt(step*step*(d'*d))>tol && (ff-f)>tol; % & (SS'*SS)>tol;
            x=x+step*d; f=ff; g=gg; H=HH; J=JJ; S=Strial;
        else
            step=step/2;
        end
    end
    it=it+1;
end
it=it-1;
% Report the auxiliary output at the accepted point. When the final line
% search fails, the last trial evaluation belongs to a rejected point and
% must not be returned alongside x and J.
SS = S;