function [estmsl,msloutput] = mslAgrad(Y,Xf,Xr,mfo,R,sv,p2p)
%MSLAGRAD  Simulated-likelihood estimation with an analytic gradient.
%
%   [estmsl,msloutput] = mslAgrad(Y,Xf,Xr,mfo,R,sv,p2p) minimises the
%   negative simulated log-likelihood defined in the nested function LIKE
%   with FMINUNC (quasi-Newton, user-supplied gradient).
%
%   Inputs
%     Y    choice indicator with N*J*T elements; entries equal to 1 mark
%          the chosen rows. It is reshaped to [J*T, N], so rows must be
%          stacked person by person.
%          NOTE(review): within a person, the reshape to [J, T*R] below
%          implies alternatives vary fastest - confirm against the caller.
%     Xf   (N*J*T)-by-nfx regressors multiplied by est.FC.
%     Xr   (N*J*T)-by-nrx regressors multiplied by est.RCmean and, through
%          est.cholRCvar, by the standard-normal draws.
%     mfo  struct with fields N, T and J.
%     R    number of simulation draws per person.
%     sv   starting values, passed through p2p before being handed to
%          fminunc (presumably a struct with the same fields as est).
%     p2p  function handle used in both directions: p2p(sv) must return
%          the parameter vector given to fminunc, and p2p(vector) must
%          return a struct with fields FC, RCmean and cholRCvar.
%
%   Outputs
%     estmsl     p2p applied to the minimiser returned by fminunc.
%     msloutput  fminunc output struct with the extra fields time (from
%                tic/toc), fval and exitflag.
%
%   The gradient is laid out as [FC; RCmean; lower triangle of cholRCvar
%   taken column by column]. NOTE(review): this assumes p2p uses the same
%   parameter ordering - verify against the p2p implementation.
%
%   Side effect: rng(10) reseeds the global random number generator so the
%   same draws are used on every call.

N = mfo.N;
T = mfo.T;
J = mfo.J;

nfx = size(Xf,2);
nrx = size(Xr,2);
% nfx fixed coefficients + nrx means + nrx*(nrx+1)/2 lower-triangle terms.
np = nfx + nrx + nrx + (nrx^2-nrx)/2;

% Fixed seed: the draws stay constant across likelihood evaluations.
rng(10);
dr_sn = randn(nrx,R*N);

% Regressor attached to each lower-triangle element (column-major order):
% [Xr(:,1:end) Xr(:,2:end) ... Xr(:,nrx)].
Xn = cell2mat(arrayfun(@(c) Xr(:,c:end),(1:nrx),'unif',0));
% One np-by-(J*T) slab of regressors per person.
Xn = reshape([Xf Xr Xn]',[np J*T N]);

Y = reshape(Y,[J*T N]);
% Loop-invariant mask of chosen rows, computed once instead of per call.
chosen = (Y == 1);

% rvmap(k) is the index of the draw multiplying the k-th lower-triangle
% element, e.g. [1 1 1 2 2 3] for nrx = 3.
rvmap = nonzeros(tril(ones(nrx)).*(ones(nrx,1)*(1:nrx)))';
% Row of eg used by each parameter: row 1 (no draw) for FC and RCmean,
% row rvmap+1 for the lower-triangle elements.
egmap = [ones(1,nfx+nrx) (rvmap+1)]';

function [ll,g] = like(parms)
    % Negative simulated log-likelihood LL and its gradient G (np-by-1)
    % at the parameter vector PARMS.

    est = p2p(parms);

    % Part of the utility that does not depend on the draws.
    const = reshape(Xf*est.FC + Xr*est.RCmean,[J*T N]);
    x = Xr*est.cholRCvar;

    eg = zeros(nrx+1,J*T,N);
    ll = 0;

    for n = 1:N

        idx = (n-1)*J*T+1:n*J*T;
        eta = dr_sn(:,(n-1)*R+1:n*R);

        v = bsxfun(@plus,const(:,n),x(idx,:)*eta);
        v = reshape(v,[J T*R]);

        % Log-softmax over the J alternatives. Subtracting the column
        % maximum first keeps exp() from overflowing to Inf (which would
        % turn the probabilities into NaN) without changing the result.
        v = bsxfun(@minus,v,max(v,[],1));
        logpr = bsxfun(@minus,v,log(sum(exp(v),1)));
        logpr = reshape(logpr,[J*T R]);
        pr = exp(logpr);

        % Log-likelihood of the chosen rows for each draw. Summing logs
        % instead of multiplying probabilities avoids underflow to zero.
        logsim = sum(logpr(chosen(:,n),:),1);

        % log(mean(exp(logsim))) through the log-sum-exp shift.
        peak = max(logsim);
        w = exp(logsim - peak);
        ll = ll + peak + log(mean(w));

        % Share of each draw in the simulated likelihood.
        qi = w./sum(w,2);

        % Row 1: weighted residuals; rows 2..nrx+1: residuals weighted by
        % each draw component.
        eg(:,:,n) = [ones(1,R);eta]*bsxfun(@times,qi,bsxfun(@minus,Y(:,n),pr))';

    end

    ll = -ll;
    g = -sum(sum(Xn.*eg(egmap,:,:),3),2);

end

o1 = optimoptions(@fminunc,'Display','iter','MaxIter',1e6,'MaxFunEvals',1e10,'Algorithm','quasi-newton','GradObj','on','DerivativeCheck','off');
tic
[estmsl,fval,exitflag,msloutput] = fminunc(@like, p2p(sv), o1);
msloutput.time = toc;

msloutput.fval = fval;
msloutput.exitflag = exitflag;
estmsl = p2p(estmsl);

end




