% Generate the G (adjacency) matrix from the in-school network data.
%
% Networks whose size lies outside [lb, ub] are discarded further below.
clear
lb = 2;   % smallest network size that is kept
ub = inf; % largest network size that is kept
%% replace SQID by AID in network data
% Only the variables that are actually needed are loaded, so a stray variable
% stored in a .mat file cannot silently overwrite lb/ub. fullfile keeps the
% relative paths valid on every platform (on Windows it yields the same
% backslash-separated path as before).
load(fullfile('data','data_inschl.mat'),'data','colheaders')
sqidCol = strcmp('SQID',colheaders);
aidCol = strcmp('AID',colheaders);
if nnz(sqidCol) ~= 1 || nnz(aidCol) ~= 1
    error('data_inschl.mat must contain exactly one SQID and one AID column.');
end
ID = [data(:,sqidCol),data(:,aidCol)]; % sqid,aid
clear data colheaders sqidCol aidCol

load(fullfile('data','ntwk_inschl.mat'),'data')
% keep rows whose first column is below 999999
% (presumably values >= 999999 are missing-ID codes -- TODO confirm)
data = data(data(:,1)<999999,:);

% swap the first column (matched against SQID) for the corresponding AID
[~,IA,IB] = intersect(ID(:,1),data(:,1));
data = [ID(IA,2),data(IB,2:11)]; % aid,nominations
%% match network data with AID
% Load only the three ID vectors: an unrestricted load could overwrite
% workspace variables such as data, lb or ub. fullfile keeps the relative
% path valid on every platform.
load(fullfile('input','data_xy.mat'),'AID','SCID','SSCID')
ID = [AID,SCID,SSCID];

% keep the students present in both sources; intersect returns each AID once
[~,IA,IB] = intersect(ID(:,1),data(:,1));
data = [ID(IA,2:3),data(IB,:)]; % scid,sscid,aid,nominations

% NaN and the listed special codes are replaced by 0 everywhere in the
% matrix; a 0 in a nomination column is later treated as "no nomination"
data(isnan(data)) = 0;
data(ismember(data,[77777777,88888888,99959995,99999999])) = 0;
data = sortrows(data,1); % order by school ID
%% partition students into networks
% For every school: drop the students without any within-school link, build
% the directed nomination matrix, split it into connected components (links
% treated as undirected) and append every component whose size lies in
% [lb,ub] to the stacked outputs GID, PID, gsize and the (gni,gnj) edge list.
%
% NOTE: the vectorised lookups below assume that AIDs are unique and positive
% within a school. Uniqueness follows from data being built via intersect on
% AID; positivity should be confirmed against the raw data.
n = size(data,1);
scid = unique(data(:,1)); % unique list of school IDs

GID = zeros(n,1);    % network ID of every retained student
PID = zeros(n,1);    % AID of every retained student
gni = zeros(n*10,1); % row index (nominator) of every retained link
gnj = zeros(n*10,1); % column index (nominee) of every retained link
gsize = zeros(n,1);  % size of every retained network

n2 = 0; % number of individuals
n3 = 0; % number of links
ng = 0; % number of networks
for s = 1:length(scid)
    tmp = data(data(:,1)==scid(s),3:13); % aid + nominations, same school
    nr = size(tmp,1);
    if nr <= 1
        continue
    end
    % *************************************
    % locate every nomination inside the school
    aid = tmp(:,1);
    nom = tmp(:,2:11);
    [known,tgt] = ismember(nom,aid);         % tgt = row of the nominee
    src = repmat((1:nr)',1,size(nom,2));     % src = row of the nominator
    other = known & (tgt~=src);              % nominations of someone else
    % *************************************
    % drop isolated students: keep a student who nominates another student
    % of the school or who is nominated by one
    keep = any(other,2);
    keep(tgt(other)) = true;
    if sum(keep) <= 1
        continue
    end
    % *************************************
    % create the adjacency matrix (row = nominator, column = nominee);
    % repeated nominations of the same student are summed by sparse
    link = other & (nom>0);
    G0 = sparse(src(link),tgt(link),ones(nnz(link),1),nr,nr);
    G0 = G0(keep,keep);
    tmp = tmp(keep,:);
    % *************************************
    % partition into networks: grow the set of connected pairs one walk
    % length at a time until it stops changing. Only the sparsity pattern
    % matters, so the matrices are kept 0/1 to avoid unbounded walk counts.
    Gs = double((G0+G0')>0);
    reach = Gs; % pairs joined by a walk of length <= k
    walk = Gs;  % pairs joined by a walk of length exactly k
    while true
        walk = double((walk*Gs)>0);
        grown = double((reach+walk)>0);
        if nnz(grown) == nnz(reach)
            break
        end
        reach = grown;
    end
    % *************************************
    % assign the new group ID: peel off the component of the first
    % remaining student until nobody is left
    k = 0;
    while ~isempty(reach)
        k = k+1;
        member = full(reach(1,:)>0);
        member(1) = true; % guarantees progress even for a link-less row
        mr = sum(member);
        if (mr>=lb) && (mr<=ub)
            ng = ng+1;
            gsize(ng) = mr;
            GID(n2+1:n2+mr) = 1e6*scid(s)+k;
            PID(n2+1:n2+mr) = tmp(member,1);
            [g0i,g0j] = find(G0(member,member));
            nn = numel(g0i);
            gni(n3+1:n3+nn) = g0i+n2; % shift to positions in the stacked list
            gnj(n3+1:n3+nn) = g0j+n2;
            n2 = n2+mr;
            n3 = n3+nn;
        end
        rest = ~member;
        tmp = tmp(rest,:);
        G0 = G0(rest,rest);
        reach = reach(rest,rest);
    end
end
% trim the preallocated outputs to the number of entries actually filled
GID = GID(1:n2);
PID = PID(1:n2);

gni = gni(1:n3);
gnj = gnj(1:n3);
% stacked 0/1 adjacency matrix over all retained students
Gn = sparse(gni,gnj,ones(n3,1),n2,n2);

gsize = gsize(1:ng);

% fullfile keeps the relative path valid on every platform (on Windows it
% yields the same backslash-separated path as before)
save(fullfile('input','data_G.mat'),'PID','GID','Gn','gsize')