% Start from an empty workspace and load the raw inputs. Later sections of this
% script read PID, AID, DAT, NAM, Gn, gsize and GID, which are expected to be
% provided by these two MAT-files.
clear
% fullfile builds the path with the platform's separator, so the script also
% runs outside Windows (a literal backslash is not a path separator there).
load(fullfile('input','data_xy.mat'))
load(fullfile('input','data_G.mat'))
lnTV = 1; % 1: the TV outcome Y2 is transformed to ln(Y2+1); otherwise kept in levels
symm = 0; % 1: symmetrize the adjacency matrix Gn before it is used
%%
if symm == 1
    % Keep a link i-j whenever it exists in either direction ...
    Gn = double((Gn+Gn')>0);
    % ... remove any self-links on the diagonal ...
    Gn = Gn-diag(diag(Gn));
    % ... and store the result as a sparse matrix.
    Gn = sparse(Gn);
end
%% re-order data according to PID
% Rows of DAT are matched to AID; pick them in the order given by PID so that
% row k of data belongs to PID(k).
n = size(PID,1);
[found,idx] = ismember(PID,AID);
if ~all(found(:))
    % ismember returns index 0 for IDs it cannot find; DAT(idx,:) would then
    % fail with a generic indexing error, so report the real cause instead.
    error('data_prep:missingID', ...
        '%d PID value(s) have no matching entry in AID.',sum(~found(:)));
end
data = DAT(idx,:);
%% Exogenous Variables
% Helper: the column(s) of data whose label in NAM equals v.
pick = @(v) data(:,strcmp(v,NAM));
% Each date is encoded as year + month/100; age is the floor of the difference
% between the two encoded dates (presumably interview date minus birth date --
% confirm against the codebook).
tLater = pick('IYEAR')+pick('IMONTH')/100;
tEarlier = pick('H1GI1Y')+pick('H1GI1M')/100;
age = floor(tLater-tEarlier);
% BIO_SEX shifted down by one.
sex = pick('BIO_SEX')-1;
% *************************************
% race
% Helper: 0/1 indicator that the item named v was answered with code 1.
raceItem = @(v) double(data(:,strcmp(v,NAM))==1);
latin = raceItem('H1GI4');
white = raceItem('H1GI6A');
black = raceItem('H1GI6B');
nativ = raceItem('H1GI6C');
asian = raceItem('H1GI6D');
orace = raceItem('H1GI6E');

% "Only" categories: the own indicator is kept solely for respondents who
% report none of the other five categories.
race1 = white.*double((latin+black+asian+nativ+orace)==0); % white only
race2 = black.*double((latin+white+asian+nativ+orace)==0); % black only
race3 = asian.*double((latin+white+black+nativ+orace)==0); % asian only

% Everyone who is neither white-only nor black-only.
races = 1-(race1+race2);
%races = 1-(race1+race2+race3); % other races
% *************************************
% grade
% H1GI20 shifted down by 6.
grade = data(:,strcmp('H1GI20',NAM))-6;

% Three bands on the shifted scale: <=2, (2,4] and (4,6].
freshm = double(grade<=2);
junior = double(grade>2 & grade<=4);
senior = double(grade>4 & grade<=6);
% *************************************
% family structure
% Household roster slots A..T: H1HR3x is compared with a relationship code
% (11 for father, 14 for mother) and H1HR6x with a qualifier code (1 for the
% father, 7 for the mother); together these presumably identify the biological
% parent -- confirm against the codebook.
% Fix: slot A of the mother count previously skipped the H1HR6A==7 qualifier
% that every other slot (and the father count) applies, so any slot-A mother
% was counted regardless of the qualifier.
father = zeros(size(data,1),1);
mother = zeros(size(data,1),1);
for i = 'A':'T'
    rel = data(:,strcmp(['H1HR3',i],NAM)); % relationship code of roster slot i
    typ = data(:,strcmp(['H1HR6',i],NAM)); % qualifier code of roster slot i
    father = father+(rel==11).*(typ==1);
    mother = mother+(rel==14).*(typ==7);
end
% Indicator that exactly one roster slot qualifies.
father = double(father==1);
mother = double(mother==1);

parent = double((father+mother)==2); % live with both biological parents
% *************************************
% residential mother education
medu = data(:,strcmp('H1RM1',NAM));
medu(isnan(medu)) = 100; % NaN -> a code that belongs to none of the groups below

medu1 = double(ismember(medu,[1 2 10])); % less than high school
medu2 = double(ismember(medu,3:7));      % high school graduate and above
medu3 = double(ismember(medu,[8 9]));    % college and above

medu_mis = 1-(medu1+medu2+medu3); % unknown education level
% *************************************
% residential father education
fedu = data(:,strcmp('H1RF1',NAM));
fedu(isnan(fedu)) = 100; % NaN -> a code that belongs to none of the groups below

fedu1 = double(ismember(fedu,[1 2 10])); % less than high school
fedu2 = double(ismember(fedu,3:7));      % high school graduate and above
fedu3 = double(ismember(fedu,[8 9]));    % college and above

fedu_mis = 1-(fedu1+fedu2+fedu3); % unknown education level
% *************************************
% residential parent education: the mother's group is used; where the mother's
% group is unknown the father's group is substituted
useFather = (medu_mis==1);

pedu1 = medu1;
pedu1(useFather) = fedu1(useFather);

pedu2 = medu2;
pedu2(useFather) = fedu2(useFather);

pedu3 = medu3;
pedu3(useFather) = fedu3(useFather);

pedu_mis = 1-(pedu1+pedu2+pedu3); % unknown for both parents
% *************************************
% non-residential biological mother education
% (medu/fedu are reused here and overwrite the residential values)
medu = data(:,strcmp('H1NM4',NAM));
medu(isnan(medu)) = 100; % NaN -> a code that belongs to none of the groups below

nmedu1 = double(ismember(medu,[1 2 10])); % less than high school
nmedu2 = double(ismember(medu,3:7));      % high school graduate and above
nmedu3 = double(ismember(medu,[8 9]));    % college and above

nmedu_mis = 1-(nmedu1+nmedu2+nmedu3); % unknown education level
% *************************************
% non-residential biological father education
fedu = data(:,strcmp('H1NF4',NAM));
fedu(isnan(fedu)) = 100; % NaN -> a code that belongs to none of the groups below

nfedu1 = double(ismember(fedu,[1 2 10])); % less than high school
nfedu2 = double(ismember(fedu,3:7));      % high school graduate and above
nfedu3 = double(ismember(fedu,[8 9]));    % college and above

nfedu_mis = 1-(nfedu1+nfedu2+nfedu3); % unknown education level
% *************************************
% non-residential biological parent education: the mother's group is used;
% where the mother's group is unknown the father's group is substituted
useNFather = (nmedu_mis==1);

npedu1 = nmedu1;
npedu1(useNFather) = nfedu1(useNFather);

npedu2 = nmedu2;
npedu2(useNFather) = nfedu2(useNFather);

npedu3 = nmedu3;
npedu3(useNFather) = nfedu3(useNFather);

npedu_mis = 1-(npedu1+npedu2+npedu3); % unknown for both parents

% At least one non-residential parent falls in the college group.
ncollg = double((nfedu3+nmedu3)>=1);
% *************************************
% residential mother job
mjob = data(:,strcmp('H1RM4',NAM));
mjob(isnan(mjob)) = 100; % NaN -> a code that belongs to none of the groups below

mjob1 = double(mjob<=4);            % professional
mjob2 = double(mjob>4 & mjob<=6);   % office worker
mjob3 = double(mjob>6 & mjob<=15);  % other
mjob4 = double(mjob==16);           % none

mjob_mis = 1-(mjob1+mjob2+mjob3+mjob4); % in none of the four groups
% *************************************
% residential father job
fjob = data(:,strcmp('H1RF4',NAM));
fjob(isnan(fjob)) = 100; % NaN -> a code that belongs to none of the groups below

fjob1 = double(fjob<=4);            % professional
fjob2 = double(fjob>4 & fjob<=6);   % office worker
fjob3 = double(fjob>6 & fjob<=15);  % other
fjob4 = double(fjob==16);           % none

fjob_mis = 1-(fjob1+fjob2+fjob3+fjob4); % in none of the four groups
% *************************************
% residential parent job: the mother's group is used; where the mother's group
% is unknown the father's group is substituted
useFatherJob = (mjob_mis==1);

pjob1 = mjob1;
pjob1(useFatherJob) = fjob1(useFatherJob);

pjob2 = mjob2;
pjob2(useFatherJob) = fjob2(useFatherJob);

pjob3 = mjob3;
pjob3(useFatherJob) = fjob3(useFatherJob);

pjob4 = mjob4;
pjob4(useFatherJob) = fjob4(useFatherJob);

pjob_mis = 1-(pjob1+pjob2+pjob3+pjob4); % unknown for both parents
% *************************************
% Helper: 0/1 indicator that the item named v was answered with code 1.
saidOne = @(v) double(data(:,strcmp(v,NAM))==1);
% parents work for pay (pw4pay: both mother and father indicators equal 1)
mw4pay = saidOne('H1RM5');
fw4pay = saidOne('H1RF5');
pw4pay = double(mw4pay==1 & fw4pay==1);
% parents on welfare (pwfare: both mother and father indicators equal 1)
mwfare = saidOne('H1RM9');
fwfare = saidOne('H1RF9');
pwfare = double(mwfare==1 & fwfare==1);
% *************************************
% parental care (disabled)
%{
mcare = data(:,strcmp('H1PF1',NAM));
mcare = (mcare == 5);

fcare = data(:,strcmp('H1PF23',NAM));
fcare = (fcare == 5);

pcare = double((fcare+mcare)>=1); % parent cares very much
%}
% Helper: 0/1 indicator that the item named v was answered with code 1.
codeOne = @(v) double(data(:,strcmp(v,NAM))==1);
% *************************************
% general health
health = codeOne('H1GH1');
% *************************************
% living condition
sfhome = double(ismember(data(:,strcmp('H1IR10',NAM)),[1 3])); % H1IR10 coded 1 or 3
livcnd = codeOne('H1IR11');
hrural = codeOne('H1IR12');
% *************************************
% own decision on TV time
tvtime = codeOne('H1WP4');
% own decision on TV program
tvprgm = codeOne('H1WP5');
% own decision on bed time
bdtime = codeOne('H1WP6');
% well coordinate
%athlet = double(data(:,strcmp('H1PF29',NAM))==1);
% handicap
hndicp = codeOne('H1PL1');
%% Regressors
% Alternative (disabled) specification without the grade dummies:
%{
Xname = [{'age'},{'sex'},{'race2'},{'races'},{'health'},{'parent'},{'pedu2'},{'pedu3'},{'pedu_mis'},{'pjob1'},{'pjob2'},{'pjob3'},{'pjob_mis'},{'livcnd'}];
Xn = [age,sex,race2,races,health,parent,pedu2,pedu3,pedu_mis,pjob1,pjob2,pjob3,pjob_mis,livcnd];
%}
% Main regressor matrix Xn; Xname lists its column labels in the same order.
Xname = {'age','sex','race2','races','junior','senior','health','livcnd', ...
         'parent','pedu2','pedu3','pedu_mis','pjob1','pjob2','pjob3','pjob_mis'};
Xn = [age,sex,race2,races,junior,senior,health,livcnd, ...
      parent,pedu2,pedu3,pedu_mis,pjob1,pjob2,pjob3,pjob_mis];
% X0 extends Xn with the categories left out of it (race1, freshm, pedu1, pjob4).
X0 = [age,sex,race1,race2,races,freshm,junior,senior,health,livcnd, ...
      parent,pedu1,pedu2,pedu3,pedu_mis,pjob1,pjob2,pjob3,pjob4,pjob_mis];

% First reduced regressor set and its labels.
X1name = {'age','sex','parent','pedu2','pedu3','livcnd'};
XX1 = [age,sex,parent,pedu2,pedu3,livcnd];
%XX1 = Xn;

% Second reduced regressor set and its labels.
X2name = {'age','sex','parent','livcnd'};
XX2 = [age,sex,parent,livcnd];
%XX2 = Xn;

% Minimal (disabled) specification:
%{
X1name = {'age'};
X2name = {'age'};
XX1 = age;
XX2 = age;
%}
%% IVs
% Earlier (disabled) choice of instruments:
%{
name1 = [Xname,{'tvprgm'},{'tvtime'}];
name2 = [Xname,{'athlet'},{'hndicp'}];
IV1 = [tvprgm,tvtime];
IV2 = [athlet,hndicp];
%}

% First instrument: npedu3; name1 = regressor labels followed by its label.
IV1 = npedu3;
name1 = [Xname,{'npedu3'}];
% Second instrument: tvtime; name2 = regressor labels followed by its label.
IV2 = tvtime;
name2 = [Xname,{'tvtime'}];
% Regressor labels followed by both instrument labels.
names = [name1,{'tvtime'}];
%% Dependent variables
% Helper: horizontally concatenate the data columns whose labels are listed in c.
grab = @(c) cell2mat(cellfun(@(v) data(:,strcmp(v,NAM)),c,'UniformOutput',false));
% GPA
% The four H1ED items are reversed (5 minus the code) and averaged per row.
Ya = 5-grab({'H1ED11','H1ED12','H1ED13','H1ED14'});
Y1 = mean(Ya,2);
% *************************************  
% TV
% Row sum of the three H1DA items, top-coded at 70.
Yb = grab({'H1DA8','H1DA9','H1DA10'});

Y2 = sum(Yb,2);
Y2(Y2>70) = 70;

% Optional log transform, selected by the lnTV switch.
if lnTV == 1
    Y2 = log(Y2+1);
end
% Factor-score alternative (disabled):
%{
[~,~,~,~,Y2] = factoran(Yb,1);
Y2 = Y2-min(Y2);
%}
% *************************************
% Write descriptive statistics of the sample, the schools, the networks and the
% variables to output/data_summary.txt.
ng = length(gsize); % number of networks

% School id: GID with its last six digits dropped; ssiz counts rows per school.
SCID = floor(GID/1e6);
ssid = unique(SCID);
ssiz = zeros(length(ssid),1);
for j = 1:length(ssid)
    ssiz(j) = sum(SCID==ssid(j));
end

% fullfile keeps the path valid on every platform; fail loudly if the file
% cannot be opened instead of letting every later fprintf error on fid = -1.
[fid,msg] = fopen(fullfile('output','data_summary.txt'),'w');
if fid == -1
    error('data_prep:cannotOpen','Cannot open output file data_summary.txt: %s',msg);
end
fprintf(fid,'%s \n',date);
fprintf(fid,'sample size = %4.0f\n',n);
fprintf(fid,'# of schools = %4.0f\n',length(ssid));
fprintf(fid,'school size min  = %5.0f, max = %5.0f\n',min(ssiz),max(ssiz));

fprintf(fid,'# of networks = %4.0f\n',ng);
fprintf(fid,'gsize mean = %7.2f, std = %7.2f\n',mean(gsize),std(gsize));
fprintf(fid,'gsize min  = %5.0f, max = %5.0f\n',min(gsize),max(gsize));

fnum = full(sum(Gn,2));    % row sums of Gn
ffnum = full(Gn*sex);      % row sums of Gn over columns with sex == 1
fmnum = full(Gn*(1-sex));  % row sums of Gn over columns with sex == 0

fprintf(fid,'row-sum of Gn: mean = %7.2f, std = %7.2f\n',mean(fnum),std(fnum));
fprintf(fid,'row-sum of Gn: min  = %5.0f, max = %5.0f\n',min(fnum),max(fnum));

% Shares of respondents: the denominator is the sample size n. (It used to be
% ng, the number of networks, which is not the number of respondents.)
fprintf(fid,'percentage of respondents give 10 friends %4.3f\n',sum(fnum==10)/n);
fprintf(fid,'percentage of respondents give 5 female friends %4.3f\n',sum(ffnum>=5)/n);
fprintf(fid,'percentage of respondents give 5 male friends %4.3f\n',sum(fmnum>=5)/n);
% Use the outcome Y1 itself. (The n-by-4 item matrix Ya was used before, which
% made fprintf recycle the format and print this line once per column.)
fprintf(fid,'percentage of the Y1 being 0 = %4.3f\n',sum(Y1==0)/n);
fprintf(fid,'percentage of the Y2 being 0 = %4.3f\n',sum(mean(Yb,2)==0)/n);

% Column-wise mean, standard deviation, minimum and maximum of the outcomes,
% the full covariate set X0 and the instruments.
temp = [Y1,Y2,X0,IV1,IV2];
Xavg = mean(temp,1);
Xstd = std(temp,0,1);
Xmin = min(temp,[],1);
Xmax = max(temp,[],1);

% One statistic per section, one value per line, sections separated by a rule.
stats = {'X_avg',Xavg; 'X_std',Xstd; 'X_min',Xmin; 'X_max',Xmax};
for is = 1:size(stats,1)
    fprintf(fid,'%s \n',stats{is,1});
    fprintf(fid,'%7.2f\n',stats{is,2}); % format is recycled for every element
    if is < size(stats,1)
        fprintf(fid,'*******************\n\n');
    end
end
fprintf(fid,'*******************\n');
fprintf(fid,'*******************\n\n');
fclose(fid);

% For every network (consecutive diagonal block of Gn with size gsize(g)) set
% gg(g) = 1 when some off-diagonal pair (i,j) has a nonzero entry in G^4 while
% at least one of G, G^2, G^3 is zero at that position.
% NOTE(review): the original comment describes this as marking networks for
% which I,G,G^2,G^3,G^4 are LI -- confirm that this check is the intended test.
n0 = 0;
gg = zeros(ng,1);
for g = 1:ng
    mr = gsize(g);
    blk = n0+(1:mr);     % rows/columns of Gn that belong to network g
    Gr = Gn(blk,blk);
    n0 = n0+mr;

    G2 = Gr*Gr;
    G3 = Gr*G2;
    G4 = Gr*G3;

    offDiag = ~eye(mr);  % ignore the diagonal (i == j)
    hit = offDiag & (G4~=0) & ((G3==0)|(G2==0)|(Gr==0));
    if any(hit(:))
        gg(g) = 1;
    end
end

% Transform the adjacency matrix with the project helper wnorm (defined outside
% this file; presumably a row-normalization -- confirm).
Wn = wnorm(Gn);
% Store everything the estimation step needs. fullfile builds the path with the
% platform's separator; with a literal backslash the file would not be written
% into the input folder on non-Windows systems.
save(fullfile('input','data_input'), ...
    'Wn','Xn','XX1','XX2','IV1','IV2','Y1','Y2','gsize','symm','lnTV', ...
    'name1','name2','names','X1name','X2name');