
* Start from an empty session (data and Mata workspace) and move to the
* working directory that holds the raw files.
clear 
mata: mata clear
set more off
global dirdata "C:\AddHealth"

cd $dirdata
#delimit;

/* Friend-nomination file: store a copy sorted on sqid so that it can be the
   using file of the merge below. */
use sfriend, clear;
sort sqid;
save sfriend_sort, replace;

/* In-school questionnaire: retain the identifiers and only the items that
   the rest of this script reads, then attach the friend nominations by sqid. */
use Inschool.dta, clear;
keep sqid aid sschlcde
     s1 s2 s3 s4
     s6a s6b s6c s6d s6e
     s9 s10 s10a s10b s10c s10d
     s11 s12 s14 s15 s17 s18 s20 s21
     s47 s48
     s59a s59b s59c s59d s59e s59f s59g
     s44a1 s44a2 s44a3 s44a4 s44a5 s44a6 s44a7 s44a8 s44a9 s44a10 s44a11
     s44a12 s44a13 s44a14 s44a15 s44a16 s44a17 s44a18 s44a19 s44a20 s44a21 s44a22
     s44a23 s44a24 s44a25 s44a26 s44a27 s44a28 s44a29 s44a30 s44a31 s44a32 s44a33;
sort sqid;
merge sqid using sfriend_sort;

/* Show how the two files matched, then discard the merge indicator. */
tabulate _merge;
drop _merge;

/* Convert every string variable that holds only numbers to numeric. */
destring, replace;


/***  ID & SCHOOL ID ***/

/* sqid is no longer used after the merge. Rows without a respondent id
   (aid) are removed, and the school code is renamed to scid. */
drop sqid;
drop if aid==.;
rename sschlcde scid;



/*** AGE ***/

/* Keep only rows with a usable age: system-missing and code 99 are removed. */
rename s1 age;
drop if age==. | age==99;


/*** GRADE ***/

/* Keep only rows with a usable grade: system-missing, 99 and 13 are removed. */
rename s3 grade;
drop if grade==. | grade==99 | grade==13;



/*** SEX ***/

/* s2 equal to 1 is coded male and 2 is coded female. Rows where s2 is 9
   (multiple choice) or system-missing are flagged and then dropped. */
gen sex_miss = (s2==9 | s2==.);
gen male = 1 if s2==1;
replace male = 0 if s2==2;
drop s2;

/* female is the complement of male and stays missing wherever male is missing. */
gen female = 1 - male;

drop if sex_miss==1;



/*** RACE ***/

/* Mutually exclusive race dummies for respondents who may report several
   races. The priority applied by the code is: black (s6b), then Asian (s6c),
   then Hispanic (s4), then other (s6d or s6e), then white (s6a). Each dummy
   is 1 only when every higher-priority item is not equal to 1. */

gen black = (s6b==1);
gen Asian = (s6b~=1 & s6c==1);
gen hisp = (s6b~=1 & s6c~=1 & s4==1);
gen race_other = (s6b~=1 & s6c~=1 & s4~=1 & (s6d==1 | s6e==1));
gen white = (s6b~=1 & s6c~=1 & s4~=1 & s6d~=1 & s6e~=1 & s6a==1);

/* race_miss marks rows that fall into none of the five categories. */
gen race_miss = 1 if (black + Asian + hisp + race_other + white) == 0;

foreach racevar in black Asian hisp race_other white {;
    replace `racevar' = . if race_miss==1;
};

/* omitted: black */

drop if race_miss==1;

drop s4 s6a s6b s6c s6d s6e;



/*** SCHOOL YEAR ***/

/* s9 becomes yr_school. Rows where it is system-missing or 99 are removed. */
rename s9 yr_school;
drop if yr_school==. | yr_school==99;



/*** LIVE WITH PARENTS ***/

/* Presence indicators built from s11 (mother) and s17 (father). A value of 1
   means living with that parent. System-missing or 9 sets the matching
   *_miss flag. */
gen with_mom = (s11==1);
gen withmom_miss = (s11==. | s11==9);


gen with_dad = (s17==1);
gen withdad_miss = (s17==. | s17==9);

/* Household composition built from the two presence indicators. */
gen both_par = (with_mom==1 & with_dad==1);
gen with_mom_only = (with_mom==1 & with_dad==0);
gen with_dad_only = (with_mom==0 & with_dad==1);

/* One parent reported present while the answer for the other is missing. */
gen with_mom_miss = (with_mom==1 & withdad_miss==1);
gen with_dad_miss = (withmom_miss==1 & with_dad==1);
gen both_miss = (withmom_miss==1 & withdad_miss==1);
/* omitted: neith_par*/

gen with_one_par = (with_mom_only==1 | with_dad_only==1);
gen with_one_miss = (with_mom_miss==1 | with_dad_miss==1);


/* both_par is undefined when either answer needed to decide it is missing. */
replace both_par = . if with_one_miss==1 | both_miss==1;

/* Complement of both_par, missing wherever both_par is missing. */
gen not_both_par = 1 - both_par;

drop if both_par==.; 



/*** MOM'S EDUCATION ***/

/* Three education bands from s12, plus separate flags for the skip code (97)
   and for missing or unusable answers (system-missing, 9, 11, 99). */
gen less_hs = inlist(s12, 1, 2, 10);
gen hs = inlist(s12, 3, 4);
gen more_hs = inlist(s12, 5, 6, 7, 8);

gen momedu_skip = (s12==97);

gen momedu_miss = (s12==. | s12==9 | s12==11 | s12==99);
/* omitted: hs. momedu_skip is left out for now */


/* One 0/1 dummy per mother occupation code in s14. The names are listed in
   code order, so the k-th name is the dummy for s14 equal to k (1 to 19). */
local mom_code = 0;
foreach job in Homemaker Prof1 Prof2 Manager Technical Office Sales Restaurant
               Craft Construction Mechanic Factory Trans Military Farm
               nowork_nodisabled nowork_disabled Retired Welfare {;
    local mom_code = `mom_code' + 1;
    gen `job' = (s14 == `mom_code');
};

gen momjob_skip = (s14==97);

gen momjob_miss = (s14==. | s14==20 | s14==99);
/* omitted: Homemaker. momjob_skip is left out for now, since skip captures the same information as "not live with Mom" */

/* combine */
gen Prof = inlist(s14, 2, 3, 4);
gen Home = inlist(s14, 1, 16, 17, 18);
gen job_other = inlist(s14, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

/* now have Home (omitted), Prof, job_other, Welfare and missing */



/* Whether the mother works for pay (s15 equal to 1), with a skip flag (7)
   and a missing flag (system-missing, 8 or 9). */
gen work_pay = (s15==1);
gen work_pay_skip = (s15==7);

gen work_pay_miss = (s15==. | s15==8 | s15==9);

/* add dad's occupation */


/* One 0/1 dummy per father occupation code in s20, named <occupation>_dad.
   The names are listed in code order, so the k-th name is the dummy for s20
   equal to k (1 to 19). */
local dad_code = 0;
foreach job in Homemaker Prof1 Prof2 Manager Technical Office Sales Restaurant
               Craft Construction Mechanic Factory Trans Military Farm
               nowork_nodisabled nowork_disabled Retired Welfare {;
    local dad_code = `dad_code' + 1;
    gen `job'_dad = (s20 == `dad_code');
};

/* The two flags below keep the momjob_ prefix of the original variable names
   although they are built from the father item s20. */
gen momjob_skip_dad = (s20==97);

gen momjob_miss_dad = (s20==. | s20==20 | s20==99);
/* omitted: Homemaker. The skip flag is left out. NOTE(review): the original
   note justified this by "not live with Mom"; presumably the father is
   meant here. TODO confirm */


/* Whether the father works for pay (s21 equal to 1), with a skip flag (7)
   and a missing flag (system-missing, 8 or 9). */
gen work_pay_dad = (s21==1);
gen work_pay_skip_dad = (s21==7);

gen work_pay_miss_dad = (s21==. | s21==8 | s21==9);



/***  GPA ***/
/* note: the GPA calculation needs to be reconsidered */


/* Codes 5, 7, 8 and 9 of the four grade items carry no usable grade and are
   set to missing in place. */
foreach grade_item in s10a s10b s10c s10d {;
    replace `grade_item' = . if inlist(`grade_item', 5, 7, 8, 9);
};

/* Reverse the 1 to 4 answer codes into grade points (1 becomes 4, 4 becomes 1).
   Any other value leaves the grade-point variable missing. The results are
   ED11 to ED14, in the order s10a, s10b, s10c, s10d. */
local subj = 0;
foreach grade_item in s10a s10b s10c s10d {;
    local subj = `subj' + 1;
    gen ED1`subj' = 5 - `grade_item' if inlist(`grade_item', 1, 2, 3, 4);
};

/* gpa is the mean over whichever subjects are available. A respondent with
   no usable subject gets the placeholder value 0. */
egen gpa = rmean(ED11 ED12 ED13 ED14);
replace gpa = 0 if gpa==.;



/* Author's notes on sample sizes:
   73859 obs have a gpa (need to impute 84435 - 73859 = 10576 missing gpa).
   If gpa is calculated as the average of all 4 subjects as before, only
   53675 obs remain and 84435 - 53675 need to be imputed.
   Missing friend aid is not considered yet.
   If all obs with no valid friend aid are deleted, the first way needs to
   impute 67393 - 60037 gpa and the second way 67393 - 44090 gpa. */


/* Friend nominations: the four special codes below are not usable ids and
   are set to missing. */
foreach nominee in mf1aid mf2aid mf3aid mf4aid mf5aid ff1aid ff2aid ff3aid ff4aid ff5aid {;
    replace `nominee' = . if inlist(`nominee', 77777777, 88888888, 99959995, 99999999);
};

/* Rename to fid1-fid10: mf1aid-mf5aid become fid1-fid5 and ff1aid-ff5aid
   become fid6-fid10. */
forvalues k = 1/5 {;
    local k_ff = `k' + 5;
    rename mf`k'aid fid`k';
    rename ff`k'aid fid`k_ff';
};

/* The raw items used to build the variables above are no longer needed. */
drop s10 s10a s10b s10c s10d s11 s12 s14 s15 s17 s20 s21 s59a s59b s59c s59d s59e s59f s59g;
sort aid;

/* ggroup numbers every school-by-grade cell. The data are left sorted by it. */
sort scid;
egen ggroup = group(scid grade);
sort ggroup;


save fulldata.dta, replace;


/* f1-f10 are 1 when the matching nomination slot fid1-fid10 is not
   system-missing, and 0 otherwise. n_friend adds them up. */
forvalues k = 1/10 {;
    gen f`k' = (fid`k' ~= .);
};
gen n_friend = f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10;

/*** Keep only the smaller groups ***/


/* gsize is the number of students in a school-grade group whose gpa is not
   the 0 placeholder. It is built as a running count within ggroup, which
   relies on the data still being sorted by ggroup, and then the group
   maximum is taken. */
gen size_group = (gpa ~= 0);
replace size_group = size_group + size_group[_n-1] if ggroup == ggroup[_n-1];
egen gsize = max(size_group), by(ggroup);
drop size_group;

/* Disabled alternative kept from the original: drop if gpa==0, that is,
   discard the rows with missing gpa and not keep them. */

drop if gsize<10 | gsize>50;   /* drop group with group size <10 & >50 */

/* Renumber the surviving groups consecutively as ggroup2. size_group is now
   a running count of all rows within the group, and gsize2 is its maximum. */
sort ggroup gpa; 
egen ggroup2 = group(scid grade);
gen size_group = 1;
replace size_group = size_group[_n-1] + 1 if ggroup2 == ggroup2[_n-1];
egen gsize2 = max(size_group), by(ggroup2);

/* overage is 1 for students at or above the mean age of their group. */
sort ggroup2;
egen mean_age = mean(age), by(ggroup2);
gen overage = (age >= mean_age);

/* Keep the first 30 groups. The data are sorted by ggroup2, so the last row
   holds the highest remaining group number. */
drop if ggroup2>30;
local g_number = ggroup2[_N];

save partialdata.dta, replace;


#delimit cr
* Split partialdata.dta into one file per group: for every group number
* 1..g_number keep only that group's rows, drop the variables that are not
* exported, and write the result twice, as a Stata file under
* GPA_200_with_missing\group and as a raw text file (no header row) under
* GPA_50_with_missing\group.
*
* Fixes relative to the earlier version:
*   - "use ..., replace" is not a valid option of use and stops the loop on
*     its first pass. The option that discards the data in memory is "clear".
*   - forvalues already steps the loop macro, so the manual "local i"
*     initialisation and increment were redundant and have been removed.
cd $dirdata
forvalues i = 1/`g_number' {
	use partialdata.dta, clear
	keep if ggroup2 == `i'
	drop scid grade s18 race_miss
	save C:\Data\GPA_200_with_missing\group\group`i', replace
	outsheet using C:\Data\GPA_50_with_missing\group\group`i'.dat, nonames replace
}



/*** DYADIC DATA: AGE ***/

* For every group, build an n x n matrix whose (i,j) entry is 1 when students
* i and j have the same age and 0 otherwise, and write it as a raw text file
* with no header row.
*
* The group .dta files are saved under C:\Data\GPA_200_with_missing\group by
* the export loop. This loop used to read them from GPA_50_with_missing\group,
* where only the .dat exports are written, so it now reads them from where
* they are saved.
forvalues g = 1/`g_number' {
	use C:\Data\GPA_200_with_missing\group\group`g', clear
	keep age
	version 10
	mata: st_view(age=., ., "age")
	mata: AGE = J(rows(age), rows(age), 0)
	mata {
	for (i=1; i<=rows(age); i++) {
		for (j=1; j<=rows(age); j++) {
			AGE[i,j] = (age[i]==age[j])
		}
	}
	}
	mata: st_matrix("AGE", AGE)
	mata: mata clear

	svmat double AGE, names(AGE)
	drop age
	outsheet using C:\Data\GPA_50_with_missing\age\age`g'.dat, nonames replace
}


/*** DYADIC DATA: SEX ***/

* For every group, build an n x n matrix whose (i,j) entry is 1 when students
* i and j have the same value of male and 0 otherwise, and write it as a raw
* text file with no header row.
*
* The group .dta files are saved under C:\Data\GPA_200_with_missing\group by
* the export loop. This loop used to read them from GPA_50_with_missing\group,
* where only the .dat exports are written, so it now reads them from where
* they are saved.
forvalues g = 1/`g_number' {
	use C:\Data\GPA_200_with_missing\group\group`g', clear
	keep male
	version 10
	mata: st_view(male=., ., "male")
	mata: MALE = J(rows(male), rows(male), 0)
	mata {
	for (i=1; i<=rows(male); i++) {
		for (j=1; j<=rows(male); j++) {
			MALE[i,j] = (male[i]==male[j])
		}
	}
	}
	mata: st_matrix("MALE", MALE)
	mata: mata clear

	svmat double MALE, names(MALE)
	drop male
	outsheet using C:\Data\GPA_50_with_missing\sex\sex`g'.dat, nonames replace
}


/*** DYADIC DATA: RACE ***/

* For every group, build an n x n matrix whose (i,j) entry is 1 when students
* i and j agree on all five race dummies and 0 otherwise, and write it as a
* raw text file with no header row.
*
* The group .dta files are saved under C:\Data\GPA_200_with_missing\group by
* the export loop. This loop used to read them from GPA_50_with_missing\group,
* where only the .dat exports are written, so it now reads them from where
* they are saved.
forvalues g = 1/`g_number' {
	use C:\Data\GPA_200_with_missing\group\group`g', clear
	keep black Asian hisp race_other white
	version 10
	mata: st_view(black=., ., "black")
	mata: st_view(Asian=., ., "Asian")
	mata: st_view(hisp=., ., "hisp")
	mata: st_view(white=., ., "white")
	mata: st_view(race_other=., ., "race_other")
	mata: RACE = J(rows(white), rows(white), 0)
	mata {
	for (i=1; i<=rows(white); i++) {
		for (j=1; j<=rows(white); j++) {
			RACE[i,j] = (black[i]==black[j] & Asian[i]==Asian[j] & hisp[i]==hisp[j] & white[i]==white[j] & race_other[i]==race_other[j])
		}
	}
	}
	mata: st_matrix("RACE", RACE)
	mata: mata clear

	svmat double RACE, names(RACE)
	drop black Asian hisp race_other white
	outsheet using C:\Data\GPA_50_with_missing\race\race`g'.dat, nonames replace
}


/*** FRIENDSHIP NETWORK  ***/

* For every group, build the n x n nomination matrix w: w[i,j] is 1 when the
* id of student j (aid) appears in any of the ten nomination slots
* fid1-fid10 of student i, and 0 otherwise. The matrix is written as a raw
* text file with no header row.
*
* Changes relative to the earlier version:
*   - The group .dta files are saved under C:\Data\GPA_200_with_missing\group
*     by the export loop, so they are read from there. The old path under
*     GPA_50_with_missing\group only receives the .dat exports.
*   - The old code stored aid[j] in w[i,j], overwrote it with 1 on a match and
*     then zeroed everything not equal to 1. An id (or a nomination) equal to
*     1 would therefore have been counted as a tie. Each cell is now computed
*     directly as a 0/1 membership test, which gives the same matrix for any
*     id other than 1 and also removes the second pass, which repeated the
*     same assignment ten times per cell.
forvalues g = 1/`g_number' {
	use C:\Data\GPA_200_with_missing\group\group`g', clear
	keep aid fid1 fid2 fid3 fid4 fid5 fid6 fid7 fid8 fid9 fid10
	version 10
	mata: st_view(aid=., ., "aid")
	mata: st_view(fid1=., ., "fid1")
	mata: st_view(fid2=., ., "fid2")
	mata: st_view(fid3=., ., "fid3")
	mata: st_view(fid4=., ., "fid4")
	mata: st_view(fid5=., ., "fid5")
	mata: st_view(fid6=., ., "fid6")
	mata: st_view(fid7=., ., "fid7")
	mata: st_view(fid8=., ., "fid8")
	mata: st_view(fid9=., ., "fid9")
	mata: st_view(fid10=., ., "fid10")
	mata: w = J(rows(aid), rows(aid), 0)
	mata: fid = (fid1, fid2, fid3, fid4, fid5, fid6, fid7, fid8, fid9, fid10)

	mata {
	for (i=1; i<=rows(aid); i++) {
		for (j=1; j<=rows(aid); j++) {
			w[i,j] = any(fid[i,.] :== aid[j])
		}
	}
	}
	mata: st_matrix("w", w)
	mata: mata clear

	svmat double w, names(w)
	drop aid fid1 fid2 fid3 fid4 fid5 fid6 fid7 fid8 fid9 fid10
	outsheet using C:\Data\GPA_50_with_missing\network\network`g'.dat, nonames replace
}

/*** ATTRIBUTE DATA ****/

* For every group, reduce the saved group file to the individual attributes,
* put them in a fixed column order and overwrite the group .dat export with
* them (raw text, no header row).
*
* The group .dta files are saved under C:\Data\GPA_200_with_missing\group by
* the export loop. This loop used to read them from a user-specific Desktop
* folder that nothing in this script writes, so it now reads them from where
* they are saved.
*
* NOTE(review): sport is not created anywhere in the visible part of this
* script, so keep will stop with "variable sport not found" unless the saved
* group files already contain it. Presumably it is meant to be built from
* the s44a* items. TODO confirm and add its construction before the group
* files are saved.
local i = 1
while `i' <= `g_number' {
	use C:\Data\GPA_200_with_missing\group\group`i', clear
	keep age male sport yr_school white black Asian hisp race_other both_par less_hs more_hs momedu_miss Prof job_other Welfare momjob_miss gpa overage
	order age male black Asian hisp race_other both_par less_hs more_hs momedu_miss Welfare momjob_miss Prof job_other sport white yr_school gpa overage
	outsheet using C:\Data\GPA_50_with_missing\group\group`i'.dat, nonames replace
	local i = `i' + 1
}






