#delimit;
* From here on every command ends with a semicolon instead of a line break;
capture log close;
set more off;
clear;

* Initial data preparation;
* selection of the Southwest;
* creation of all the variables;

******************************** Paths *****************************************;
* NOTE(review): output names a log file, but no log using command is visible;
* in this script. Confirm whether a log was meant to be opened;
global output "C:\AAPAPERS\HISPANOS\OUTPUT\datos00.log";
* Folder the yearly morg files are read from;
global datain "C:\Aadata\CPS\";
* Folder where the intermediate and final datasets are saved;
global dataout  "C:\AAPAPERS\HISPANOS\DATA\";
********************************************************************************;
capture program drop codemsa;
program define codemsa;
	* Harmonises the msafips codes across survey years;
	* Works in place on the data in memory, which must hold msafips and year;
	* No source code below is also a target code, so the rules do not chain;
	* and their order is irrelevant;

	* recodings that apply from 1995 onwards;
	* NOTE(review): targets 74, 85, 86 and 93 coincide with the state codes;
	* used elsewhere in this file, so these areas are presumably folded into;
	* their state. Confirm against the codebook;
	replace msafips=74 if inlist(msafips,4080,5800) & year>=1995;
	replace msafips=85 if inlist(msafips,4100,7490) & year>=1995;
	replace msafips=93 if inlist(msafips,4940,7460) & year>=1995;
	replace msafips=86 if msafips==9360 & year>=1995;
	replace msafips=6920 if msafips==9270 & year>=1995;

	* recodings that apply up to and including 1995;
	replace msafips=5945 if msafips==360 & year<=1995;
	replace msafips=8735 if msafips==6000 & year<=1995;

	* recodings that apply up to and including 1994;
	replace msafips=7360 if msafips==7485 & year<=1994;
	replace msafips=8800 if msafips==3810 & year<=1994;
end;

capture program drop data9402;
program define data9402;
	* Prepares the yearly file currently in memory;
	* Steps: harmonise msafips, build a consistent relationship code relref;
	* add the education variables, drop households whose head or spouse count;
	* is inconsistent, build the log hourly wage and keep the variables that;
	* are used later on. Takes no arguments and works in place;

	* california, arizona, new mexico, texas (restriction currently disabled);
*	keep if (state==93 | state==86 | state==85 | state==74);
	* records without an msafips code receive the state code instead;
	replace msafips=state if msafips==.;
	qui codemsa;
	* Build a homogeneous Relationship to Reference Person variable (relref);
	* The if commands below test year on the first observation only, so the;
	* file in memory is expected to hold a single survey year;
	gen byte relref=.;
	if year==1994 {;
		replace relref=relref94;
		};
	if year==1995 {;
		replace relref=relref94 if intmonth<=2;
		replace relref=relref95 if intmonth>=3;
		* relref95<. is required because a missing value compares as larger;
		* than any number and would otherwise be recoded to 11;
		replace relref=11 if relref95>=13 & relref95<. & intmonth>=3;
		};
	if year>=1996 {;
		replace relref=relref95;
		* same guard against missing relref95 as in the 1995 branch;
		replace relref=11 if relref95>=13 & relref95<.;
		};
	* Education and experience variables;
	qui educa2;
	* Coding errors in the relationship-to-head variable;
	* codes 1 and 2 are merged, then households are kept only when exactly;
	* one member has relref equal to 1 and dropped when two have relref 3;
	replace relref=1 if relref==2;
	egen scab=sum(relref==1), by(year intmonth minsamp state hhid hhnum);
	egen sspo=sum(relref==3), by(year intmonth minsamp state hhid hhnum);
	drop if scab~=1;
	drop if sspo==2;
	* Log of the nominal hourly wage;
	* cenizq flags weekly earnings equal to 1923 (through 1997) or 2884;
	* (from 1998), presumably the censoring points of earnwke. Flagged;
	* earnings are multiplied by 1.33;
	gen byte cenizq=0;
	replace cenizq=1 if earnwke==1923 & year<=1997;
	replace cenizq=2 if earnwke==2884 & year>=1998;
	replace earnwke=1.33*earnwke if cenizq>=1;
	* from 1994 on, usual hours are coded as missing when hours vary, so;
	* hourslw is used in their place when it is available;
	replace uhourse=hourslw if lfsr94==1 & class94<=5 & uhourse==. & hourslw<.;
	gen salarios = ln(earnwke/uhourse);
	la var salarios "Salarios a la hora-logaritmos";
	keep hhid hhnum minsamp year intmonth earnwt state msafips sex race ethnic age 
		marital veteran grade92 relref penatvty pemntvty pefntvty earnwke class94 
		lfsr94 dind docc80 uhourse unionmme educa voca exp salarios cenizq;
end;

capture program drop educa2;
program define educa2;
	* Creates educa, exp and voca from grade92 and age;
	* Requires grade92 and age in memory. educa, exp and voca must not exist;
	* educa is the number of years assigned to each grade92 code from 31 to 46;
	* and stays missing for any other code;
	* educa is created as float because several codes map to half years;
	gen float educa=.;
	replace educa=0 if grade92==31;
	replace educa=2.5 if grade92==32;
	replace educa=5.5 if grade92==33;
	replace educa=7.5 if grade92==34;
	replace educa=9 if grade92==35;
	replace educa=10 if grade92==36;
	replace educa=11 if grade92==37;
	replace educa=12 if grade92==38;
	replace educa=12 if grade92==39;
	replace educa=13.5 if grade92==40;
	replace educa=14 if grade92==41;
	replace educa=14 if grade92==42;
	replace educa=16.5 if grade92==43;
	replace educa=18 if grade92==44;
	replace educa=18 if grade92==45;
	replace educa=20 if grade92==46;
	* exp is age minus years of education minus 6. It is stored as float;
	* because a byte variable would cut off the half years coming from educa;
	gen float exp = age - educa - 6;
	* voca is 1 for grade92 codes 41 and 42 and 0 otherwise;
	gen byte voca=(grade92==41)+(grade92==42);
	la var educa "Anyos en educacion";
	la var voca "Associate Degree";
	la var exp "Experiencia";
end;

capture program drop agrega2;
program define agrega2;
	* Reduces the file in memory to the variables needed for aggregation;
	* Requires minsamp, state, msafips, year, grade92, age, weight, ethnic;
	* docc80 and dind in memory. Takes no arguments and works in place;

	* every record is kept except those with minsamp equal to 4;
	keep if minsamp~=4;
	* restriction to four states, currently disabled;
*	keep if (state==93 | state==86 | state==85 | state==74);
	* records without an msafips code receive the state code instead;
	replace msafips=state if msafips==.;
	* add the education variables, then harmonise the msafips codes;
	quietly educa2;
	quietly codemsa;
	keep weight year msafips ethnic grade92 docc80 dind educa;
end;

capture program drop agrega3;
program define agrega3;
	* Builds group-level averages from temp00.dta in the dataout folder;
	* Argument: the first argument is the name of the grouping variable;
	* Output: a file named after the grouping variable, saved in the dataout;
	* folder, with one row per group holding phisp, meduca and their ranks;
	* NOTE(review): use has no clear option, so this stops with an error when;
	* the data in memory have unsaved changes. Confirm that this is intended;
	use ${dataout}temp00.dta;
	* phisp is 1 when ethnic is below 8. Its weighted mean is presumably the;
	* Hispanic share of the group;
	gen phisp=(ethnic<8);
	gen meduca=educa;
	collapse (mean) phisp meduca [iw=weight], by(`1');
	* The ranks are stored as long. A byte variable turns every value above;
	* 100 into missing, which loses the rank of all groups past the 100th;
	* Ties in phisp or meduca are ranked in the order sort leaves them;
	sort phisp;
	gen long cphisp=_n;
	sort meduca;
	gen long cmeduca=_n;
	sort `1';
	local fich = "${dataout}" + "`1'" + ".dta";
	save `fich',replace;
end;

********************************************************************************;

* Collect the individual records of every household, year by year;
* Each yearly morg file is run through data9402 and stacked into datos01;

* 1994 comes first and creates the accumulated file;
use ${datain}morg94.dta,clear;
qui data9402;
save ${dataout}datos01.dta, replace;

* 1995 to 1999: process the year, then append everything accumulated so far;
forvalues yr=95/99 {;
	use ${datain}morg`yr'.dta,clear;
	qui data9402;
	append using ${dataout}datos01.dta;
	save ${dataout}datos01.dta, replace;
	};

* 2000 to 2002: same steps, the file names carry a leading zero;
forvalues yr=0/2 {;
	use ${datain}morg0`yr'.dta,clear;
	qui data9402;
	append using ${dataout}datos01.dta;
	save ${dataout}datos01.dta, replace;
	};

* the complete stack is also stored as datos00;
save ${dataout}datos00.dta, replace;

use ${dataout}datos00.dta, clear;

* Filters on the wage variables;
* The tab commands in this section are diagnostics only. They run under;
* capture noisily because tab stops with an error when no observation;
* matches, and that would otherwise abort the whole script;

* drop the observations that have no earnings although they are working;
* as wage earners (15 observations according to the original note);
drop if earnwke==. & lfsr94==1 & class94<=5;

* drop the observations that have no hours worked (21 observations;
* according to the original note);
drop if lfsr94==1 & class94<=5 & (uhourse==.);

* drop every worker whose earnings or hours are equal to zero;
capture noisily tab year if (earnwke==0 | uhourse==0) & lfsr94<=2 & class94<=5;
drop if (earnwke==0 | uhourse==0) & lfsr94<=2 & class94<=5;
capture noisily tab year if salarios==. & lfsr94==2;
drop if salarios==. & lfsr94==2;
capture noisily tab year if earnwke<. & lfsr94>=3;

**** left censoring (wording of the original note);
**** 1997 is the year with the largest share of censored observations: 1.48%;
****tab year cenizq if class94<=5, row;
* observations above the 98.52th percentile of earnwke within their year are;
* dropped. 98.52 is 100 minus the 1.48 quoted above;
	egen onepc=pctile(earnwke), p(98.52) by(year);
	gen censura=(earnwke>onepc) if earnwke<.;
	drop if censura==1;
* universe of the analysis;
* sex equal to 1, age 25 to 62, ethnic below 4 or equal to 8, and for;
* ethnic equal to 8 only race equal to 1;
	keep if sex==1;
	keep if age>24 & age<63;
	keep if (ethnic<4 | ethnic==8);
	drop if (race>1 & ethnic==8);
* thirdg is 1 when penatvty is 57 and at least one of pemntvty and pefntvty;
* is 57. NOTE(review): 57 is presumably the code for the United States;
	gen thirdg=(penatvty==57) & ((pemntvty==57)|(pefntvty==57));
* border is 1 for state codes 93, 86, 85 and 74;
	gen border=(state==93)|(state==86)|(state==85)|(state==74);
* participation variable;
* wagesmpl is 1 when class94 is at most 5 and lfsr94 is at most 2, except;
* for records with dind equal to 51;
	gen wagesmpl=(class94<=5) & (lfsr94<=2);
	replace wagesmpl=0 if dind==51;
* selection of the relevant variables;
* exp is floored at zero here although it is not among the variables kept;
	replace exp=0 if exp<0;
	gen etnia=(ethnic<4);
	keep class94 state year salarios etnia age educa wagesmpl thirdg border;
save ${dataout}datos00.dta,replace;
beep;
