/* DOWEIGHTS
**
** Syntax example: 
** DOWEIGHTS boct geo sex bqq sss if mrk==1, cat(cat) old(catold) weight(wgt) rev12;
**
** Note: By default, empty strata are merged first across the last variable,
** then across the last two variables, and so on. Option "rev12" merges across
** the second-to-last variable (instead of the last) before merging across the
** last two variables.
**
*/
#delimit ;
capture program drop DOWEIGHTS;
/*
** DOWEIGHTS varlist [if] [in], weight(name) cat(name) old(name)
**                              [rev12 absdata(filename)]
**
** Builds post-stratification weights for the sample in memory (called
** AYS below) from population counts in an external ABS file.
**
** varlist    1 to 8 numeric stratification variables. Their values are
**            concatenated digit-wise into one numeric category code.
** weight()   new variable: (popn/popN)/(samn/samN) for selected rows and
**            0 for all other rows, where popn and samn are population and
**            sample counts of the (possibly merged) category and popN and
**            samN are the corresponding totals.
** cat()      new variable: category code after merging empty strata.
** old()      new variable: category code before merging.
** rev12      merge across the second-to-last variable before merging
**            across the last two variables (needs at least 2 variables).
** absdata()  ABS population file. Defaults to the path used historically.
**            The file must hold year, age, geo, pop and every variable of
**            varlist other than boct, bqq and sss, which are built here.
**
** Categories that exist in the ABS data but have no sample rows are merged
** with neighbouring categories by blanking trailing digits of the code.
** The program stops with return code 459 when the data are inconsistent
** or when an empty category cannot be merged.
**
** NOTE: the data in memory are saved to a temporary file, replaced by the
** ABS data and then rebuilt by a merge. If the program stops part-way the
** original data are no longer in memory.
*/
program define DOWEIGHTS, sortpreserve;
syntax varlist(min=1 max=8 numeric) [if] [in], WEIGHT(string) CAT(string) OLD(string) [REV12 ABSData(string)];
local nn:word count `varlist';
tokenize `varlist';
    /* VALIDATE EVERYTHING WE CAN BEFORE THE DATA IN MEMORY ARE REPLACED */;
confirm new variable `cat' `old' `weight';
if "`rev12'"~=""&`nn'<2 {;
  di in error "DOWEIGHTS-ERROR  option rev12 requires at least two variables";
  exit 198;
};
if `"`absdata'"'=="" {;
  local absdata "D:/Research/AUYouth/Data/ABS/lm2.dta";
};
tempvar touse;
mark `touse' `if' `in';
tempfile aysfile absfile;
tempvar samN samn;
    /* CONSTRUCT CATEGORY VARIABLE FOR AYS DATA AND COUNT */;
* dig2, dig3, ... hold the number of decimal digits reserved per variable;
quietly gen long `cat'=`1' if `touse';
forvalues ii=2/`nn' {;
  quietly summarize ``ii'';
  local dig`ii'=int(log10(r(max)))+1;
  quietly replace `cat'=`cat'*(10^`dig`ii'')+``ii'' if `touse';
};
capture assert `cat'~=. if `touse';
if _rc~=0 {;
  display in error "some AYS records have missing category information";
  exit 459;
};
format %12.0f `cat';
sort `touse' `cat';
* counts are stored as long because int turns to missing above 32,740;
quietly by `touse': gen long `samN'=_N if `touse';
quietly by `touse' `cat': gen long `samn'=_N if `touse';
quietly save `aysfile';
    /* LOAD ABS DATA */;
use `"`absdata'"', clear; * pop in 1000 persons;
quietly gen int boct=.;
quietly replace boct=1969 if year==1989&age==19;
quietly replace boct=1970 if year==1989&age==18;
quietly replace boct=1971 if year==1989&age==17;
quietly replace boct=1972 if year==1989&age==16;
quietly replace boct=1973 if year==1990&age==16;
quietly replace boct=1974 if year==1991&age==16;
quietly replace boct=1975 if year==1992&age==16;
quietly replace boct=1976 if year==1993&age==16;
quietly replace boct=1977 if year==1994&age==16;
quietly drop if year>1989&age>16;
assert boct~=.;
drop year age;
quietly drop if geo==10|geo==20|geo==30|geo==40|geo==50|geo==61|geo==62; * otherwise popN wrong;
rename pop popn;
egen float popN=sum(popn);
* SPLIT popn NUMBERS INTO BIRTH QUARTERS;
if index("`varlist'","bqq")~=0 {;
  local tmp:type popn;
  gen `tmp' tmp1=popn/4;
  gen `tmp' tmp2=popn/4;
  gen `tmp' tmp3=popn/4;
  gen `tmp' tmp4=popn/4;
  drop popn;
  gen long tmpi=_n;
  quietly reshape long tmp, i(tmpi) j(bqq);
  rename tmp popn;
  * only tmpi is left to match the wildcard at this point;
  drop tmp*;
};
* SPLIT popn NUMBERS INTO THOSE IN SS AND THOSE NOT;
if index("`varlist'","sss")~=0 {;
  local N=_N;
  quietly expand 2;
  * original rows get sss equal to 1 and the appended copies get 2;
  quietly gen byte sss=1+(_n>`N');
  quietly replace popn=0.034*popn if boct==1969&sss==1;
  quietly replace popn=0.188*popn if boct==1970&sss==1;
  quietly replace popn=0.569*popn if boct==1971&sss==1;
  quietly replace popn=0.795*popn if boct==1972&sss==1;
  quietly replace popn=0.802*popn if boct==1973&sss==1;
  quietly replace popn=0.840*popn if boct==1974&sss==1;
  quietly replace popn=0.853*popn if boct==1975&sss==1;
  quietly replace popn=0.841*popn if boct==1976&sss==1;
  quietly replace popn=0.833*popn if boct==1977&sss==1;
  quietly replace popn=(1-0.034)*popn if boct==1969&sss==2;
  quietly replace popn=(1-0.188)*popn if boct==1970&sss==2;
  quietly replace popn=(1-0.569)*popn if boct==1971&sss==2;
  quietly replace popn=(1-0.795)*popn if boct==1972&sss==2;
  quietly replace popn=(1-0.802)*popn if boct==1973&sss==2;
  quietly replace popn=(1-0.840)*popn if boct==1974&sss==2;
  quietly replace popn=(1-0.853)*popn if boct==1975&sss==2;
  quietly replace popn=(1-0.841)*popn if boct==1976&sss==2;
  quietly replace popn=(1-0.833)*popn if boct==1977&sss==2;
  * the split must leave the population total unchanged;
  egen float tmp=sum(popn);
  assert tmp==popN;
  drop tmp;
};
    /* CONSTRUCT CATEGORY VARIABLE FOR ABS DATA AND COUNT */;
gen byte `touse'=1;
quietly gen long `cat'=`1' if `touse';
forvalues ii=2/`nn' {;
  quietly summarize ``ii'';
  * ABS values wider than the width reserved from the AYS data would make codes collide;
  if int(log10(r(max)))+1>`dig`ii'' {;
    di in error "DOWEIGHTS-ERROR  too few digits allowed in category variable";
    exit 459;
  };
  quietly replace `cat'=`cat'*(10^`dig`ii'')+``ii'' if `touse';
};
capture assert `cat'~=. if `touse';
if _rc~=0 {;
  di in error "DOWEIGHTS-ERROR  some ABS records have missing category information";
  exit 459;
};
format %12.0f `cat';
keep `touse' `cat' popN popn;
sort `touse' `cat';
quietly save `absfile';
    /* MERGE FILES */;
use `aysfile', clear;
quietly merge `touse' `cat' using `absfile', update;
erase "`aysfile'";
erase "`absfile'";
* ABS-only rows carry no sample count and unselected AYS rows never match;
assert `samn'==. if _merge==2;
assert (_merge==1)==(~`touse');
sort `cat';
by `cat': assert popn==popn[1];
by `cat': assert popN==popN[1];
by `cat': assert `samn'==`samn'[1];
    /* MERGE EMPTY STRATA */;
* category codes for (potentially) merged categories;
* the code at level ii has the digits of variables ii through nn set to zero;
local tmp=0;
forvalues ii=`nn'(-1)2 {;
  tempvar tmp2`ii';
  local tmp=`tmp'+`dig`ii'';
  quietly gen long `tmp2`ii''=int(`cat'/10^`tmp')*10^`tmp';
};
if "`rev12'"~="" {;
  * merge on 2nd variable before merging on (1st,2nd), counted from the end;
  local tmp=`nn'-1;
  local tmp=`dig`nn''+`dig`tmp'';
  quietly replace `tmp2`nn''=int(`cat'/10^`tmp')*10^`tmp'+mod(`cat',10^`dig`nn'');
};
tempvar tmp0 tmp1 touseB tmp3 tmp4 tmp5;
tempname XR;
gen long `old'=`cat';
* tmp0 equals 1 on exactly one row per category so totals count each category once;
quietly by `cat': gen long `tmp0'=_n;
while 1==1 {;
  * flag the first remaining row of a category that has population but no sample;
  quietly gen byte `tmp1'=`samn'==.&`touse';
  quietly replace `tmp1'=sum(`tmp1')==1&`tmp1'==1;
  quietly count if `tmp1'~=0;
  if r(N)==0 {;
    drop `tmp1';
    continue, break;
  };
  if r(N)~=1 {; di in error "DOWEIGHTS-ERROR  shouldn't happen..."; exit 459; };
  quietly tabulate `cat' if `tmp1'==1, matrow(`XR');
  local localcat=`XR'[1,1];
  local merged=0;
  * try the finest merge level first and widen until some sample rows are found;
  forvalues ii=`nn'(-1)2 {;
    quietly tabulate `tmp2`ii'' if `tmp1'==1, matrow(`XR');
    local tmp=`XR'[1,1];
    gen byte `touseB'=`tmp2`ii''==`tmp';
    quietly count if `touseB'&`samn'~=.;
    if r(N)==0 {;
      drop `touseB';
      continue;
    };
    di in text "NOTE: Merging category" %10.0f `localcat' " =>" %10.0f `tmp' ", total observations" %8.0f r(N);
    quietly replace `cat'=`tmp2`ii'' if `touseB';
    * population of the merged category is the sum over its member categories;
    quietly replace popn=. if `touseB'&`tmp0'~=1;
    quietly egen float `tmp3'=sum(popn) if `touseB';
    quietly replace popn=`tmp3' if `touseB';
    assert (_merge==2)==(`samn'==.) if `touseB';
    quietly drop if `touseB'&`samn'==.; * first row not in `touseB' anymore;
    * sample size of the merged category, again counting each member once;
    quietly replace `samn'=. if `touseB'&`tmp0'~=1;
    quietly egen float `tmp4'=sum(`samn') if `touseB';
    quietly replace `samn'=`tmp4' if `touseB';
    * renumber so the merged category again has exactly one row with tmp0 equal to 1;
    quietly egen float `tmp5'=seq() if `touseB';
    quietly replace `tmp0'=`tmp5' if `touseB';
    drop `touseB' `tmp1' `tmp3' `tmp4' `tmp5';
    local merged=1;
    continue, break;
  };
  * no level produced a partner with sample rows, so the stratum cannot be fixed;
  if `merged'==0 {;
    di;
    di in error "DOWEIGHTS-ERROR  fix didn't work" %12.0f `localcat';
    exit 459;
  };
};
assert _merge~=2;
assert _merge==3 if `touse';
drop _merge;
quietly gen float `weight'=0;
quietly replace `weight'=(popn/popN)/(`samn'/`samN') if `touse'; * sums to samN;
drop popN popn;
end;
