/*
**   Dynamic Evaluation of Job Search Training 
**
**  Purpose:
**    Continuous-time dynamic treatment effect estimation
**    
**
**  Possible Estimates:
**    - Estimate Survivor functions and Dynamic Treatment Effects on Surviving Treated
**    - Estimate parameters of bivariate continuous-time models
**  Inputs:
**    - datafile
**  Outputs:
**    - Full Sample Effects (Table 2 in paper)
**    - Full Sample Parameters (Table 3 in paper)
**    - Regular Workers Heterogeneous 50+/50 Effects (Table 6 in paper: must first enable the commented-out sample-selection lines in main())
**    - Regular Workers Heterogeneous 50+/50 Parameters (Table 7 in paper: must first enable the commented-out sample-selection lines in main())
**    
**  Date:
**    June 2015
**
**  Author:
**   Stephen Kastoryano & Bas van der Klaauw
*/


#include <oxstd.h> // include the Ox standard library header
#include <oxfloat.h>
#import <maximize>

decl duur1, censor1, duur2, censor2, duur3, censor3, X, d, obs;


loglikelihood(const param)	 // Function: per-observation log-likelihood of the bivariate (exit, training) duration model
{
	/*
	** Reads the file-level variables duur1, duur2, censor1, censor2, X, d and obs.
	**
	** Layout of param, with k = number of columns of X:
	**   [0]                log of delta, the multiplicative effect of training on the exit hazard
	**   [1], [2]           log of v1 (exit) and v2 (training)
	**   [3 .. 2+k]         coefficients of X in the exit hazard
	**   [3+k .. 2+2k]      coefficients of X in the training hazard
	**   [3+2k .. 9+2k]     log duration dependence of exit, intervals 2..8 (interval 1 fixed at 0)
	**   [10+2k .. 16+2k]   log duration dependence of training, intervals 2..8 (interval 1 fixed at 0)
	**
	** Returns a column with one log-likelihood contribution per observation.
	*/
	decl theta1, theta2, delta, d1, d2, hulp1, hulp2, i, k, xb1, xb2, v1, v2, lik;

	k = sizec(X);

	// Piecewise-constant duration dependence for exit (theta1) and training (theta2)
	theta1 = exp(0|param[3+2*k:9+2*k]);
	theta2 = exp(0|param[10+2*k:16+2*k]);

	// Effect of training on exit
	delta = exp(param[0]);

	// Remaining duration until exit (d1) and until training (d2); both are
	// reduced by the interval length at the end of every pass of the loop below
	d1 = duur1;
	d2 = duur2;

	// Column 0 accumulates the integrated baseline hazard,
	// column 1 picks up the baseline hazard at the observed duration
	hulp1 = zeros(obs,2);
	hulp2 = zeros(obs,2);

	for(i=0;i<rows(d);++i)
	{
		// Training: full interval if it is survived, otherwise the part up to d2
		hulp2[][0]= hulp2[][0] + d[i] * theta2[i] * (d2 .> d[i]) + theta2[i] * d2 .* (d2 .> 0) .* (d2 .<= d[i]);

		// Training hazard in the interval in which the training duration ends
		hulp2[][1]= hulp2[][1] + theta2[i] * (d2 .> 0) .* (d2 .<= d[i]);

		// Exit: time after the training date (d2) is weighted by delta
		hulp1[][0]= hulp1[][0] + (d1 .> d[i]) .*
	 	(d[i] * theta1[i] * (d2 .> d[i])
		+ d[i] * theta1[i] .* delta .* (d2 .<= 0)
		+ theta1[i] * (d2 + delta .* (d[i]-d2)) .* (d2 .> 0) .* (d2 .<= d[i]))
		+ (d1 .> 0) .* (d1 .<= d[i]) .*
	 	(theta1[i] * (d2 + delta .* (d1 - d2)) .* (d2 .> 0) 
		+ theta1[i] .* delta .* d1 .* (d2 .<= 0));  

		// Exit hazard in the interval in which the exit duration ends,
		// multiplied by delta when training came strictly before exit
		hulp1[][1]= hulp1[][1] + theta1[i] * (d1 .> 0) .* (d1 .<= d[i]) .* (d2 .>= d1) + theta1[i] .* delta .*
	            (d1 .> 0) .* (d1 .<= d[i]) .* (d2 .< d1);

		d1 = d1 - d[i];
		d2 = d2	- d[i];
	}

	// Proportional effect of the observed characteristics
	xb1 = exp(X*param[3:3+k-1]);		// exit
	xb2 = exp(X*param[3+k:3+2*k-1]);	// training

	// Single mass point: v1 and v2 act as the first-interval intercepts
	v1 = exp(param[1]);	 // exit
	v2 = exp(param[2]);	 // training

	// Likelihood contribution: hazard^(indicator) * survivor, for exit and for training
	lik = (v1 * xb1 .* hulp1[][1]).^(censor1) .* exp(-v1 * xb1 .* hulp1[][0]).*
		  (v2 * xb2 .* hulp2[][1]).^(censor2) .* exp(-v2 * xb2 .* hulp2[][0]);

	// To print the value of the log-likelihood:
	//print(" loglik = ","%25.10g",sumc(log(lik)));

	return log(lik);
}


likelihood(const param, const loglik, const score, const hessian)  // Function: objective passed to MaxBFGS (which maximizes it)
{
	/*
	** param    parameter vector at which to evaluate
	** loglik   address that receives the mean log-likelihood
	** score, hessian   not used here; main() asks MaxBFGS for numerical derivatives
	**
	** Returns 1 when the evaluation succeeded and 0 when the objective is NaN,
	** so that the optimizer treats the point as a failed function evaluation
	** instead of accepting it.
	*/
	decl logl;

	logl = loglikelihood(param);

	// Written on every evaluation, so the file holds the most recently evaluated
	// parameters (this includes points tried while taking numerical derivatives)
	savemat("param.xls",param);

	loglik[0] = double(meanc(logl));

	return !isnan(loglik[0]);
}


derivative(const param)	  	 // Function: numerical per-observation score matrix
{
	/*
	** Central finite differences of loglikelihood() with respect to each
	** element of param. Returns a matrix with one column per parameter; each
	** column has the same length as the vector returned by loglikelihood().
	*/
	decl grads, k, step, shifted, llUp, llDown;

	grads = <>;

	for(k=0; k<rows(param); ++k)
	{
		// Step is 0.05% of the parameter, but never smaller than 0.0005
		step = max(fabs(0.0005*param[k])|0.0005);

		shifted = param;
		shifted[k] = shifted[k] + step;
		llUp = loglikelihood(shifted);

		shifted = param;
		shifted[k] = shifted[k] - step;
		llDown = loglikelihood(shifted);

		grads ~= (llUp-llDown)/(2*step);
	}

	return grads;
}

/* 
gradient(const param)	// Gradient for Delta method estimation of mixing probabilities	SE
{
	decl grad, i,q, z1, z2, d0;

	grad = <>;
	 	 
	for(i=0; i<3; ++i)
	{
		d0 = max(fabs(0.0005*param[i])|0.0005);
		q = param[5:7];
		q[i] = q[i] + d0;
		z1 = (1|exp(q[0])|exp(q[1])|exp(q[2]))./(1+exp(q[0])+exp(q[1])+exp(q[2]));
		q = param[5:7];
		q[i] = q[i] - d0;
		z2 = (1|exp(q[0])|exp(q[1])|exp(q[2]))./(1+exp(q[0])+exp(q[1])+exp(q[2]));
		grad = grad~((z1-z2)/(2*d0));		
	}	
   return grad;	 
}
*/

Survivors(const param, const t, const s)	  //Function: generate survivor functions
{
	/*
	** param   parameter vector, same layout as used by loglikelihood()
	** t       duration at which the exit survivor function is evaluated
	** s       duration at which training starts
	**
	** Reads the file-level variables X, d and obs.
	**
	** Returns an obs x 2 matrix:
	**   column 0 (Se)  exp(-integrated exit hazard up to t), with the hazard
	**                  multiplied by delta after s
	**   column 1 (W)   training hazard at s * training survivor at s * Se
	*/
	decl theta1, theta2, delta, d1, d2, hulp1, hulp2, i, k, xb1, xb2, v1, v2, Se, W;

	k = sizec(X);

	// Piecewise-constant duration dependence (first interval fixed at exp(0) = 1)
	theta1 = exp(0|param[3+2*k:9+2*k]);
	theta2 = exp(0|param[10+2*k:16+2*k]);

	delta = exp(param[0]);		   //delta= effect of training on exit	

	// Remaining durations; reduced by the interval length after every pass
	d1 = t;
	d2 = s;

	hulp1 = zeros(obs,1);	// integrated baseline exit hazard
	hulp2 = zeros(obs,2);	// column 0: integrated baseline training hazard, column 1: hazard at s

	for(i=0;i<rows(d);++i)
	{
		hulp2[][0]= hulp2[][0] + d[i] * theta2[i] * (d2 .> d[i]) + theta2[i] * d2 .* (d2 .> 0) .* (d2 .<= d[i]);

		hulp2[][1]= hulp2[][1] + theta2[i] * (d2 .> 0) .* (d2 .<= d[i]);

		// Time after the training date (d2) is weighted by delta
		hulp1 = hulp1 + (d1 .> d[i]) .*
		 (d[i] * theta1[i] * (d2 .> d[i])
		+ d[i] * theta1[i] .* delta .* (d2 .<= 0)
		+ theta1[i] * (d2 + delta .* (d[i]-d2)) .* (d2 .> 0) .* (d2 .<= d[i]))
		+ (d1 .> 0) .* (d1 .<= d[i]) .*
		 (theta1[i] * (d2 + delta .* (d1 - d2)) .* (d2 .> 0) 
		+ theta1[i] .* delta .* d1 .* (d2 .<= 0));  

		d1 = d1 - d[i];
		d2 = d2	- d[i];
	}

	xb1 = exp(X*param[3:3+k-1]);		// Explanatory variables affecting exit
	xb2 = exp(X*param[3+k:3+2*k-1]);	// Explanatory variables affecting treatment

	v1 = exp(param[1]);	 // Unobserved heterogeneity on exit
	v2 = exp(param[2]);	 // Unobserved heterogeneity on training	

	Se = exp(-v1 * xb1 .* hulp1);
	W = (v2 * xb2 .* hulp2[][1]) .* (exp(-v2 * xb2 .* hulp2[][0]).*exp(-v1 * xb1 .* hulp1));

	return Se~W;
}

effect(const param, const t, const s)	  // Function: Estimate survivor functions and dynamic treatment effect on surviving treated
{
	/*
	** param   parameter vector, passed on to Survivors()
	** t       evaluation duration
	** s       duration at which training starts
	**
	** Returns the row ATETS~Y1~Y0, where
	**   Y1     1 - weighted mean of S(t | training at s) / S(s)
	**   Y0     1 - weighted mean of S(t | no training before t) / S(s)
	**   ATETS  weighted mean of the difference of the two survivor ratios
	** and the weights W are the second column of Survivors(param, s, s).
	*/
	decl atStart, treated, control, W, H, sumW, Y1, Y0, ATETS;

	atStart = Survivors(param, s, s);
	H = atStart[][0];		// survivor function at s
	W = atStart[][1];		// weights
	sumW = sumc(W);

	treated = Survivors(param, t, s);	// training starts at s
	control = Survivors(param, t, t);	// no training before t

	Y1 = 1 - sumc(W .* treated[][0]./H)/sumW;			 // Treated
	Y0 = 1 - sumc(W .* control[][0]./H)/sumW;			 // Control

	ATETS = sumc(W .* (control[][0]-treated[][0])./H)/sumW;

	return ATETS~Y1~Y0;
}


TreatmentEffect(const param, const t, const s, const varP)	 // Function: point estimates and delta-method standard errors of effect()
{
	/*
	** param   parameter vector
	** t, s    passed unchanged to effect()
	** varP    variance-covariance matrix of param
	**
	** Returns the row  Y0 ~ se(Y0) ~ Y1 ~ se(Y1) ~ ATETS ~ se(ATETS),
	** i.e. the output of effect() in reverse order, each followed by its
	** standard error.
	*/
	decl jac, k, step, shifted, up, down, estimates, stderrs;

	estimates = effect(param,t,s);

	// Numerical Jacobian of effect(): one row per parameter, one column per output
	jac = <>;
	for(k=0; k<rows(param); ++k)
	{
		// Step is 0.05% of the parameter, but never smaller than 0.0005
		step = max(fabs(0.0005*param[k])|0.0005);

		shifted = param;
		shifted[k] = shifted[k] + step;
		up = effect(shifted,t,s);

		shifted = param;
		shifted[k] = shifted[k] - step;
		down = effect(shifted,t,s);

		jac |= (up-down)/(2*step);
	}

	// Delta method: square roots of the diagonal of J' V J
	stderrs = sqrt(diagonal(jac'varP*jac));

	return estimates[2]~stderrs[2]~estimates[1]~stderrs[1]~estimates[0]~stderrs[0];
}

calceffect(const mins,const dS,const dT, const param, const cov)
{
	/*
	** Averages the output of TreatmentEffect() over the training start dates
	** s = mins, mins+1, ..., mins+dS-1, always evaluated at duration mins+dT,
	** and prints the six averages as a column (estimate, then its averaged
	** standard error, in the order returned by TreatmentEffect()).
	**
	** mins    first training start date
	** dS      number of start dates averaged over
	** dT      added to mins to obtain the evaluation duration
	** param   parameter vector
	** cov     variance-covariance matrix of param
	**
	** Always returns 1.
	*/
	decl avg, s;

	avg = zeros(1,6);

	for(s=mins; s<mins+dS; ++s)
	{
		// Every element enters with weight 1/dS
		avg = avg + TreatmentEffect(param, mins+dT, s, cov)/dS;
	}

	println(" Effects = ");
	println("%4.3f", avg');

	return 1;
}
   
main()
{
	/*
	** Loads the data, builds the durations, indicators and the X matrix,
	** maximizes the likelihood with MaxBFGS, computes standard errors from the
	** outer product of the numerical scores, and prints the dynamic treatment
	** effects and the parameter table.
	*/
	decl data, param, function, score, sdev, cov, gender,agelayoff,agelayoffsq,id2005,hourlywage,duurUIben,monthlayoff,yearlayoff,layoffJulAug;
	decl over50,month6,agelayoffcb,agelayoff4,over50id,mins,dS,dT,iconv,aStart,aHorizon,a,b;

	/*** Upload and prepare data *****************************************************************************/
	
	// Upload data
	data = loadmat("jobtraining.csv");  // Load Data

	/**** Full sample or only regular workers *****/
	//data = selectifr(data,(data[][1] .== 0));	   // Sample all people who are not from Low Skilled job	
	//data = selectifr(data,(data[][9] .== 1));	   // Sample all people who are older than 50	
	//data = selectifr(data,(data[][9] .== 0));	   // Sample all people who are younger than 50	

	// declaring the different durations and indicators
	// duration 1= time until exit, censor1 = exit indicator
	// duration 2 = time until training, censor2 = training indicator	

	duur1 = data[][4];			 //time until reemployment 	  (make sure you are taking correct column in excel sheet)
	censor1 = data[][5];		 // indicator ==1 if individual exited employment
	duur2 = data[][6];
	duur2 = duur2 .* (duur2 .> 0) + (duur2 .<= 0);		//time until training (1 day if training dur is not observed)	
	censor2 = data[][7];				 // indicator ==1 if individual entered training	
	// NOTE(review): capping duur1 at its own maximum leaves duur1 unchanged, and
	// censor1 is not adjusted here; confirm the intended censoring rule.
	duur1 = duur1 .* (duur1 .< max(duur1)) + max(duur1) * (duur1 .>= max(duur1)); //if exit out of unempl. >=656 days, consider as censored
	censor2 = censor2 .* (duur2 .< duur1);	  // 1=received training AND iff time until training < time until exit

	censor2 = censor2 .*(duur2 .> 1);	// 1=received training AND iff time until training >1 AND iff time until training < time until exit 
	duur2 = duur2 .* (censor2 .== 1) + duur1 .* (censor2 .== 0);  // dur until training=dur until exit if no training observed
	 	  //print(duur1~duur2~censor1~censor2);

	// Declaring candidate explanatory variables (not all are used in X below)
	gender =data[][0];		   // gender
	agelayoff=(data[][2]-20)./40;	   //age at time of layoff
	agelayoffsq=(agelayoff.^2);	   //age at time of layoff squared
	agelayoffcb=(agelayoff.^3);	   //age at time of layoff cubed
	agelayoff4=(agelayoff.^4)	;      //age at time of layoff power 4
	id2005 = data[][1];		   // people considered low skilled in 2005
	print(" obsID   =  ",sumc(id2005)); 
	hourlywage = data[][3];		   // dailywages in previous employment position
	over50 = data[][9]	;	   // dummy for people older than 50 at layoff
	month6 = (duur2.< 182) ;  	   //dummy for people who entered training within 6 months of falling unemployed	  
	duurUIben= data[][8];			// total duration UI benefits
	duurUIben= duurUIben./365;			// years UI benefits
	monthlayoff	=data[][10]	;	   // month of layoff
	yearlayoff=	data[][11];		   // year of layoff
	layoffJulAug=  (monthlayoff .== 7)	+ (monthlayoff .== 8);
	over50id= over50 + (1-over50).*id2005;    // Fast entry into treatment if above 50 or from low skilled job

	
	// Declaring X-matrix
	// When the columns of X change, the starting values and the row labels of
	// the parameter table further down must be changed to match.
// 	X = id2005~over50~gender~agelayoff~agelayoffsq~log(hourlywage)~duurUIben~layoffJulAug;//~agelayoffcb;//~agelayoff4 ;
 	X = id2005~over50~duurUIben;//~agelayoffcb;//~agelayoff4 ;

	
	/*** Selecting population *****************************************************************************/

	// NOTE(review): the two statements below select rows of X only. duur1, duur2,
	// censor1 and censor2 keep their full length, so they would have to be
	// selected with the same condition before these lines can be enabled.

	/**** UNDER 50, no LS *****/
	//X=selectifr(X, (over50id .==0));
	
	/**** OVER 50 or LS *****/
	//X=selectifr(X, (over50id .==1));
	
	obs = rows(X);	 // Number of observations
   	println(" obs   =  ",obs);
	println(" treated obs   =  ",sumc(censor2));
	/******************************************************************************************************/
	
	//Declaring duration dependence
	d= 60|60|60|60|60|60|120|max(duur1)-480;	 // d defines the intervals of the PWC duration dependence

	
	/******* Initializing parameter vector*************************************************************

	 total number of parameters = 3 (treatment effect and the two intercepts)
	                              + 2 * number of columns of X
	                              + 14 (7 duration-dependence terms per hazard)
	 The parameters of unobserved heterogeneity should not be the same
	    	
	 **************************************************************************************************/
	 
	/****** Starting values: 23 parameters for the 3-column X above *******/
	  param = 0|-1|-2|-3|-4|0|zeros(17,1);	 // starting parameters
	  //param = loadmat("param50.xls");
	  
	//The following lines maximize the likelihood function (do not include when generating Standard Errors)
  	MaxControl(-1,1);
  	MaxControlEps(5e-3,0);
  	iconv = MaxBFGS(likelihood, &param, &function, 0, 1);
  	println(" MaxBFGS: ", MaxConvergenceMsg(iconv));	// report whether the optimizer converged

	// update parameters for next step	
	savemat("param.xls",param);
	param=loadmat("param.xls");
	// Compute standard errors
	score = derivative(param);	// generate score matrix
	
	
	savemat("score.xls",score);	 // Save score matrix for estimation in MMPHallTreatFINAL.ox
	score=loadmat("score.xls");
	cov = invert(score'score);	   // Variance-Covariance Matrix (outer product of the scores)
	sdev = sqrt(diagonal(cov))';   // Standard Errors of parameters



	/******* Results Output *************************************************************************/

	/*** Dynamic Treatment Effects (Table 2 in paper) ***/
	
	// calceffect() prints Y0, Y1 and ATETS in that order, each followed by its s.e.
	println("Y0, Y1, ATETS (s.e. below)");

	aStart = {30, 90, 210};		// Initial periods for multiple treatment estimation
	aHorizon = {120, 270};		// Evaluation lengths T-S
	dS = 30;					// Treatment subintervals

	for(a=0; a<sizeof(aStart); ++a)
	{
		for(b=0; b<sizeof(aHorizon); ++b)
		{
			mins = aStart[a];
			dT = aHorizon[b];

			println("S, T, Treat interval ", mins~mins+dT~dS);
			calceffect(mins, dS, dT,param,cov);
		}
	}


	/*** Parameter estimates (Table 3 in paper) ***/

	decl iNull, vT,vPvalT, dof,i;
	 iNull=0;   // H_0=0 is null hypothesis
	 // t-test	conditional probability
	 vT=(fabs(param-iNull))./sdev;	 	 
	 //println(" vT  ", vT);
	 // NOTE(review): dof is the number of columns of X, not the number of
	 // estimated parameters (rows(param)); confirm which one is intended.
	 dof=sizec(X)	;    // degrees of freedom 
	 vPvalT=zeros(rows(param),1);
	 for(i=0; i<rows(param); ++i)
	 {
	 vPvalT[i] = 2*(1-probt(vT[i], obs-dof));	// P-value of t-Test 
	 }

	 // Two rows per parameter: (estimate, t-stat) above (s.e., p-value)
	 decl results0;
	 results0=zeros(rows(param)*2,2);
	 for(i=0; i<rows(param); ++i)
	 {
	 results0[2*i:2*(i+1)-1][]=param[i]~vT[i]|sdev[i]~vPvalT[i];
	 }
	  
	// Row labels follow the order of param for X = id2005~over50~duurUIben:
	// treatment effect, the two intercepts, X in the exit hazard (E), X in the
	// training hazard (P), then duration dependence of exit and of training.
	// For another X, insert or remove the "...E" and "...P" label pairs in the
	// order of the columns of X.
	println("%r",{"treatment  &","           &","baseE &","     &","baseP &","     &",
	                "id2005E &", "           &","over50E &", "           &","duurUIbenE &","     &",
					"id2005P &", "           &","over50P &", "           &","duurUIbenP &","     &",
					"baseE2  &","        &","baseE3  &","        &","baseE4  &","        &","baseE5  &","        &",
					"baseE6  &","        &","baseE7  &","        &","baseE8  &","        &",
					"baseP2  &","        &","baseP3  &","        &","baseP4  &","        &","baseP5  &","        &"
					,"baseP6  &","        &","baseP7  &","        &","baseP8  &","        &"},"%14.5g",
					"%c", { "param & sdev", "tstat & Pval"}, results0);

	//println("%4.3f", results0[][0]);
}