###########################
Definition of the functions
###########################

w_testindscore<-function(dep1, dep11, reg1, wei, smoothing1, dep2, reg2, order, coefks, m=length(dep1), grid=seq(0.1,0.9,0.01), nreps=1000, method="br", trim=0.045){
	grid=grid[abs(grid-0.5)>trim]
	grid=c(grid,0.25,0.75,0.5)
	nq=length(grid)
	wei=wei/mean(wei)
	temp=w_after1(dep1,reg1,wei,coefks,smoothing1)
	P=temp[[3]]
	Pder=temp[[2]]
	n=length(dep1)
	k1=ncol(reg1)
	reg2=as.matrix(reg2)
	k2=ncol(reg2)
	deltap=solve(temp[[1]])/n
	scoreks=Pder*c((dep1-P)/(P*(1-P)))
	index=reg1%*%coefks
	prob1<-glm(dep1~index,family=binomial(probit),weights=wei)$coef
	g<-prob1[1]+prob1[2]*index
	mill<-cbind(1,dnorm(g)/pnorm(g))
	if(order>1) for(i in 2:order) mill<-cbind(mill,mill[,2]^i)
	res<-matrix(,k2+order+1,0)
	for(q in 1:nq) res<-cbind(res,rq.wfit(cbind(reg2[dep11==1,],mill[dep11==1,]),dep2[dep11==1],tau=grid[q],weights=wei[dep11==1],method=method)$coef)
	r<-dep11*cbind(reg2,mill)
	dmill<-rep(0,n)
	for(i in 1:order) dmill<-cbind(dmill,i*(dnorm(g)/pnorm(g))^(i-1)*prob1[2]*((1/(2*pi)^0.5)*exp(-g^2/2)*(-g)*pnorm(g)-dnorm(g)^2)/pnorm(g)^2)
	deltafrx<-rep(list(matrix(,k2+order+1,k1)),nq)
	z<-rep(list(matrix(,n,k2+order+1)),nq)
	deltafr<-rep(list(matrix(,k2+order+1,k2+order+1)),nq)
	scoreqr<-rep(list(matrix(,n,k2+order+1)),nq)
	for(q in 1:nq){
		resid<-dep2-cbind(reg2,mill)%*%res[,q]
		h<-bandwidth.rq(grid[q],sum(dep11))
		if(grid[q]-h<0.001) h<-grid[q]-0.001
		if(grid[q]+h>0.999) h<-0.999-grid[q]
		h<-(qnorm(grid[q]+h)-qnorm(grid[q]-h))*min(sd(resid[dep11==1]),(quantile(resid[dep11==1],0.75)-quantile(resid[dep11==1],0.25))/1.34)
		den<-dnorm(resid/h)/h
		deltafr[[q]]<-crossprod(c(sqrt(den))*sqrt(wei)*r)/n
		deltafrx[[q]]<-t(c(den)*c(wei)*c(dmill%*%res[(k2+1):(k2+order+1),q])*r)%*%reg1[,-1]/n
		scoreqr[[q]]=c((grid[q]-(resid<0)))*r
		z[[q]]=(scoreqr[[q]]-scoreks%*%solve(deltap)%*%t(deltafrx[[q]]))%*%solve(deltafr[[q]])
	}
	for(q in 1:nq){
		z[[q]]=z[[q]]%*%rbind(diag(k2),matrix(0,order+1,k2))
	}
	finite_nq_var=cov.wt(cbind(z[[nq-2]],z[[nq-1]],z[[nq]]),wei)$cov/n
	R=rbind(cbind(diag(k2),matrix(0,k2,k2),-diag(k2)),cbind(matrix(0,k2,k2),diag(k2),-diag(k2)))
	finite_test=t(R%*%c(res[1:k2,(nq-2):nq]))%*%solve(R%*%finite_nq_var%*%t(R))%*%(R%*%c(res[1:k2,(nq-2):nq]))
	finite_p=1-pchisq(finite_test,2*k2)
	grid=grid[1:(nq-3)]
	zmed=z[[nq]]
	res_med=res[1:k2,nq]
	for(q in 1:(nq-3)){
		z[[q]]=z[[q]]-zmed
	}
	weit=c()
	weit_coef_by_coef=c()
	for(q in 1:(nq-3)){
		temp=cov.wt(z[[q]],wei)$cov/n
		weit[[q]]=solve(temp)
		weit_coef_by_coef[[q]]=1/sqrt(diag(temp))
	} 
	temp<-rep(list(matrix(,length(grid),nreps,)),length(m))
	coef_by_coef=array(,c(length(grid),nreps,length(m),k2))
	for(q in 1:length(grid)){
		for(j in 1:nreps){
			set.seed(j)
			sboot<-sample(1:n,max(m),TRUE)
			for(i in 1:length(m)){
				temp[[i]][q,j]=sqrt(wmeanc(as.matrix(z[[q]][sboot[1:m[i]],]),wei[sboot[1:m[i]]])%*%weit[[q]]%*%wmeanc(as.matrix(z[[q]][sboot[1:m[i]],]),wei[sboot[1:m[i]]]))
				for(k in 1:k2){
					coef_by_coef[q,j,i,k]=abs(wmeanc(z[[q]][sboot[1:m[i]],k,drop=FALSE],wei[sboot[1:m[i]]]))*weit_coef_by_coef[[q]][k]
				}
			}
		}
	}
	testks=matrix(,nreps,length(m))
	testcvm=matrix(,nreps,length(m))
	testks_coef_by_coef=array(,c(nreps,length(m),k2))
	testcvm_coef_by_coef=array(,c(nreps,length(m),k2))
	for(j in 1:nreps){
		for(i in 1:length(m)){
			testks[j,i]=sqrt(m[i])*max(temp[[i]][,j])
			testcvm[j,i]=m[i]*mean(temp[[i]][,j]^2)
			for(k in 1:k2){
				testks_coef_by_coef[j,i,k]=sqrt(m[i])*max(coef_by_coef[,j,i,k])
				testcvm_coef_by_coef[j,i,k]=m[i]*mean(coef_by_coef[,j,i,k]^2)
			}
		}
	}
	sample=matrix(res[1:k2,1:length(grid)],k2,length(grid))
	temp1=matrix(,length(grid),k2+1)
	for(q in 1:length(grid)){
		temp1[q,k2+1]=sqrt(c(sample[,q]-res_med)%*%weit[[q]]%*%c(sample[,q]-res_med))
		for(k in 1:k2){
			temp1[q,k]=abs(sample[k,q]-res_med[k])*weit_coef_by_coef[[q]][k]
		}
	}
	tvalueks=sqrt(length(dep1))*max(temp1[,k2+1])
	tvaluecvm=length(dep1)*mean(temp1[,k2+1]^2)
	cvm=pmean(tvaluecvm<testcvm)
	ks=pmean(tvalueks<testks)
	cvm_coef_by_coef=matrix(,k2,length(m))
	ks_coef_by_coef=matrix(,k2,length(m))
	for(k in 1:k2){
		tvalueks=sqrt(length(dep1))*max(temp1[,k])
		tvaluecvm=length(dep1)*mean(temp1[,k]^2)
		cvm_coef_by_coef[k,]=apply(tvaluecvm<testcvm_coef_by_coef[,,k,drop=FALSE],2,mean)
		ks_coef_by_coef[k,]=apply(tvalueks<testks_coef_by_coef[,,k,drop=FALSE],2,mean)
	}
	list(ks=ks,cvm=cvm,ks_coef_by_coef=ks_coef_by_coef,cvm_coef_by_coef=cvm_coef_by_coef,finite_p=finite_p,coef=res[1:k2,])
}

ob_inter<-function(b,y,x,w,h,np=500){
	b=c(1,b)
	k<-dim(x)[2]
	n<-dim(x)[1]
	xb<-crossprod(t(x),b)
	xb<-xb-weighted.mean(xb,w)
	e<-0.5/n
	hh<-c(sqrt(cov.wt(xb,w)$cov))
	hh<-h*hh
	if(np==0){
		xb_ev=xb
	} else{
		xb_ev=wq(xb,w,seq(0,1,1/np))
	}
	nev=length(xb_ev)
	g_ev<-rep(NA,nev)
	for(i in 1:nev){
		h=hh
		kw=dnorm(xb_ev[i]-xb,sd=h)
		kw[kw<0.001*mean(kw)]=0
		yi=y[kw>0]
		kw<-kw[kw>0]/sum(kw[kw>0])
		g_ev[i]<-crossprod(yi,kw)
	}
	g=approx(c(xb_ev),c(g_ev),xout=c(xb),rule=2)$y
	-weighted.mean(y*log(pmin(1-e,pmax(g,e)))+(1-y)*log(pmin(1-e,pmax(e,1-g))),w)
}

wq<-function(y,w,prob){
	o<-order(y)
	y<-y[o]; w<-w[o]
	a<-0; i<-1; w=w/sum(w)
	res<-c()
	n=length(w)
	for(q in prob){
		while(a<q & i<n){
			a<-a+w[i]
			i<-i+1
		}
		res<-c(res,y[i])
	}
	return(res)
}

w_after1=function(y,x,w,b,h){
	n=length(y)
	k=dim(x)[2]
	w<-w/mean(w)
	e<-0.5/n
	cov<-as.matrix(cov.wt(x,w)$cov[,2:k])
	temp<-w_newtk1(y,x,w,b,h,cov)
	g<-temp[[2]]
	dg=temp[[3]]
	Qn<-wsum(y*log(g[,1]+e)+(1-y)*log(1+e-g[,1]),w)
	gcv<-Qn/weighted.mean(1-g[,2],w)^2
	g[,1]=pmax(pmin(g[,1],1-e),e)
	vk<-solve(t(dg/c(g[,1]*(1-g[,1])))%*%(w*dg))
	list(vk,dg,g[,1],gcv)
}

w_newtk1<-function(y,x,w,b,h,t){
	k<-dim(x)[2]
	n<-dim(x)[1]
	f<-matrix(0,n,1)
	g<-f
	dg<-matrix(0,n,k-1)
	xb<-crossprod(t(x),b)
	xb<-xb-weighted.mean(xb,w)
	hh<-c(sqrt(cov.wt(xb,w)$cov))
	dm<-crossprod(b,t/hh^2)
	hh<-h*hh
	e<-0.5/n
	i<-1
	while(i<=n){
		h=hh
		v<-(xb[i,1]-xb)/h
		kw=dnorm(xb[i,1]-xb,sd=h)
		kw=kw/sum(kw)
		g[i]<-crossprod(y,kw)
		dg[i,]<-crossprod(kw*v*(y-g[i]),(x[,2:k]-matrix(x[i,2:k],n,k-1,byrow=TRUE))/hh)
		dg[i,]<-dg[i,]+crossprod(kw*v*(y-g[i]),c(v)*matrix(dm,n,k-1,byrow=TRUE))
		f[i]<-max(kw)/sum(kw)
		i<-i+1
	}
	v<-dg*c((y+((2*y-1)*e)-g)/((g+e)*(1+e-g)))
	xb<-wsumc(v,w)
	i<-dg*c(g+e)
	i<-i/c(1+e-g)
	i<-t(w*dg)%*%i
	i<-solve(i)
	xb<-c(0,i%*%xb)
	list(cbind(xb,c(0,sqrt(n)*pmean(v)/apply(v,2,sd))),cbind(g,f),dg)
}

wsumc<-function(y,w) psum(y*w)
psum<-function(x){
	n<-nrow(x)
	c(matrix(1,1,n)%*%x)
}
pmean<-function(x){
	n<-nrow(x)
	c(matrix(1,1,n)%*%x)/n
}
wsum<-function(y,w) sum(y*w)

w_optimalselrqgcv<-function(dep1, dep11, reg1, wei, dep2, reg2, quant, maxorder, coefks, method="br"){
	wei=wei/mean(wei)
	reg2=as.matrix(reg2)
	index=reg1%*%coefks
	prob1<-glm(dep1~index,family=binomial(probit),weights=wei)$coef
	g<-prob1[1]+prob1[2]*index
	mill<-cbind(1,dnorm(g)/pnorm(g))
	for(i in 2:maxorder) mill<-cbind(mill,mill[,2]^i)
	gcv=c()
	for(i in 1:maxorder){
		beta=rq.wfit(cbind(reg2[dep11==1,],mill[dep11==1,1:(1+i)]),dep2[dep11==1],tau=quant,weights=wei[dep11==1],method=method)$coef
		if(rcond(t(cbind(reg2[dep11==1,],mill[dep11==1,1:(i+1)])*wei[dep11==1])%*%cbind(reg2[dep11==1,],mill[dep11==1,1:(i+1)]))<.Machine$double.eps) break
		gcv<-c(gcv,w_gcvq(dep2[dep11==1],cbind(reg2[dep11==1,],mill[dep11==1,1:(i+1)]), wei[dep11==1], beta,quant))
	}
	gcv
}

w_gcvq<-function(dep,reg,wei,beta,quantile){
	pred<-reg%*%beta
	i=1
	H=0
	while(((i-1)*1000)<length(dep)){
		j=min(length(dep),i*1000)
		H=H+wsum(diag(reg[((i-1)*1000+1):j,]%*%solve(t(reg*wei)%*%reg)%*%t(reg[((i-1)*1000):j,])),wei[((i-1)*1000+1):j])
		i=i+1
	}
	weighted.mean(check(dep-pred,quantile),wei)/(1-H/sum(wei))^2
}

check<-function(lambda,q) (q-(lambda<0))*lambda
wmeanc<-function(y,w) psum(y*w)/sum(w)

library(foreign)
library(quantreg)
#folder where the file "application.dta" is saved
folder=" "
data=read.dta(paste(folder,"application.dta",sep=""))
attach(data)
x2=cbind(exp, exp2, exp_edu, exp2_edu, hsg, some_college, associate, college, advanced, mw, So, We, married)
y=lwage
w=wgt
w=w/mean(w)
x1=cbind(child02, child35, child613, child02_m, child35_m, child613_m, exp, exp2, exp_edu, exp2_edu, hsg, some_college, associate, college, advanced, mw, So, We, married)
y1=ft
y11=rep(0, length(y1))
y11[ft==1 & awage==0 & is.na(lwage)==0]=1
y11[is.na(y)==1]=0
y[y11==0]=0
detach(data)
rm(data)

hgrid=c(0.01,0.05,0.1,0.25,0.5,1,2,5)
temp<-glm(y1~x1,weights=w,family=binomial(probit))$coef
b1<-temp[3:length(temp)]/temp[2]
res=matrix(,length(hgrid),length(b1))
for(i in 1:length(hgrid)){
	temp=optim(b1,ob_inter,method="Nelder-Mead",y=y1,x=x1,w=w,h=hgrid[i],np=400,control=list(maxit=5000,trace=10))
	res[i,]=temp$par		
}
gcv=rep(-10e10,length(hgrid))
for(i in 1:length(hgrid)){
	gcv[i]=w_after1(y1,x1,w,c(1,res[i,]),hgrid[i])[[4]]
}
optorder=c()
for(i in seq(0.1,0.9,0.01)){
	temp=w_optimalselrqgcv(y1,y11,x1,w,y,x2,i,6,c(1,res[(1:length(hgrid))[gcv==max(gcv)],]), method="br")
	optorder=c(optorder,(1:length(temp))[temp==min(temp)])
}
temp=colSums(optorder==matrix(1:6,length(optorder),6,byrow=TRUE))
order=(1:6)[temp==max(temp)]
test_results_0298=w_testindscore(y1,y11,x1,w,hgrid[gcv==max(gcv)],y,x2,order,c(1,res[(1:length(hgrid))[gcv==max(gcv)],]),m=c(20+length(y1)^0.25,20+length(y1)^(1/2.01),length(y1)/4,length(y1)),grid=seq(0.02,0.98,0.01),nreps=1000,method="br",trim=0.045)
test_results_2080=w_testindscore(y1,y11,x1,w,hgrid[gcv==max(gcv)],y,x2,order,c(1,res[(1:length(hgrid))[gcv==max(gcv)],]),m=c(20+length(y1)^0.25,20+length(y1)^(1/2.01),length(y1)/4,length(y1)),grid=seq(0.2,0.8,0.01),nreps=1000,method="br",trim=0.045)

#Table 4
rbind(c(test_results_0298$ks[c(2,4)],test_results_2080$ks[c(2,4)],test_results_0298$cvm[c(2,4)],test_results_2080$cvm[c(2,4)]),
cbind(test_results_0298$ks_coef_by_coef[,c(2,4)],
test_results_2080$ks_coef_by_coef[,c(2,4)],
test_results_0298$cvm_coef_by_coef[,c(2,4)],
test_results_2080$cvm_coef_by_coef[,c(2,4)]))

#Figure 3
dep1=y1; dep11=y11; reg1=x1; wei=w; smoothing1=hgrid[gcv==max(gcv)]; dep2=y; reg2=x2; coefks=c(1,res[(1:length(hgrid))[gcv==max(gcv)],]); grid=seq(0.02,0.98,0.01); method="br"
afterks=w_after1(dep1,reg1,wei,coefks,smoothing1)
nq=length(grid)
wei=wei/mean(wei)
P=afterks[[3]]
Pder=afterks[[2]]
n=length(dep1)
k1=ncol(reg1)
reg2=as.matrix(reg2)
k2=ncol(reg2)
deltap=solve(afterks[[1]])/n
scoreks=Pder*c((dep1-P)/(P*(1-P)))
index=reg1%*%coefks
prob1<-glm(dep1~index,family=binomial(probit),weights=wei)$coef
g<-prob1[1]+prob1[2]*index
mill<-cbind(1,dnorm(g)/pnorm(g))
if(order>1) for(i in 2:order) mill<-cbind(mill,mill[,2]^i)
res<-matrix(,k2+order+1,0)
for(q in 1:nq) res<-cbind(res,rq.wfit(cbind(reg2[dep11==1,],mill[dep11==1,]),dep2[dep11==1],tau=grid[q],weights=wei[dep11==1],method=method)$coef)
r<-dep11*cbind(reg2,mill)
dmill<-rep(0,n)
for(i in 1:order) dmill<-cbind(dmill,i*(dnorm(g)/pnorm(g))^(i-1)*prob1[2]*((1/(2*pi)^0.5)*exp(-g^2/2)*(-g)*pnorm(g)-dnorm(g)^2)/pnorm(g)^2)
deltafrx<-rep(list(matrix(,k2+order+1,k1)),nq)
z<-rep(list(matrix(,n,k2+order+1)),nq)
deltafr<-rep(list(matrix(,k2+order+1,k2+order+1)),nq)
scoreqr<-rep(list(matrix(,n,k2+order+1)),nq)
for(q in 1:nq){
	resid<-dep2-cbind(reg2,mill)%*%res[,q]
	h<-bandwidth.rq(grid[q],sum(dep11))
	if(grid[q]-h<0.001) h<-grid[q]-0.001
	if(grid[q]+h>0.999) h<-0.999-grid[q]
	h<-(qnorm(grid[q]+h)-qnorm(grid[q]-h))*min(sd(resid[dep11==1]),(quantile(resid[dep11==1],0.75)-quantile(resid[dep11==1],0.25))/1.34)
	den<-dnorm(resid/h)/h
	deltafr[[q]]<-crossprod(c(sqrt(den))*sqrt(wei)*r)/n
	deltafrx[[q]]<-t(c(den)*c(wei)*c(dmill%*%res[(k2+1):(k2+order+1),q])*r)%*%reg1[,-1]/n
	scoreqr[[q]]=c((grid[q]-(resid<0)))*r
	z[[q]]=(scoreqr[[q]]-scoreks%*%solve(deltap)%*%t(deltafrx[[q]]))%*%solve(deltafr[[q]])
}
sd_res=matrix(,97,13)
for(q in 1:97){
	sd_res[q,]=sqrt(diag(cov.wt(z[[q]],wei)$cov)/n)[1:13]
}
layout.show(layout(matrix(c(1,2,3,4), 2,2,byrow=TRUE),c(1.1,1),c(1,1)))
par(mar=c(2,4,4,1))
plot(seq(0.05,0.95,0.01),res[1,4:94],xlim=c(0,1),ylim=c(-0.001,0.043),type="l",lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="Experience")
polygon(c(seq(0.05,0.95,0.01),rev(seq(0.05,0.95,0.01))),c(res[1,4:94]-1.96*sd_res[4:94,1],rev(res[1,4:94]+1.96*sd_res[4:94,1])),border=NA,col="grey")
lines(seq(0.05,0.95,0.01),res[1,4:94],xlim=c(0,1),lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="Experience")
axis(1);axis(3,c(-100,100))
par(mar=c(2,2,4,1))
plot(seq(0.05,0.95,0.01),res[2,4:94],xlim=c(0,1),ylim=c(-0.000715,0.00005),type="l",lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="Experience squared")
polygon(c(seq(0.05,0.95,0.01),rev(seq(0.05,0.95,0.01))),c(res[2,4:94]-1.96*sd_res[4:94,2],rev(res[2,4:94]+1.96*sd_res[4:94,2])),border=NA,col="grey")
lines(seq(0.05,0.95,0.01),res[2,4:94],xlim=c(0,1),lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="Experience squared")
axis(1);axis(3,c(-100,100))
par(mar=c(4,4,4,1))
plot(seq(0.05,0.95,0.01),res[5,4:94],xlim=c(0,1),ylim=c(-0.02,0.501),type="l",lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="High School")
polygon(c(seq(0.05,0.95,0.01),rev(seq(0.05,0.95,0.01))),c(res[5,4:94]-1.96*sd_res[4:94,5],rev(res[5,4:94]+1.96*sd_res[4:94,5])),border=NA,col="grey")
lines(seq(0.05,0.95,0.01),res[5,4:94],xlim=c(0,1),lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="High School")
axis(1);axis(3,c(-100,100))
par(mar=c(4,2,4,1))
plot(seq(0.05,0.95,0.01),res[8,4:94],xlim=c(0,1),ylim=c(0.12,0.32),type="l",lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="College")
polygon(c(seq(0.05,0.95,0.01),rev(seq(0.05,0.95,0.01))),c(res[8,4:94]-1.96*sd_res[4:94,8],rev(res[8,4:94]+1.96*sd_res[4:94,8])),border=NA,col="grey")
lines(seq(0.05,0.95,0.01),res[8,4:94],xlim=c(0,1),lwd=2,xlab="Quantile",ylab="Quantile regression coefficient",main="College")
axis(1);axis(3,c(-100,100))

#Table 3
cbind(c(-coefks[c(7,8,11:15,9,10,19)],-1,-coefks[2:6]),
c(diag(afterks[[1]])[c(6,7,10:14,8,9,18)]^0.5,0,diag(afterks[[1]])[1:5]^0.5),
c(res[c(1,2,5:9,3,4,13),24],rep(NA,6)),
c(sd_res[24,c(1,2,5:9,3,4,13)],rep(NA,6)),
c(res[c(1,2,5:9,3,4,13),49],rep(NA,6)),
c(sd_res[49,c(1,2,5:9,3,4,13)],rep(NA,6)),
c(res[c(1,2,5:9,3,4,13),74],rep(NA,6)),
c(sd_res[74,c(1,2,5:9,3,4,13)],rep(NA,6)))

