# this code is needed to run analysis_derivative

clean<-function(DAT){
#######
#
# we trim all the observations that are beyond 3 times standard deviation from the mean
#
    DAT[,2]<-ifelse(is.na(DAT[,2]),0,DAT[,2])    # ifelse(Bedingung, Vorgehen, alternatives Vorgehen)
    DAT[,1]<-ifelse(is.na(DAT[,1]),0,DAT[,1])
    DAT<-DAT[DAT[,1]>0 & DAT[,2]>0,]        # only positive values
    sd1<- sd(DAT[,1])
    sd2<- sd(DAT[,2])
    m1<-mean(DAT[,1])
    m2<-mean(DAT[,2])
    DAT<-DAT[DAT[,1]>= (m1- 3*sd1) & DAT[,1]<= (m1+3*sd1),]    # cleaning extreme values
    DAT<-DAT[DAT[,2]>= (m2- 3*sd2) & DAT[,2]<= (m2+3*sd2),]
    o<-order(DAT[,1])        # order with income
    W<-DAT[o,]
    W
}

clean_wz<-function(DAT){
#
# we trim all the observations that are beyond 3 times standard deviation from the mean
#
    DAT[,2]<-ifelse(is.na(DAT[,2]),0,DAT[,2])    # ifelse(Bedingung, Vorgehen, alternatives Vorgehen)
    DAT[,1]<-ifelse(is.na(DAT[,1]),0,DAT[,1])
    DAT<-DAT[DAT[,1]>=0 & DAT[,2]>=0,]        # only positive or zero values
    sd1<- sd(DAT[,1])
    sd2<- sd(DAT[,2])
    m1<-mean(DAT[,1])
    m2<-mean(DAT[,2])
    DAT<-DAT[DAT[,1]>= (m1- 3*sd1) & DAT[,1]<= (m1+3*sd1),]    # cleaning extreme values
    DAT<-DAT[DAT[,2]>= (m2- 3*sd2) & DAT[,2]<= (m2+3*sd2),]
    o<-order(DAT[,1])        # order with income
    W<-DAT[o,]
    W
}


bootstrap<-function(X,it,method)
    {
    X<-X[order(X[,1]),]
    ## confidence intervals
    n<-nrow(X)          
    M<-matrix(nrow=it, ncol=n)    

    ## Iterative loop   
      for(i in 1:it)
        {
        sam<-sample(1:n,n,replace=TRUE)    
        Xn<-X[sam,] 
        if (method=="sm1" | method=="sm2"){
                         M[i,]<-sm.regression(Xn[,1], Xn[,2], eval.points=X[,1], display="none")$estimate
                         }
        if (method=="lokern1" | method=="lokern2"){
                              count<-rep(0,nrow(Xn))
                              for(j in 1:(nrow(Xn)-1)){
                                                   if (Xn[j,1]==Xn[j+1,1]){count[j+1]<-1}
                                                     }
                                                  E<-Xn[which(count==0),]
                                                  M[i,]<-lokerns(E[,1], E[,2], hetero=TRUE, x.out=X[,1])$est 
                              }  
        }       #  end of iterative loop 
    M<-apply(M,2,sort) # sort with estimation (ascending)
    qy<-function(y){quantile(y, c(0.025,0.975) )}  # Quantilfunction
    qnt<-apply(M,2, qy)            
    sopra<-cbind(X[,1],qnt[2,])    
    sotto<-cbind(X[,1],qnt[1,])  
    if (method=="lokern2" | method=="sm2"){   
    sm_sopra<-sm.regression(sopra[,1], sopra[,2], display="none")   # sopra = regression with values from 0,975 quantil
    sm_sotto<-sm.regression(sotto[,1], sotto[,2], display="none")   # sotto = regression with values from 0,025 quantil
    sm_0.975_eval.points<-sm_sopra[[1]]
    sm_0.975_estimate<-sm_sopra[[2]]
    sm_0.025_eval.points<-sm_sotto[[1]]
    sm_0.025_estimate<-sm_sotto[[2]]
    B<-matrix(nrow=50, ncol=4)
    colnames(B)<-c("sm_0.975_eval.points","sm_0.975_estimate","sm_0.025_eval.points","sm_0.025_estimate")
    B[,1]<-sm_0.975_eval.points
    B[,2]<-sm_0.975_estimate
    B[,3]<-sm_0.025_eval.points
    B[,4]<-sm_0.025_estimate 
    }
    if (method=="sm1" |method=="lokern1"){ B<- cbind(sopra[,2],sotto[,2])}  
    B ## output
    }

fun_avderiv<-function(DAT){    
### method for estimating average derivative
# see H?rdle and Stoker JASA 1989
# library(KernSmooth)
    DAT<-DAT[order(DAT[,1]),]
    #E<-log(DAT)
    #E<-DAT
    X= (DAT[,1] - mean(DAT[,1])) / sd (DAT[,1]) # normalization
    Y= (DAT[,2] - mean(DAT[,2])) #/sd (DAT[,2])  # normalization
    #X=DAT[,1]
    #Y= DAT[,2]
    n=length(X)
    bw<-dpik(X, kernel="biweight")
    K0<- function(u) {ifelse(abs(u)<=1, 15/16 *(1-u^2)^2, 0)}   # kernel  
    K1<- function(u) {ifelse(abs(u)<=1, 15/4 *(u^3-u), 0)}   # kernel  for first derivative
    fk0<-function(X1,z1) {K0((X1-z1)/bw)}  
    fk1<-function(X1,z1) {K1((X1-z1)/bw)}  
    fhat1<-sapply(1:n,function(b){sum(mapply(fk1,X,X[b]))/(-n*bw^2)})
    fhat0<-sapply(1:n,function(b){sum(mapply(fk0,X,X[b]))/(n*bw)})
    alpha<-round(0.05*n)
    trimd<-order(fhat0)[1:alpha]
    fhat0[trimd]<-NA
    avd<-1:n
    for (i in 1:n){
    avd[i]<- (fhat1[i]*Y[i] )/ fhat0[i]
    }
    avd[is.na(avd)]<-0
    AVD<-sum(avd)/-n
    AVD
    }
    
fun_avd_se<-function(DAT, niter){   
 # standard errors average derivatives
    DAT<-DAT[order(DAT[,1]),]
    n=nrow(DAT)
    bavd<-1:niter
    for (B in 1:niter){
    DATn<-DAT[sample(1:n,n,replace=TRUE),]
    DATn<-DATn[order(DATn[,1]),]
    #E<-log(DATn)
    X= (DATn[,1] - mean(DATn[,1])) / sd (DATn[,1]) # normalization
    Y= (DATn[,2] - mean(DATn[,2])) #/sd (DATn[,2])  # normalization
    bw<-dpik(X, kernel="biweight")
    K0<- function(u) {ifelse(abs(u)<=1, 15/16 *(1-u^2)^2, 0)}   # kernel  
    K1<- function(u) {ifelse(abs(u)<=1, 15/4 *(u^3-u), 0)}   # kernel  for first derivative
    fk0<-function(X1,z1) {K0((X1-z1)/bw)}  
    fk1<-function(X1,z1) {K1((X1-z1)/bw)}  
    fhat1<-sapply(1:n,function(b){sum(mapply(fk1,X,X[b]))/(-n*bw^2)})
    fhat0<-sapply(1:n,function(b){sum(mapply(fk0,X,X[b]))/(n*bw)})
    alpha<-round(0.05*n)
    trimd<-order(fhat0)[1:alpha]
    fhat0[trimd]<-NA
    avd<-1:n
    for (i in 1:n){
    avd[i]<- (fhat1[i]*Y[i] )/ fhat0[i]
    }
    avd[is.na(avd)]<-0
    bavd[B]<-sum(avd)/-n
    }
    SE<-sd(bavd)
    SE
    }    
    
 fun_avd_se_fast<-function(DAT){   
 # standard errors average derivatives  (approximated)
 # depends on library(sm)
    DAT<-DAT[order(DAT[,1]),]
    n=nrow(DAT)
    bavd<-1:200
    for (B in 1:200){
    DATn<-DAT[sample(1:n,n,replace=TRUE),]
    DATn<-DATn[order(DATn[,1]),]
    E<-log(DATn)
    X<- (E[,1] - mean(E[,1])) / sd (E[,1]) # normalization
    Y<- (E[,2] - mean(E[,2])) /sd (E[,2])  # normalization
    smr<-sm.regression(X,Y, ngrid=100)
    X<-smr$eval.points
    Y<-smr$estimate
    nn<-length(X)
    bw<-dpik(X, kernel="biweight")
    K0<- function(u) {ifelse(abs(u)<=1, 15/16 *(1-u^2)^2, 0)}   # kernel  
    K1<- function(u) {ifelse(abs(u)<=1, 15/4 *(u^3-u), 0)}   # kernel  for first derivative
    fk0<-function(X1,z1) {K0((X1-z1)/bw)}  
    fk1<-function(X1,z1) {K1((X1-z1)/bw)}  
    fhat1<-sapply(1:nn,function(b){sum(mapply(fk1,X,X[b]))/(-nn*bw^2)})
    fhat0<-sapply(1:nn,function(b){sum(mapply(fk0,X,X[b]))/(nn*bw)})
    alpha<-round(0.05*nn)
    trimd<-order(fhat0)[1:alpha]
    fhat0[trimd]<-NA
    avd<-1:nn
    for (i in 1:nn){
    avd[i]<- (fhat1[i]*Y[i] )/ fhat0[i]
    }
    avd[is.na(avd)]<-0
    bavd[B]<-sum(avd)/-nn
    }
    SE<-sd(bavd)
    SE
 }    
    
#### function to remove NA
remna<-function(x,i){
x[!is.na(x[,i]) , ]
}
#######
#### function to remove outliers
remout<-function(x,i){
mn<-mean(x[,i])
sn<-sd(x[,i])
x[abs(x[,i]-mn)< 3*sn , ]
}

##### function to estimate average derivative with the method of Banerjee (JoE) 
fun_avd_banerjee<-function(DAT){    
    DAT<-DAT[order(DAT[,1]),]
    E<-log(DAT)
    X= E[,1] 
    Y= E[,2]
    T<-length(X)
    h<- sqrt(log(T)) / T^(3/4) 
    #h<-h*max(X)
    k<-floor(h^(-1))
    B<-seq(from=min(X), to=max(X), length=k+1)
    bet<-rep(0,k)
    I3<-rep(0,k)
    wg<-rep(0,k)
    for(i in 1:k)
    {
    ww<-which(X>= B[i] & X<=B[i+1])
    wg[i]<-length(ww)/T
    if(length(ww)>=3){
    I3[i]<-1
    bet[i]<-lm(Y[ww]~X[ww])$coefficients[2]
   }
   }
AVD<-sum(wg*bet)
AVD
}

##### function to estimate average derivative directly
fun_avd_direct<-function(DAT){    
    DAT<-DAT[order(DAT[,1]),]
    E<-log(DAT)
    X= E[,1] 
    Y= E[,2]
    nt=200
    smd<-sm.regression(X,Y, ngrid=nt)
    xx<-smd$eval.points
    yy<-smd$estimate
    delty<-1:(nt-1)
    for(i in 1:(nt-1)){
    delty[i]<-(yy[i+1]-yy[i])/(xx[i+1]-xx[i])
    }
    mean(delty) 
    }
    
#### parametric method to estimate the average derivative
fun_avd_par<-function(DAT){
E<-DAT
E<-E[order(E[,1]),]
ll<-lm(log(E[,2]) ~ log(E[,1]) )
cc<-coef(ll)
elas<-cc[2]  # 0.5985644
elas}
