Data definition

-----
Data have been extracted from the following matrices in the FES data set (XX stands for the two-digit year, e.g. XX = 93 for 1993).

incmatrix: FESXX_INCAGG
expmatrix: FESXX_EXPAGG
expdatamatrix: FESXX_EXPDATA
charmatrix: FESXX_CHAR
char2matrix: FESXX_MORECHARS

----
# Definition of the relevant variables (stored in the list "rawmatrix").
# The index i is the sample year minus 1973, e.g. [[1]] = sample of 1974.

# --- Household characteristics ---
rawmatrix[[i]][, 1] <- charmatrix[[i]][, 9]    # employment status
rawmatrix[[i]][, 2] <- charmatrix[[i]][, 7]    # age of household head
rawmatrix[[i]][, 3] <- charmatrix[[i]][, 20]   # number of persons
rawmatrix[[i]][, 4] <- charmatrix[[i]][, 8]    # sex
rawmatrix[[i]][, 5] <- char2matrix[[i]][, 13]  # years of education
# number of children: sum of four columns of char2matrix
rawmatrix[[i]][, 6] <- char2matrix[[i]][, 4] + char2matrix[[i]][, 5] +
  char2matrix[[i]][, 6] + char2matrix[[i]][, 7]
# number of persons working: sum of two columns of char2matrix
rawmatrix[[i]][, 7] <- char2matrix[[i]][, 2] + char2matrix[[i]][, 3]

# --- Expenditure by commodity group ---
rawmatrix[[i]][, 8] <- expmatrix[[i]][, 5]     # food
rawmatrix[[i]][, 9] <- expmatrix[[i]][, 7]     # alcohol and tobacco
rawmatrix[[i]][, 10] <- expmatrix[[i]][, 12]   # fuel and light
# services
rawmatrix[[i]][, 11] <- expmatrix[[i]][, 14] + expmatrix[[i]][, 22] +
  expdatamatrix[[i]][, 64]
rawmatrix[[i]][, 12] <- expmatrix[[i]][, 16]   # clothing
# total expenditure (our definition, the derived variable newtotalcons)
rawmatrix[[i]][, 13] <- newtotalcons[[i]]

# --- Income and FES totals ---
# income BHC without investment income and without tax,
# i.e. non-property income
rawmatrix[[i]][, 14] <- (incmatrix[[i]][, 10] - incmatrix[[i]][, 4] -
  incmatrix[[i]][, 15])
rawmatrix[[i]][, 15] <- incmatrix[[i]][, 4]    # investment income
rawmatrix[[i]][, 16] <- expmatrix[[i]][, 2]    # total expenditure BHC
rawmatrix[[i]][, 17] <- expmatrix[[i]][, 1]    # CASENO

# --- Employment status dummies (derived variables se, ue, ret) ---
rawmatrix[[i]][, 18] <- se[[i]]                # employment status: se
rawmatrix[[i]][, 19] <- ue[[i]]                # employment status: ue
rawmatrix[[i]][, 20] <- ret[[i]]               # employment status: ret

# Derived variable newtotalcons (our definition of total expenditure).
# One term per line; the terms are added in the original left-to-right order.
# Note that column 72 of expdatamatrix is subtracted, not added.
newtotalcons[[i]] <-
  expmatrix[[i]][, 4] +
  expmatrix[[i]][, 7] +
  expmatrix[[i]][, 12] +
  expdatamatrix[[i]][, 47] +
  expdatamatrix[[i]][, 48] +
  expmatrix[[i]][, 14] +
  expmatrix[[i]][, 16] +
  expdatamatrix[[i]][, 64] +
  expdatamatrix[[i]][, 63] +
  expmatrix[[i]][, 20] +
  expmatrix[[i]][, 21] -
  expdatamatrix[[i]][, 72] +
  expmatrix[[i]][, 22]

# Derived variables: 0/1 dummies built from the employment status code
# (column 9 of charmatrix). Each dummy is 1 where the code is in the listed
# set and 0 otherwise (a missing code also yields 0).
n <- length(charmatrix[[i]][, 9])

# se: status code 2
se[[i]] <- as.numeric(charmatrix[[i]][, 9] %in% 2)

# ue: status codes 3, 4, 5 or 7
ue[[i]] <- as.numeric(charmatrix[[i]][, 9] %in% c(3, 4, 5, 7))

# ret: status code 6
ret[[i]] <- as.numeric(charmatrix[[i]][, 9] %in% 6)

--------
# Outlier elimination: keep only households with positive expenditure on the
# current category (column `cat`) and positive total expenditure (column 13,
# our definition).
# NOTE(review): the original description of this step says "positive income",
# but column 13 holds total expenditure; non-property income is column 14 and
# its log is taken when the regression data are built. Confirm whether a
# check on column 14 was intended.
# drop = FALSE keeps the two-dimensional shape even when a single household
# remains, so the later column indexing A[, k] still works.
rawmatrix[[i]] <- rawmatrix[[i]][
  rawmatrix[[i]][, cat] > 0.0 & rawmatrix[[i]][, 13] > 0.0, ,
  drop = FALSE
]

--------

# Starting with the rawmatrix, we create two types of files for the analysis:
# 1. Basicsample for the bandwidth selection in N (nonparametric software by Jeff Racine, http://www.economics.mcmaster.ca/racine/)
# 2. Basicsample for the estimation and bootstrap purposes in Matlab

# As for 1., the format mimics the regression of c on y and a. (Saved in file macrodata_cat_year.txt).

# Regression data for year i: expenditure on the chosen category followed by
# the household covariates. Dummies enter as factors, counts as ordered
# factors, and non-property income enters in logs.
A <- rawmatrix[[i]]
Dat[[i]] <- data.frame(
  A[, cat],                   # expenditure for the commodity group
  factor(A[, 18]),            # self-employment dummy
  factor(A[, 19]),            # unemployed dummy
  factor(A[, 20]),            # retired dummy
  ordered(A[, 3] - A[, 6]),   # adults = persons minus children
  ordered(A[, 6]),            # children
  ordered(A[, 7]),            # persons working
  A[, 2],                     # age of household head
  log(A[, 14])                # log of non-property income
)
names(Dat[[i]]) <- c(
  "exp.for.category",
  "self.employed", "unemployed", "retired",
  "n.adults", "n.child", "n.working",
  "age", "log.income"
)

#The columns of the data file for a certain commodity group contain the following variables
#1 = Expenditure for the commodity group
#2 = Self employment dummy
#3 = Unemployed/Unoccupied dummy
#4 = Retired dummy
#5 = Number of adults
#6 = Number of children
#7 = Number of persons working
#8 = Age of household head
#9 = (log of) Non-property-income

# A sample of data (for total expenditure 1993) is saved in macrodata_cat_year.txt
# The same format is used for the regression of log(c) on y and a (saved in microdata_cat_year.txt)

# As for 2., the format for the basicsample is as follows:
# Basic sample: columns 1-3 and 6-16 of rawmatrix. Columns 4-5 (sex,
# education years) and 17-20 (CASENO, employment dummies) are left out.
A <- data.frame(
  rawmatrix[[i]][, 1:3],
  rawmatrix[[i]][, 6:16]
)

#The columns of the data file contain the following variables
#1 = Employment status
#2 = Head age
#3 = Number of persons
#4 = Number of children
#5 = Number of persons working
#6 = Food (cat = 6)
#7 = Alcohol and tobacco (cat = 7)
#8 = Fuel and light (cat = 8)
#9 = Services (cat = 9)
#10 = Clothing (cat = 10)
#11 = TOTAL expenditure BHC (our definition) (cat = 11)
#12 = (log of) Non-property-income
#13 = Investment income
#14 = TOTAL expenditure BHC (FES definition)

#This data is saved in basicsample_year.txt
#A sample of this data is attached